Diffstat (limited to 'clang/test'): 44 files changed, 4299 insertions, 137 deletions
diff --git a/clang/test/AST/ByteCode/cxx23.cpp b/clang/test/AST/ByteCode/cxx23.cpp
index 72c751d..ce0a4777 100644
--- a/clang/test/AST/ByteCode/cxx23.cpp
+++ b/clang/test/AST/ByteCode/cxx23.cpp
@@ -1,8 +1,8 @@
 // UNSUPPORTED: target={{.*}}-zos{{.*}}
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=ref,ref20,all,all20 %s
-// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=ref,ref23,all,all23 %s
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter
-// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -verify=expected23,all,all23 %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=ref,ref20,all,all20 %s
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=ref,ref23,all,all23 %s
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=expected20,all,all20 %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fcxx-exceptions -Wno-deprecated-volatile -verify=expected23,all,all23 %s -fexperimental-new-constant-interpreter
 
 #define assert_active(F) if (!__builtin_is_within_lifetime(&F)) (1/0);
 
@@ -393,6 +393,59 @@ namespace UnionMemberCallDiags {
   static_assert(g()); // all-error {{not an integral constant expression}} \
                       // all-note {{in call to}}
 }
+#endif
+
+namespace VolatileWrites {
+  constexpr void test1() {// all20-error {{never produces a constant expression}}
+    int k;
+    volatile int &m = k;
+    m = 10; // all20-note {{assignment to volatile-qualified type 'volatile int'}}
+  }
+  constexpr void test2() { // all20-error {{never produces a constant expression}}
+    volatile int k = 12;
+    k = 13; // all20-note {{assignment to volatile-qualified type 'volatile int'}}
+  }
+
+  constexpr void test3() { // all20-error {{never produces a constant expression}}
+    volatile int k = 12; // all20-note {{volatile object declared here}}
+
+    *((int *)&k) = 13; // all20-note {{assignment to volatile object 'k' is not allowed in a constant expression}}
+  }
+
+  constexpr void test4() { // all20-error {{never produces a constant expression}}
+    int k = 12;
+
+    *((volatile int *)&k) = 13; // all20-note {{assignment to volatile-qualified type 'volatile int' is not allowed in a constant expression}}
+  }
+
+#if __cplusplus >= 202302L
+  struct S {
+    volatile int k;
+  };
+  constexpr int test5() {
+    S s;
+    s.k = 12; // all-note {{assignment to volatile-qualified type 'volatile int' is not}}
+
+    return 0;
+  }
+  static_assert(test5() == 0); // all-error{{not an integral constant expression}} \
+                               // all-note {{in call to}}
 #endif
+
+  constexpr bool test6(volatile int k) { // ref20-error {{never produces a constant expression}}
+    k = 14; // ref20-note {{assignment to volatile-qualified type 'volatile int' is not}} \
+            // all-note {{assignment to volatile-qualified type 'volatile int' is not}}
+    return true;
+  }
+  static_assert(test6(5)); // all-error {{not an integral constant expression}} \
+                           // all-note {{in call to}}
+
+  constexpr bool test7(volatile int k) { // all-note {{declared here}}
+    *((int *)&k) = 13; // all-note {{assignment to volatile object 'k' is not allowed in a constant expression}}
+    return true;
+  }
+  static_assert(test7(12)); // all-error {{not an integral constant expression}} \
+                            // all-note {{in call to}}
+}
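The VolatileWrites cases above can also be reproduced outside the lit harness. A minimal sketch, assuming only a clang build that carries this change; the file name repro.cpp and the bare driver invocation are illustrative, not part of the patch:

// repro.cpp -- condensed from the test2/test5 cases above.
// Assumed invocation (illustrative): clang -std=c++20 -fsyntax-only repro.cpp
constexpr int bad() {
  volatile int k = 12; // declaring a volatile local is itself allowed here
  k = 13;              // but a write through a volatile glvalue is never a
                       // constant expression, so in C++20 mode clang rejects
                       // the function ("never produces a constant expression")
  return 0;
}
static_assert(bad() == 0); // error: not an integral constant expression

In C++23 mode the function definition itself is accepted (per the all23/ref23 prefixes above) and only the constant-evaluated call site is diagnosed.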
diff --git a/clang/test/AST/ByteCode/invalid.cpp b/clang/test/AST/ByteCode/invalid.cpp
index affb40ea..00db274 100644
--- a/clang/test/AST/ByteCode/invalid.cpp
+++ b/clang/test/AST/ByteCode/invalid.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -fexperimental-new-constant-interpreter -verify=expected,both %s
-// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=ref,both %s
+// RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=ref,both %s
 
 namespace Throw {
 
diff --git a/clang/test/Analysis/buffer-overlap-decls.c b/clang/test/Analysis/buffer-overlap-decls.c
new file mode 100644
index 0000000..4830f4e
--- /dev/null
+++ b/clang/test/Analysis/buffer-overlap-decls.c
@@ -0,0 +1,23 @@
+// RUN: %clang_analyze_cc1 -verify %s -Wno-incompatible-library-redeclaration \
+// RUN: -analyzer-checker=alpha.unix.cstring.BufferOverlap
+// expected-no-diagnostics
+
+typedef typeof(sizeof(int)) size_t;
+
+void memcpy(int dst, int src, size_t size);
+
+void test_memcpy_proxy() {
+  memcpy(42, 42, 42); // no-crash
+}
+
+void strcpy(int dst, char *src);
+
+void test_strcpy_proxy() {
+  strcpy(42, (char *)42); // no-crash
+}
+
+void strxfrm(int dst, char *src, size_t size);
+
+void test_strxfrm_proxy() {
+  strxfrm(42, (char *)42, 42); // no-crash
+}
diff --git a/clang/test/Analysis/buffer-overlap.c b/clang/test/Analysis/buffer-overlap.c
index 8414a76..defb17a 100644
--- a/clang/test/Analysis/buffer-overlap.c
+++ b/clang/test/Analysis/buffer-overlap.c
@@ -96,3 +96,10 @@ void test_snprintf6() {
   char b[4] = {0};
   snprintf(a, sizeof(a), "%s", b); // no-warning
 }
+
+void* memcpy(void* dest, const void* src, size_t count);
+
+void test_memcpy_esoteric() {
+label:
+  memcpy((char *)&&label, (const char *)memcpy, 1);
+}
diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif
index 85e710f..501d27c 100644
--- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif
+++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif
@@ -141,4 +141,4 @@
     }
   ],
   "version": "[SARIF version]"
-}
\ No newline at end of file
+}
diff --git a/clang/test/Analysis/lit.local.cfg b/clang/test/Analysis/lit.local.cfg
index 3d60a16..03ab418 100644
--- a/clang/test/Analysis/lit.local.cfg
+++ b/clang/test/Analysis/lit.local.cfg
@@ -17,11 +17,13 @@ config.substitutions.append(
     )
 )
 
+sed_cmd = "/opt/freeware/bin/sed" if "system-aix" in config.available_features else "sed"
+
 # Filtering command for testing SARIF output against reference output.
 config.substitutions.append(
     (
         "%normalize_sarif",
-        "sed -r '%s;%s;%s;%s'"
+        f"{sed_cmd} -r '%s;%s;%s;%s'"
         % (
             # Replace version strings that are likely to change.
             r's/"version": ".* version .*"/"version": "[clang version]"/',
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp
index e901631..4c396d3 100644
--- a/clang/test/CIR/CodeGen/complex.cpp
+++ b/clang/test/CIR/CodeGen/complex.cpp
@@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() {
 // OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
 // OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half
 // OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2
+
+void real_on_scalar_from_imag_with_type_promotion() {
+  _Float16 _Complex a;
+  _Float16 b = __real__(__imag__ a);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[B_ADDR]] = alloca half, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
+// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
+// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
+// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2
+// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp
new file mode 100644
index 0000000..f21d203
--- /dev/null
+++ b/clang/test/CIR/CodeGen/delete.cpp
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -mconstructor-aliases -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+typedef __typeof(sizeof(int)) size_t;
+
+struct SizedDelete {
+  void operator delete(void*, size_t);
+  int member;
+};
+void test_sized_delete(SizedDelete *x) {
+  delete x;
+}
+
+// SizedDelete::operator delete(void*, unsigned long)
+// CIR: cir.func private @_ZN11SizedDeletedlEPvm(!cir.ptr<!void>, !u64i)
+// LLVM: declare void @_ZN11SizedDeletedlEPvm(ptr, i64)
+
+// CIR: cir.func dso_local @_Z17test_sized_deleteP11SizedDelete
+// CIR: %[[X:.*]] = cir.load{{.*}} %{{.*}}
+// CIR: %[[X_CAST:.*]] = cir.cast(bitcast, %[[X]] : !cir.ptr<!rec_SizedDelete>), !cir.ptr<!void>
+// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<4> : !u64i
+// CIR: cir.call @_ZN11SizedDeletedlEPvm(%[[X_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @_Z17test_sized_deleteP11SizedDelete
+// LLVM: %[[X:.*]] = load ptr, ptr %{{.*}}
+// LLVM: call void @_ZN11SizedDeletedlEPvm(ptr %[[X]], i64 4)
+
+// OGCG: define dso_local void @_Z17test_sized_deleteP11SizedDelete
+// OGCG: %[[X:.*]] = load ptr, ptr %{{.*}}
+// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[X]], null
+// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]]
+// OGCG: [[DELETE_NOTNULL]]:
+// OGCG: call void @_ZN11SizedDeletedlEPvm(ptr noundef %[[X]], i64 noundef 4)
+
+// This function is declared below the call in OGCG.
+// OGCG: declare void @_ZN11SizedDeletedlEPvm(ptr noundef, i64 noundef)
+
+struct Contents {
+  ~Contents() {}
+};
+struct Container {
+  Contents *contents;
+  ~Container();
+};
+Container::~Container() { delete contents; }
+
+// Contents::~Contents()
+// CIR: cir.func comdat linkonce_odr @_ZN8ContentsD2Ev
+// LLVM: define linkonce_odr void @_ZN8ContentsD2Ev
+
+// operator delete(void*, unsigned long)
+// CIR: cir.func private @_ZdlPvm(!cir.ptr<!void>, !u64i)
+// LLVM: declare void @_ZdlPvm(ptr, i64)
+
+// Container::~Container()
+// CIR: cir.func dso_local @_ZN9ContainerD2Ev
+// CIR: %[[THIS:.*]] = cir.load %{{.*}}
+// CIR: %[[CONTENTS_PTR_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "contents"} : !cir.ptr<!rec_Container> -> !cir.ptr<!cir.ptr<!rec_Contents>>
+// CIR: %[[CONTENTS_PTR:.*]] = cir.load{{.*}} %[[CONTENTS_PTR_ADDR]]
+// CIR: cir.call @_ZN8ContentsD2Ev(%[[CONTENTS_PTR]]) nothrow : (!cir.ptr<!rec_Contents>) -> ()
+// CIR: %[[CONTENTS_CAST:.*]] = cir.cast(bitcast, %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents>), !cir.ptr<!void>
+// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<1> : !u64i
+// CIR: cir.call @_ZdlPvm(%[[CONTENTS_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @_ZN9ContainerD2Ev
+// LLVM: %[[THIS:.*]] = load ptr, ptr %{{.*}}
+// LLVM: %[[CONTENTS_PTR_ADDR:.*]] = getelementptr %struct.Container, ptr %[[THIS]], i32 0, i32 0
+// LLVM: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS_PTR_ADDR]]
+// LLVM: call void @_ZN8ContentsD2Ev(ptr %[[CONTENTS_PTR]])
+// LLVM: call void @_ZdlPvm(ptr %[[CONTENTS_PTR]], i64 1)
+
+// OGCG: define dso_local void @_ZN9ContainerD2Ev
+// OGCG: %[[THIS:.*]] = load ptr, ptr %{{.*}}
+// OGCG: %[[CONTENTS:.*]] = getelementptr inbounds nuw %struct.Container, ptr %[[THIS]], i32 0, i32 0
+// OGCG: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS]]
+// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[CONTENTS_PTR]], null
+// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]]
+// OGCG: [[DELETE_NOTNULL]]:
+// OGCG: call void @_ZN8ContentsD2Ev(ptr noundef nonnull align 1 dereferenceable(1) %[[CONTENTS_PTR]])
+// OGCG: call void @_ZdlPvm(ptr noundef %[[CONTENTS_PTR]], i64 noundef 1)
+
+// These functions are declared/defined below the calls in OGCG.
+// OGCG: define linkonce_odr void @_ZN8ContentsD2Ev
+// OGCG: declare void @_ZdlPvm(ptr noundef, i64 noundef)
diff --git a/clang/test/CIR/CodeGen/lang-c-cpp.cpp b/clang/test/CIR/CodeGen/lang-c-cpp.cpp
index e126932..8931783 100644
--- a/clang/test/CIR/CodeGen/lang-c-cpp.cpp
+++ b/clang/test/CIR/CodeGen/lang-c-cpp.cpp
@@ -3,8 +3,8 @@
 // RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir
 // RUN: FileCheck --check-prefix=CIR-C --input-file=%t.c.cir %s
 
-// CIR-CPP: module attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}}
-// CIR-C: module attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}}
+// CIR-CPP: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}}
+// CIR-C: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}}
 
 int main() {
   return 0;
diff --git a/clang/test/CIR/CodeGen/module-filename.cpp b/clang/test/CIR/CodeGen/module-filename.cpp
new file mode 100644
index 0000000..05e2e92
--- /dev/null
+++ b/clang/test/CIR/CodeGen/module-filename.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// Normally, we try to avoid checking the filename of a test, but that's the
+// entire point of this test, so we use a wildcard for the path but check the
+// filename.
+// CIR: module @"{{.*}}module-filename.cpp"
+
+int main() {
+  return 0;
+}
diff --git a/clang/test/CIR/CodeGen/opt-info-attr.cpp b/clang/test/CIR/CodeGen/opt-info-attr.cpp
index 444286b..97071d7 100644
--- a/clang/test/CIR/CodeGen/opt-info-attr.cpp
+++ b/clang/test/CIR/CodeGen/opt-info-attr.cpp
@@ -13,10 +13,10 @@
 
 void f() {}
 
-// CHECK-O0: module attributes
+// CHECK-O0: module{{.*}} attributes
 // CHECK-O0-NOT: cir.opt_info
-// CHECK-O1: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}}
-// CHECK-O2: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}}
-// CHECK-O3: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}}
-// CHECK-Os: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}}
-// CHECK-Oz: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}}
+// CHECK-O1: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}}
+// CHECK-O2: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}}
+// CHECK-O3: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}}
+// CHECK-Os: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}}
+// CHECK-Oz: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}}
diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp
index 9139651..4d57f8e 100644
--- a/clang/test/CIR/CodeGen/vbase.cpp
+++ b/clang/test/CIR/CodeGen/vbase.cpp
@@ -13,19 +13,29 @@ public:
 
 class Derived : public virtual Base {};
 
-// This is just here to force the record types to be emitted.
void f() { Derived d; + d.f(); +} + +class DerivedFinal final : public virtual Base {}; + +void g() { + DerivedFinal df; + df.f(); } // CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}> // CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}> +// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}> // LLVM: %class.Derived = type { %class.Base } // LLVM: %class.Base = type { ptr } +// LLVM: %class.DerivedFinal = type { %class.Base } // OGCG: %class.Derived = type { %class.Base } // OGCG: %class.Base = type { ptr } +// OGCG: %class.DerivedFinal = type { %class.Base } // Test the constructor handling for a class with a virtual base. struct A { @@ -47,6 +57,76 @@ void ppp() { B b; } // OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8 +// CIR: cir.func {{.*}}@_Z1fv() { +// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init] +// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> () +// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr> +// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i> +// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i +// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i> +// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i +// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived> +// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// CIR: cir.func {{.*}}@_Z1gv() { +// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init] +// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : (!cir.ptr<!rec_DerivedFinal>) -> () +// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : 
!cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// LLVM: define {{.*}}void @_Z1fv() +// LLVM: %[[D:.+]] = alloca {{.*}} +// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]]) +// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]] +// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32 +// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]] +// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]] +// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0 +// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]] +// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]]) +// LLVM: ret void + +// LLVM: define {{.*}}void @_Z1gv() +// LLVM: %[[DF:.+]] = alloca {{.*}} +// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]]) +// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]] +// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0 +// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]] +// LLVM: call void %[[VFN2]](ptr %[[DF]]) +// LLVM: ret void + +// OGCG: define {{.*}}void @_Z1fv() +// OGCG: %[[D:.+]] = alloca {{.*}} +// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]]) +// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]] +// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32 +// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]] +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]]) +// OGCG: ret void + +// OGCG: define {{.*}}void @_Z1gv() +// OGCG: %[[DF:.+]] = alloca {{.*}} +// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]]) +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]]) +// OGCG: ret void + // Constructor for B // CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B> // CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index 8b5379a..8bca48d 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -1322,3 +1322,23 @@ void logical_not() { // OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], 
align 16 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index d8fdeea..f242779 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -1390,3 +1390,23 @@ void logical_not_float() { // OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir new file mode 100644 index 0000000..727c067 --- /dev/null +++ b/clang/test/CIR/IR/global-init.cir @@ -0,0 +1,48 @@ +// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s + +!u8i = !cir.int<u, 8> + +!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}> +!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}> +!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}> + +module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { + cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>) + cir.global external @needsCtor = ctor : !rec_NeedsCtor { + %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + } + // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor { + // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + // CHECK: } + + cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>) + cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + } + // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + // CHECK: cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + // CHECK: } + + cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + } dtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + 
} + // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } dtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } +} diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 347cd9e..3018bb97 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -985,18 +985,21 @@ double test_mm256_cvtsd_f64(__m256d __a) { // CHECK: extractelement <4 x double> %{{.*}}, i32 0 return _mm256_cvtsd_f64(__a); } +TEST_CONSTEXPR(_mm256_cvtsd_f64((__m256d){8.0, 7.0, 6.0, 5.0}) == 8.0); int test_mm256_cvtsi256_si32(__m256i __a) { // CHECK-LABEL: test_mm256_cvtsi256_si32 // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 return _mm256_cvtsi256_si32(__a); } +TEST_CONSTEXPR(_mm256_cvtsi256_si32((__m256i)(__v8si){8, 7, 6, 5, 4, 3, 2, 1}) == 8); float test_mm256_cvtss_f32(__m256 __a) { // CHECK-LABEL: test_mm256_cvtss_f32 // CHECK: extractelement <8 x float> %{{.*}}, i32 0 return _mm256_cvtss_f32(__a); } +TEST_CONSTEXPR(_mm256_cvtss_f32((__m256){8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}) == 8.0f); __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c index ded40ca..d0ae0c7 100644 --- a/clang/test/CodeGen/X86/bmi-builtins.c +++ b/clang/test/CodeGen/X86/bmi-builtins.c @@ -1,7 +1,16 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT -// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall 
-Werror | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64 + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64 #include <immintrin.h> @@ -48,20 +57,20 @@ unsigned int test_tzcnt_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test__tzcnt_u64(unsigned long long __X) { -// TZCNT-LABEL: test__tzcnt_u64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test__tzcnt_u64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return __tzcnt_u64(__X); } long long test_mm_tzcnt_64(unsigned long long __X) { -// TZCNT-LABEL: test_mm_tzcnt_64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test_mm_tzcnt_64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return _mm_tzcnt_64(__X); } unsigned long long test_tzcnt_u64(unsigned long long __X) { -// TZCNT-LABEL: test_tzcnt_u64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test_tzcnt_u64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return _tzcnt_u64(__X); } #endif @@ -103,36 +112,36 @@ unsigned int test__blsr_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test__andn_u64 -// CHECK: xor i64 %{{.*}}, -1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__andn_u64 +// X64: xor i64 %{{.*}}, -1 +// X64: and i64 %{{.*}}, %{{.*}} return __andn_u64(__X, __Y); } unsigned long long test__bextr_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test__bextr_u64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test__bextr_u64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return __bextr_u64(__X, __Y); } unsigned long long test__blsi_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsi_u64 -// CHECK: sub i64 0, %{{.*}} -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsi_u64 +// X64: sub i64 0, %{{.*}} +// X64: and i64 %{{.*}}, %{{.*}} return __blsi_u64(__X); } unsigned long long 
test__blsmsk_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsmsk_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: xor i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsmsk_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: xor i64 %{{.*}}, %{{.*}} return __blsmsk_u64(__X); } unsigned long long test__blsr_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsr_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsr_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: and i64 %{{.*}}, %{{.*}} return __blsr_u64(__X); } #endif @@ -186,49 +195,49 @@ unsigned int test_blsr_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test_andn_u64 -// CHECK: xor i64 %{{.*}}, -1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_andn_u64 +// X64: xor i64 %{{.*}}, -1 +// X64: and i64 %{{.*}}, %{{.*}} return _andn_u64(__X, __Y); } unsigned long long test_bextr_u64(unsigned long __X, unsigned int __Y, unsigned int __Z) { -// CHECK-LABEL: test_bextr_u64 -// CHECK: and i32 %{{.*}}, 255 -// CHECK: and i32 %{{.*}}, 255 -// CHECK: shl i32 %{{.*}}, 8 -// CHECK: or i32 %{{.*}}, %{{.*}} -// CHECK: zext i32 %{{.*}} to i64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test_bextr_u64 +// X64: and i32 %{{.*}}, 255 +// X64: and i32 %{{.*}}, 255 +// X64: shl i32 %{{.*}}, 8 +// X64: or i32 %{{.*}}, %{{.*}} +// X64: zext i32 %{{.*}} to i64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return _bextr_u64(__X, __Y, __Z); } unsigned long long test_bextr2_u64(unsigned long long __X, unsigned long long __Y) { -// CHECK-LABEL: test_bextr2_u64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test_bextr2_u64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return _bextr2_u64(__X, __Y); } unsigned long long test_blsi_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsi_u64 -// CHECK: sub i64 0, %{{.*}} -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsi_u64 +// X64: sub i64 0, %{{.*}} +// X64: and i64 %{{.*}}, %{{.*}} return _blsi_u64(__X); } unsigned long long test_blsmsk_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsmsk_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: xor i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsmsk_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: xor i64 %{{.*}}, %{{.*}} return _blsmsk_u64(__X); } unsigned long long test_blsr_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsr_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsr_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: and i64 %{{.*}}, %{{.*}} return _blsr_u64(__X); } #endif diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c index 48424f5..1b2cb90 100644 --- a/clang/test/CodeGen/X86/bmi2-builtins.c +++ b/clang/test/CodeGen/X86/bmi2-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32 +// RUN: 
%clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32 + #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/tbm-builtins.c b/clang/test/CodeGen/X86/tbm-builtins.c index d916627..89746bf 100644 --- a/clang/test/CodeGen/X86/tbm-builtins.c +++ b/clang/test/CodeGen/X86/tbm-builtins.c @@ -1,5 +1,12 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK #include <x86intrin.h> @@ -13,14 +20,14 @@ unsigned int test__bextri_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__bextri_u64(unsigned long long a) { - // CHECK-LABEL: test__bextri_u64 - // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) + // X64-LABEL: test__bextri_u64 + // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) return __bextri_u64(a, 2); } unsigned long long test__bextri_u64_bigint(unsigned long long a) { - // CHECK-LABEL: test__bextri_u64_bigint - // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) + // X64-LABEL: test__bextri_u64_bigint + // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) return __bextri_u64(a, 0x7fffffffffLL); } #endif @@ -34,9 +41,9 @@ unsigned int test__blcfill_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcfill_u64(unsigned long long a) { - // CHECK-LABEL: test__blcfill_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK: %{{.*}} = and i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcfill_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64: %{{.*}} = and i64 %{{.*}}, [[TMP]] return __blcfill_u64(a); } #endif @@ -51,10 +58,10 @@ unsigned int test__blci_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blci_u64(unsigned long long a) { - // 
CHECK-LABEL: test__blci_u64 - // CHECK: [[TMP1:%.*]] = add i64 %{{.*}}, 1 - // CHECK: [[TMP2:%.*]] = xor i64 [[TMP1]], -1 - // CHECK: %{{.*}} = or i64 %{{.*}}, [[TMP2]] + // X64-LABEL: test__blci_u64 + // X64: [[TMP1:%.*]] = add i64 %{{.*}}, 1 + // X64: [[TMP2:%.*]] = xor i64 [[TMP1]], -1 + // X64: %{{.*}} = or i64 %{{.*}}, [[TMP2]] return __blci_u64(a); } #endif @@ -69,10 +76,10 @@ unsigned int test__blcic_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcic_u64(unsigned long long a) { - // CHECK-LABEL: test__blcic_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__blcic_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] return __blcic_u64(a); } #endif @@ -86,9 +93,9 @@ unsigned int test__blcmsk_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcmsk_u64(unsigned long long a) { - // CHECK-LABEL: test__blcmsk_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcmsk_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]] return __blcmsk_u64(a); } #endif @@ -102,9 +109,9 @@ unsigned int test__blcs_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcs_u64(unsigned long long a) { - // CHECK-LABEL: test__blcs_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcs_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] return __blcs_u64(a); } #endif @@ -118,9 +125,9 @@ unsigned int test__blsfill_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blsfill_u64(unsigned long long a) { - // CHECK-LABEL: test__blsfill_u64 - // CHECK: [[TMP:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blsfill_u64 + // X64: [[TMP:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] return __blsfill_u64(a); } #endif @@ -135,10 +142,10 @@ unsigned int test__blsic_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blsic_u64(unsigned long long a) { - // CHECK-LABEL: test__blsic_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__blsic_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] return __blsic_u64(a); } #endif @@ -153,10 +160,10 @@ unsigned int test__t1mskc_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__t1mskc_u64(unsigned long long a) { - // CHECK-LABEL: test__t1mskc_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__t1mskc_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] return __t1mskc_u64(a); } #endif @@ -171,10 +178,10 @@ unsigned int test__tzmsk_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__tzmsk_u64(unsigned long long a) { - // CHECK-LABEL: test__tzmsk_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = and 
i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__tzmsk_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] return __tzmsk_u64(a); } #endif diff --git a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c new file mode 100644 index 0000000..ef68c79 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c @@ -0,0 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s + +// CHECK-LABEL: define dso_local void @test_locals( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IMG:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr +// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]] +// CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]] +// +void test_locals(void) { + __amdgpu_texture_t img; + (void)img; +} diff --git a/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp new file mode 100644 index 0000000..0dbd517 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp @@ -0,0 +1,7 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s +namespace std { class type_info; } +auto &a = typeid(__amdgpu_texture_t); +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp index 5920ced..137a49b 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( // CHECK-NEXT: entry: @@ -21,6 +24,43 @@ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr 
addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter64Py( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) 
[[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter32PVj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = 
load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter64PVy( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) 
[[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() { // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // 
CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global32() { global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); @@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr 
addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global64() { global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); @@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant32() { __UINT32_TYPE__ local_val; @@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca 
i64, align 8, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant64() { __UINT64_TYPE__ local_val; @@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr 
addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr 
addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr 
addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope32v( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast 
(ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope64() { __attribute__((shared)) __UINT64_TYPE__ val; diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp index 1e977dd..dd1ca45 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp +++ 
b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -12,6 +15,25 @@ // CHECK-NEXT: fence syncscope("agent") acq_rel // CHECK-NEXT: fence syncscope("workgroup") release // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv( +// GCN-SAME: ) #[[ATTR0:[0-9]+]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst +// GCN-NEXT: fence syncscope("agent") acquire +// GCN-NEXT: fence seq_cst +// GCN-NEXT: fence syncscope("agent") acq_rel +// GCN-NEXT: fence syncscope("workgroup") release +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire +// AMDGCNSPIRV-NEXT: fence seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release +// AMDGCNSPIRV-NEXT: ret void // void test_memory_fence_success() { @@ -35,6 +57,25 @@ void test_memory_fence_success() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_localv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_local() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -58,6 +99,25 @@ void test_local() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z11test_globalv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]] +// GCN-NEXT: fence seq_cst, !mmra [[META4]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra 
[[META4]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: ret void // void test_global() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global"); @@ -80,6 +140,25 @@ void test_global() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_imagev( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_image() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -99,13 +178,33 @@ void test_image() { // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_mixedv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// AMDGCNSPIRV-NEXT: ret void // void test_mixed() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global"); __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local"); } -//. // CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} // CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} // CHECK: [[META5]] = !{[[META4]], [[META3]]} //. +// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// GCN: [[META5]] = !{[[META4]], [[META3]]} +//. +// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]} +//. 
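For reference before the next file: the autogenerated fence checks above all reduce to a handful of builtin calls, and the new AMDGCNSPIRV prefixes capture the scope remapping on spirv64-amd-amdhsa ("agent" lowers to syncscope("device"), "wavefront" to syncscope("subgroup"), while "workgroup" and the system scope are unchanged). A minimal sketch of the call forms the checks correspond to — the helper name is hypothetical, but every call shape appears verbatim in the test itself:

  // Hypothetical summary of the calls the GCN/AMDGCNSPIRV checks verify.
  void fence_scope_summary() {
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");          // syncscope("workgroup") seq_cst on both targets
    __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");              // "agent" becomes syncscope("device") for AMDGCNSPIRV
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");                   // system scope: a plain `fence seq_cst`
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local"); // address-space hint adds !mmra amdgpu-synchronize-as metadata
  }

The trailing `!mmra` metadata lines checked above ([[META3]]/[[META4]]/[[META5]]) are exactly what the optional address-space arguments ("local", "global", or a mix) emit.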
diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp new file mode 100644 index 0000000..06a496e --- /dev/null +++ b/clang/test/CodeGenCXX/gh56652.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s + +namespace GH56652{ + +struct foo {}; + +template <typename T> struct bar { + using type = T; + + template <foo> inline static constexpr auto b = true; +}; + +template <typename T> +concept C = requires(T a) { T::template b<foo{}>; }; + +template <typename T> auto fn(T) { + if constexpr (!C<T>) + return foo{}; + else + return T{}; +} + +auto a = decltype(fn(bar<int>{})){}; + +} + +namespace GH116319 { + +template <int = 0> struct a { +template <class> static constexpr auto b = 2; +template <class> static void c() noexcept(noexcept(b<int>)) {} +}; + +void test() { a<>::c<int>(); } + + +} + +// CHECK: %"struct.GH56652::bar" = type { i8 } +// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any +// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index 19ab656..7cd3f14 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -1,13 +1,13 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s typedef unsigned int uint; typedef unsigned long ulong; @@ -50,7 +50,8 @@ void 
test_s_wait_event_export_ready() { } // CHECK-LABEL: @test_global_add_f32 -// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} #if !defined(__SPIRV__) void test_global_add_f32(float *rtn, global float *addr, float x) { #else diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index 5f202ba..6bb20bf 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -1,9 +1,9 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1) // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}} #if !defined(__SPIRV__) @@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) { // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src 
seq_cst, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}} @@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) { // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 039d032..ab0b0b9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1231,7 +1231,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4 res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4 res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent"); // CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4 diff --git a/clang/test/Driver/modules-print-library-module-manifest-path.cpp b/clang/test/Driver/modules-print-library-module-manifest-path.cpp index 7606713..af0f124 100644 --- a/clang/test/Driver/modules-print-library-module-manifest-path.cpp +++ b/clang/test/Driver/modules-print-library-module-manifest-path.cpp @@ -18,6 
+18,14 @@ // RUN: --target=x86_64-linux-gnu 2>&1 \ // RUN: | FileCheck libcxx.cpp +// check that -nostdlib causes no library-provided module manifest to +// be reported, even when libc++.modules.json is present. +// RUN: %clang -print-library-module-manifest-path \ +// RUN: -nostdlib \ +// RUN: -resource-dir=%t/Inputs/usr/lib/x86_64-linux-gnu \ +// RUN: --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck libcxx-no-module-json.cpp + // for macos there is a different directory structure // where the library and libc++.modules.json file are in lib // directly but headers are in clang/ver directory which diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index ced5bca..8eb9ea0 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -148,7 +148,7 @@ // init_captures checked below -#if check(modules, 0, 0, 0, 0, 0, 0, 0) +#if check(modules, 0, 0, 0, 0, 1, 1, 1) // FIXME: 201907 in C++20 #error "wrong value for __cpp_modules" #endif diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp index 83632db..cb4bcc9 100644 --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -27,7 +27,6 @@ struct S { ~S() {} }; - template <typename T, int length> T tmain() { T t; @@ -60,6 +59,15 @@ T tmain() { } extern S<float> **foo(); +int g_arr[10]; + +void reductionArrayElement() { +#pragma omp parallel +#pragma omp for reduction(+:g_arr[1]) + for (int i = 0; i < 10; i++) { + g_arr[1] += i; + } +} int main() { #ifdef LAMBDA @@ -164,6 +172,7 @@ int main() { #pragma omp for reduction(& : var3) for (int i = 0; i < 10; ++i) ; + reductionArrayElement(); return tmain<int, 42>(); #endif } @@ -535,6 +544,26 @@ int main() { //. // CHECK4: @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer, align 8 //. + +// CHECK1-LABEL: define {{.*}}reductionArrayElement{{.*}}.omp_outlined{{.*}} +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1: [[G_ARR:%.*]] = alloca i32, align 4 +// CHECK1: [[TMP0:%.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr @g_arr to i64){{.*}} +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[G_ARR:%.*]], i64 [[TMP0]] +// CHECK1: omp.inner.for.body: +// CHECK1: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]],{{.+}} +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void {{.*}}__kmpc_for_static_fini{{.+}} +// CHECK1: {{.*}}call i32 {{.*}}__kmpc_reduce{{.+}} +// CHECK1: omp.reduction.default: +// CHECK1-NEXT: call void @__kmpc_barrier{{.+}} +// CHECK1-NEXT: ret void +// + // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: @@ -614,6 +643,7 @@ int main() { // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.11, ptr [[TMP7]]) // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8 // CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.12, ptr [[TMP8]]) +// CHECK1-NEXT: call void {{.*}}reductionArrayElement{{.*}} // CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() // CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp new file mode 100644 index 0000000..283f588 --- /dev/null +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -0,0 +1,397 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2() { + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + // DUMP-NEXT: OMPPartialClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 4 + // DUMP-NEXT: IntegerLiteral {{.*}} 4 + #pragma omp unroll partial(4) + // PRINT: #pragma omp fuse + // DUMP-NEXT: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + +} + +//PRINT-LABEL: void foo3( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3 +template<int Factor1, int Factor2> +void foo3() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp unroll partial(Factor1) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor1) + // PRINT: for (int i = 0; i < 12; i += 1) + // DUMP: ForStmt + for (int i = 0; i < 12; i += 1) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: #pragma omp unroll partial(Factor2) + 
// DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor2) + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } +} + +// Also test instantiating the template. +void tfoo3() { + foo3<4,2>(); +} + +//PRINT-LABEL: void foo4( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4 +template<typename T, T Step> +void foo4(int start, int end) { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (T i = start; i < end; i += Step) + // DUMP: ForStmt + for (T i = start; i < end; i += Step) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + + // PRINT: for (T j = end; j > start; j -= Step) + // DUMP: ForStmt + for (T j = end; j > start; j -= Step) { + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + + } +} + +// Also test instantiating the template. +void tfoo4() { + foo4<int, 4>(0, 64); +} + + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5() { + double arr[128], arr2[128]; + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT-NEXT: for (auto &&a : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&a: arr) + // PRINT: body(a) + // DUMP: CallExpr + body(a); + // PRINT: for (double v = 42; auto &&b : arr) + // DUMP: CXXForRangeStmt + for (double v = 42; auto &&b: arr) + // PRINT: body(b, v); + // DUMP: CallExpr + body(b, v); + // PRINT: for (auto &&c : arr2) + // DUMP: CXXForRangeStmt + for (auto &&c: arr2) + // PRINT: body(c) + // DUMP: CallExpr + body(c); + + } + +} + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionDecl {{.*}} foo6 +void foo6() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i <= 10; ++i) + // DUMP: ForStmt + for (int i = 0; i <= 10; ++i) + body(i); + // PRINT: for (int j = 0; j < 100; ++j) + // DUMP: ForStmt + for(int j = 0; j < 100; ++j) + body(j); + } + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(4) + // PRINT: for (int k = 0; k < 250; ++k) + // DUMP: ForStmt + for (int k = 0; k < 250; ++k) + body(k); + } +} + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + } + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + } + } + } + } + +} + +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: 
CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +//PRINT-LABEL: void foo9( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9 +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C +template<int F, int C> +void foo9() { + // PRINT: #pragma omp fuse looprange(F,C) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(F,C) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + + } +} + +// Also test instantiating the template. +void tfoo9() { + foo9<1, 2>(); +} + +// PRINT-LABEL: void foo10( +// DUMP-LABEL: FunctionDecl {{.*}} foo10 +void foo10() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int ii = 0; ii < 10; ii += 2) + // DUMP: ForStmt + for (int ii = 0; ii < 10; ii += 2) + // PRINT: body(ii) + // DUMP: CallExpr + body(ii); + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + { + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int jj = 10; jj > 0; --jj) + // DUMP: ForStmt + for (int jj = 10; jj > 0; --jj) + // PRINT: body(jj) + // DUMP: CallExpr + body(jj); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + // PRINT: for (int kk = 0; kk <= 10; ++kk) + // DUMP: ForStmt + for (int kk = 0; kk <= 10; ++kk) + // PRINT: body(kk) + // DUMP: CallExpr + body(kk); + } + } + +} + +#endif diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp new file mode 100644 index 0000000..742c280 --- /dev/null +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -0,0 +1,2328 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck 
%s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...) {} + +extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { + int i,j; + #pragma omp fuse + { + for(i = start1; i < end1; i += step1) body(i); + for(j = start2; j < end2; j += step2) body(j); + } + +} + +template <typename T> +void foo2(T start, T end, T step){ + T i,j,k; + #pragma omp fuse + { + for(i = start; i < end; i += step) body(i); + for(j = end; j > start; j -= step) body(j); + for(k = start+step; k < end+step; k += step) body(k); + } +} + +extern "C" void tfoo2() { + foo2<int>(0, 64, 4); +} + +extern "C" void foo3() { + double arr[256]; + #pragma omp fuse + { + #pragma omp fuse + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + +extern "C" void foo4() { + double arr[256]; + + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 64; ++k) body(k); + for(int c = 42; auto &&v: arr) body(c,v); + } +} + +// This exemplifies the use of loop transformations that generate +// more than one top-level canonical loop nest (e.g. split, loop-ranged fuse, ...). +extern "C" void foo5() { + double arr[256]; + #pragma omp fuse looprange(2,2) + { + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 512; ++k) body(k); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +//
CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK1: [[IF_THEN22]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]]) +// CHECK1-NEXT: br label %[[IF_END27]] +// CHECK1: [[IF_END27]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo2( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 
4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK1-NEXT: [[SUB14:%.*]] = 
sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] +// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label 
%[[COND_FALSE31:.*]] +// CHECK1: [[COND_TRUE30]]: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32:.*]] +// CHECK1: [[COND_FALSE31]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32]] +// CHECK1: [[COND_END32]]: +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] +// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK1: [[IF_THEN40]]: +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] +// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] +// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP58]]) +// CHECK1-NEXT: br label %[[IF_END45]] +// CHECK1: [[IF_END45]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr 
[[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: 
[[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK1: [[COND_TRUE42]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: br label %[[COND_END44:.*]] +// CHECK1: [[COND_FALSE43]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END44]] +// CHECK1: [[COND_END44]]: +// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK1: [[COND_TRUE48]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50:.*]] +// CHECK1: [[COND_FALSE49]]: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50]] +// CHECK1: [[COND_END50]]: +// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: 
[[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN62]]:
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK1: [[IF_THEN68]]:
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK1-NEXT: br label %[[IF_END73]]
+// CHECK1: [[IF_END73]]:
+// CHECK1-NEXT: br label %[[IF_END74]]
+// CHECK1: [[IF_END74]]:
+// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK1: [[IF_THEN76]]:
+// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK1-NEXT: br label %[[IF_END81]]
+// CHECK1: [[IF_END81]]:
+// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK1: [[IF_THEN83]]:
+// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK1-NEXT: br label %[[IF_END88]]
+// CHECK1: [[IF_END88]]:
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo4(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK1: [[FOR_COND2]]:
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK1: [[FOR_BODY4]]:
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK1: [[IF_THEN9]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK1-NEXT: br label %[[IF_END14]]
+// CHECK1: [[IF_END14]]:
+// CHECK1-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK1: [[FOR_INC15]]:
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1: [[FOR_END17]]:
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK1: [[FOR_COND19]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK1: [[FOR_BODY21]]:
+// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK1-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK1: [[FOR_INC22]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND19]]
+// CHECK1: [[FOR_END23]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define dso_local void @foo5(
+// CHECK1-SAME: ) #[[ATTR0]] {
+// CHECK1-NEXT: [[ENTRY:.*:]]
+// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK1: [[COND_TRUE]]:
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK1-NEXT: br label %[[COND_END:.*]]
+// CHECK1: [[COND_FALSE]]:
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: br label %[[COND_END]]
+// CHECK1: [[COND_END]]:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK1: [[COND_TRUE24]]:
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK1-NEXT: br label %[[COND_END26:.*]]
+// CHECK1: [[COND_FALSE25]]:
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: br label %[[COND_END26]]
+// CHECK1: [[COND_END26]]:
+// CHECK1-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND:.*]]
+// CHECK1: [[FOR_COND]]:
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK1: [[FOR_BODY]]:
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK1-NEXT: br label %[[FOR_INC:.*]]
+// CHECK1: [[FOR_INC]]:
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1: [[FOR_END]]:
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK1: [[FOR_COND30]]:
+// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK1: [[FOR_BODY32]]:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK1: [[IF_THEN]]:
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK1-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK1: [[IF_THEN41]]:
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK1-NEXT: br label %[[IF_END]]
+// CHECK1: [[IF_END]]:
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK1: [[IF_THEN47]]:
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK1-NEXT: br label %[[IF_END52]]
+// CHECK1: [[IF_END52]]:
+// CHECK1-NEXT: br label %[[IF_END53]]
+// CHECK1: [[IF_END53]]:
+// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK1: [[IF_THEN55]]:
+// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK1-NEXT: br label %[[IF_END60]]
+// CHECK1: [[IF_END60]]:
+// CHECK1-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK1: [[FOR_INC61]]:
+// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1: [[FOR_END63]]:
+// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK1: [[FOR_COND70]]:
+// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK1: [[FOR_BODY72]]:
+// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK1-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK1: [[FOR_INC73]]:
+// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK1-NEXT: br label %[[FOR_COND70]]
+// CHECK1: [[FOR_END74]]:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @body(
+// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo1(
+// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP33]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]]
+// CHECK2: [[IF_THEN22]]:
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]]
+// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]]
+// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]])
+// CHECK2-NEXT: br label %[[IF_END27]]
+// CHECK2: [[IF_END27]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo3(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8
+// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]]
+// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8
+// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1
+// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1
+// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1
+// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1
+// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]]
+// CHECK2: [[COND_TRUE42]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8
+// CHECK2-NEXT: br label %[[COND_END44:.*]]
+// CHECK2: [[COND_FALSE43]]:
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END44]]
+// CHECK2: [[COND_END44]]:
+// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ]
+// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]]
+// CHECK2: [[COND_TRUE48]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50:.*]]
+// CHECK2: [[COND_FALSE49]]:
+// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: br label %[[COND_END50]]
+// CHECK2: [[COND_END50]]:
+// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ]
+// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8
+// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]]
+// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32
+// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1
+// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]]
+// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN62]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]]
+// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]]
+// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1
+// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]]
+// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]]
+// CHECK2: [[IF_THEN68]]:
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]]
+// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]]
+// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2
+// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]]
+// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]])
+// CHECK2-NEXT: br label %[[IF_END73]]
+// CHECK2: [[IF_END73]]:
+// CHECK2-NEXT: br label %[[IF_END74]]
+// CHECK2: [[IF_END74]]:
+// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]]
+// CHECK2: [[IF_THEN76]]:
+// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]]
+// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1
+// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]]
+// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]])
+// CHECK2-NEXT: br label %[[IF_END81]]
+// CHECK2: [[IF_END81]]:
+// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8
+// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]]
+// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]]
+// CHECK2: [[IF_THEN83]]:
+// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8
+// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8
+// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]]
+// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]]
+// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8
+// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8
+// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1
+// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]]
+// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8
+// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]])
+// CHECK2-NEXT: br label %[[IF_END88]]
+// CHECK2: [[IF_END88]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1
+// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo4(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128
+// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2:.*]]
+// CHECK2: [[FOR_COND2]]:
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]]
+// CHECK2: [[FOR_BODY4]]:
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2
+// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]]
+// CHECK2: [[IF_THEN9]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]]
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]])
+// CHECK2-NEXT: br label %[[IF_END14]]
+// CHECK2: [[IF_END14]]:
+// CHECK2-NEXT: br label %[[FOR_INC15:.*]]
+// CHECK2: [[FOR_INC15]]:
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1
+// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK2: [[FOR_END17]]:
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19:.*]]
+// CHECK2: [[FOR_COND19]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]]
+// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]]
+// CHECK2: [[FOR_BODY21]]:
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]])
+// CHECK2-NEXT: br label %[[FOR_INC22:.*]]
+// CHECK2: [[FOR_INC22]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND19]]
+// CHECK2: [[FOR_END23]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @foo5(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8
+// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[K]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
+// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64
+// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i32 42, ptr [[C]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8
+// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64
+// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64
+// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8
+// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1
+// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1
+// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1
+// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1
+// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8
+// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1
+// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]]
+// CHECK2: [[COND_TRUE24]]:
+// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8
+// CHECK2-NEXT: br label %[[COND_END26:.*]]
+// CHECK2: [[COND_FALSE25]]:
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: br label %[[COND_END26]]
+// CHECK2: [[COND_END26]]:
+// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ]
+// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128
+// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]])
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30:.*]]
+// CHECK2: [[FOR_COND30]]:
+// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8
+// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]]
+// CHECK2: [[FOR_BODY32]]:
+// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8
+// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4
+// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4
+// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64
+// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]]
+// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32
+// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4
+// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1
+// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]]
+// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN41]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]]
+// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]]
+// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2
+// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]]
+// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1
+// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[IF_END53]]
+// CHECK2: [[IF_END53]]:
+// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8
+// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]]
+// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]]
+// CHECK2: [[IF_THEN55]]:
+// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8
+// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8
+// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]]
+// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]]
+// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8
+// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1
+// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]]
+// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8
+// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8
+// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]])
+// CHECK2-NEXT: br label %[[IF_END60]]
+// CHECK2: [[IF_END60]]:
+// CHECK2-NEXT: br label %[[FOR_INC61:.*]]
+// CHECK2: [[FOR_INC61]]:
+// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1
+// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK2: [[FOR_END63]]:
+// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4
+// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8
+// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0
+// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256
+// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70:.*]]
+// CHECK2: [[FOR_COND70]]:
+// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8
+// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]]
+// CHECK2: [[FOR_BODY72]]:
+// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8
+// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]])
+// CHECK2-NEXT: br label %[[FOR_INC73:.*]]
+// CHECK2: [[FOR_INC73]]:
+// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1
+// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8
+// CHECK2-NEXT: br label %[[FOR_COND70]]
+// CHECK2: [[FOR_END74]]:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define dso_local void @tfoo2(
+// CHECK2-SAME: ) #[[ATTR0]] {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4)
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_(
+// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-NEXT: [[ENTRY:.*:]]
+// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]]
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]]
+// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1
+// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1
+// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]]
+// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]]
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]]
+// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1
+// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4
+// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1
+// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
+// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK2: [[COND_TRUE]]:
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4
+// CHECK2-NEXT: br label %[[COND_END:.*]]
+// CHECK2: [[COND_FALSE]]:
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: br label %[[COND_END]]
+// CHECK2: [[COND_END]]:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ]
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]]
+// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]]
+// CHECK2: [[COND_TRUE30]]:
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32:.*]]
+// CHECK2: [[COND_FALSE31]]:
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: br label %[[COND_END32]]
+// CHECK2: [[COND_END32]]:
+// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ]
+// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND:.*]]
+// CHECK2: [[FOR_COND]]:
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4
+// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]]
+// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK2: [[FOR_BODY]]:
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4
+// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK2: [[IF_THEN]]:
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]]
+// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]]
+// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]])
+// CHECK2-NEXT: br label %[[IF_END]]
+// CHECK2: [[IF_END]]:
+// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4
+// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
+// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]]
+// CHECK2: [[IF_THEN40]]:
+// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4
+// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]]
+// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]]
+// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4
+// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]]
+// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]])
+// CHECK2-NEXT: br label %[[IF_END45]]
+// CHECK2: [[IF_END45]]:
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4
+// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]]
+// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]]
+// CHECK2: [[IF_THEN47]]:
+// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4
+// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]]
+// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]]
+// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4
+// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]]
+// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]]
+// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP67]])
+// CHECK2-NEXT: br label %[[IF_END52]]
+// CHECK2: [[IF_END52]]:
+// CHECK2-NEXT: br label %[[FOR_INC:.*]]
+// CHECK2: [[FOR_INC]]:
+// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1
+// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4
+// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2: [[FOR_END]]:
+// CHECK2-NEXT: ret void
+//
+//.
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
+//.
+// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"}
+// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]}
+// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]}
+// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
+// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]}
+// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]}
+//.
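
The C++ input of this codegen test is not part of the hunk shown here, so the following is only a hedged sketch of the kind of loop sequence that lowers to the pattern checked above: a shared fuse index runs to the maximum of the per-loop trip counts (.omp.fuse.max), and each original body is guarded by its own bounds test. The callee mirrors the @body declaration visible in the IR; bounds and steps are illustrative, not the test's actual values.

    extern "C" void body(...); // unprototyped stand-in matching `call void (...) @body`

    // Assumed compile flags: -fopenmp -fopenmp-version=60 (fuse is an OpenMP 6.0 directive).
    void fused_pair() {
    #pragma omp fuse
      {
        for (int j = 0; j < 256; j += 2) // trip count 128; j is recomputed as 2 * iv0
          body(j);
        for (int k = 0; k < 64; ++k)     // trip count 64; the fused loop runs max(128, 64) times
          body(k);
      }
    }
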
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp
new file mode 100644
index 0000000..b86ce95
--- /dev/null
+++ b/clang/test/OpenMP/fuse_messages.cpp
@@ -0,0 +1,209 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s
+
+void func() {
+
+    // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+    #pragma omp fuse
+    ;
+
+    // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse
+    {int bar = 0;}
+
+    // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; ++i);
+        int x = 2;
+    }
+
+    // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}}
+    #pragma omp fuse
+    #pragma omp for
+    for (int i = 0; i < 7; ++i)
+        ;
+
+    {
+        // expected-error@+2 {{expected statement}}
+        #pragma omp fuse
+    }
+
+    // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}}
+    #pragma omp fuse foo
+    {
+        for (int i = 0; i < 7; ++i)
+            ;
+        for(int j = 0; j < 100; ++j);
+
+    }
+
+
+    // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}}
+    #pragma omp fuse final(0)
+    {
+        for (int i = 0; i < 7; ++i)
+            ;
+        for(int j = 0; j < 100; ++j);
+
+    }
+
+    //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; i*=2) {
+            ;
+        }
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}}
+    #pragma omp fuse
+    {}
+
+    //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}}
+    #pragma omp fuse
+    {
+        #pragma omp unroll full
+        for(int i = 0; i < 10; ++i);
+
+        for(int j = 0; j < 10; ++j);
+    }
+
+    //expected-warning@+2 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse
+    {
+        for(int i = 0; i < 10; ++i);
+    }
+
+    //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(1, 1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(1, -1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(1, 0)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+    }
+
+    const int x = 1;
+    constexpr int y = 4;
+    //expected-error@+1 {{looprange clause selects loops from 1 to 4 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(x,y)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error@+1 {{looprange clause selects loops from 1 to 420 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(1,420)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error@+1 {{looprange clause selects loops from 1 to 6 but this exceeds the number of loops (5) in the loop sequence}}
+    #pragma omp fuse looprange(1,6)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+        // This fusion results in 2 loops
+        #pragma omp fuse looprange(1,2)
+        {
+            for(int i = 0; i < 10; ++i);
+            for(int j = 0; j < 100; ++j);
+            for(int k = 0; k < 50; ++k);
+        }
+    }
+
+    //expected-error@+1 {{looprange clause selects loops from 2 to 4 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(2,3)
+    {
+        #pragma omp unroll partial(2)
+        for(int i = 0; i < 10; ++i);
+
+        #pragma omp reverse
+        for(int j = 0; j < 10; ++j);
+
+        #pragma omp fuse
+        {
+            {
+                #pragma omp reverse
+                for(int j = 0; j < 10; ++j);
+            }
+            for(int k = 0; k < 50; ++k);
+        }
+    }
+}
+
+// In a template context, but expression itself not instantiation-dependent
+template <typename T>
+static void templated_func() {
+
+    //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(2,1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+    //expected-error@+1 {{looprange clause selects loops from 3 to 5 but this exceeds the number of loops (3) in the loop sequence}}
+    #pragma omp fuse looprange(3,3)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+
+}
+
+template <int V>
+static void templated_func_value_dependent() {
+
+    //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}}
+    #pragma omp fuse looprange(V,1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+template <typename T>
+static void templated_func_type_dependent() {
+    constexpr T s = 1;
+
+    //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}}
+    #pragma omp fuse looprange(s,s-1)
+    {
+        for(int i = 0; i < 10; ++i);
+        for(int j = 0; j < 100; ++j);
+        for(int k = 0; k < 50; ++k);
+    }
+}
+
+
+void template_inst() {
+    // expected-note@+1 {{in instantiation of function template specialization 'templated_func<int>' requested here}}
+    templated_func<int>();
+    // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}}
+    templated_func_value_dependent<1>();
+    // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent<int>' requested here}}
+    templated_func_type_dependent<int>();
+}
+
+
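
Taken together, the diagnostics in this new test pin down the looprange arithmetic: looprange(first, count) selects loops first through first + count - 1 of the enclosed sequence, both arguments must be strictly positive constants, and a count of 1 merely re-emits a single loop (hence the redundant-fusion warning). For contrast, a minimal well-formed use under the same RUN-line setup; the function name is illustrative only:

    void fuse_middle_pair() {
        // Selects loops 2..3 of the three-loop sequence: 'j' and 'k' are fused
        // while the leading 'i' loop is left untouched. 2 + 2 - 1 = 3 <= 3, so
        // none of the out-of-range errors above apply.
        #pragma omp fuse looprange(2, 2)
        {
            for (int i = 0; i < 10; ++i);
            for (int j = 0; j < 100; ++j);
            for (int k = 0; k < 50; ++k);
        }
    }
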
diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
index 7ffb7aae..8c7a778 100644
--- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
+++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
@@ -1,9 +1,7 @@
-// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20
-// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23
-// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
-// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23
-
-//cxx23-no-diagnostics
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23
 
 auto L1 = [] constexpr {};
 // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
@@ -14,3 +12,25 @@ auto L3 = [] static {};
 // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
 // cxx20-warning@-2 {{static lambdas are a C++23 extension}}
 // precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}}
+
+namespace GH161070 {
+void t1() { int a = [] __arm_streaming; }
+// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}}
+// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}}
+
+void t2() { int a = [] [[assume(true)]]; }
+// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}}
+// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}}
+}
diff --git a/clang/test/SemaCUDA/vararg.cu b/clang/test/SemaCUDA/vararg.cu
index 34ef367..0238f42 100644
--- a/clang/test/SemaCUDA/vararg.cu
+++ b/clang/test/SemaCUDA/vararg.cu
@@ -10,7 +10,7 @@
 #include <stdarg.h>
 #include "Inputs/cuda.h"
 
-__device__ void foo() {
+__global__ void foo() {
   va_list list;
   va_arg(list, int);
 #ifdef EXPECT_VA_ARG_ERR
diff --git a/clang/test/SemaCXX/amdgpu-image-rsrc.cpp b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..61a82d4
--- /dev/null
+++ b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+  int n = 100;
+  __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+  static_cast<__amdgpu_texture_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+  reinterpret_cast<__amdgpu_texture_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+  (void)(v + v); // expected-error {{invalid operands to binary expression ('__amdgpu_texture_t' and '__amdgpu_texture_t')}}
+  int x(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_texture_t'}}
+  __amdgpu_texture_t k;
+}
+
+template<class T> void bar(T);
+void use(__amdgpu_texture_t r) { bar(r); }
+struct S { __amdgpu_texture_t r; int a; };
diff --git a/clang/test/SemaCXX/bitfield-layout.cpp b/clang/test/SemaCXX/bitfield-layout.cpp
index 7efd1d3..f30218b 100644
--- a/clang/test/SemaCXX/bitfield-layout.cpp
+++ b/clang/test/SemaCXX/bitfield-layout.cpp
@@ -35,7 +35,7 @@ CHECK_SIZE(Test4, 8);
 CHECK_ALIGN(Test4, 8);
 
 struct Test5 {
-  char c : 0x100000001; // expected-warning {{width of bit-field 'c' (4294967297 bits) exceeds the width of its type; value will be truncated to 8 bits}}
+  char c : 0x100000001; // expected-warning {{width of bit-field 'c' (4'294'967'297 bits) exceeds the width of its type; value will be truncated to 8 bits}}
 };
 
 // Size and align don't really matter here, just make sure we don't crash.
 CHECK_SIZE(Test5, 1);
diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp
index 739485b..45a4c4c 100644
--- a/clang/test/SemaCXX/decltype.cpp
+++ b/clang/test/SemaCXX/decltype.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s
 
 // PR5290
 int const f0();
@@ -156,6 +157,8 @@ struct A {
   }
 };
 
+
+
 // This shouldn't crash.
 static_assert(A<int>().f<int>() == 0, "");
 // The result should not be dependent.
@@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion
                                            // expected-note@-1 {{expression evaluates to '0 != 0'}}
 }
 
+
+#if __cplusplus >= 201703L
+namespace GH160497 {
+
+template <class> struct S {
+  template <class>
+  inline static auto mem =
+      [] { static_assert(false); // expected-error {{static assertion failed}} \
+           // expected-note {{while substituting into a lambda expression here}}
+        return 42;
+      }();
+};
+
+using T = decltype(S<void>::mem<void>);
+  // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}}
+
+
+template <class> struct S2 {
+  template <class>
+  inline static auto* mem =
+      [] { static_assert(false); // expected-error {{static assertion failed}} \
+           // expected-note {{while substituting into a lambda expression here}}
+        return static_cast<int*>(nullptr);
+      }();
+};
+
+using T2 = decltype(S2<void>::mem<void>);
+//expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}}
+
+template <class> struct S3 {
+  template <class>
+  inline static int mem = // Check we don't instantiate when the type is not deduced.
+      [] { static_assert(false);
+        return 42;
+      }();
+};
+
+using T = decltype(S3<void>::mem<void>);
+}
+
+namespace N1 {
+
+template<class>
+struct S {
+  template<class>
+  inline static auto mem = 42;
+};
+
+using T = decltype(S<void>::mem<void>);
+
+T y = 42;
+
+}
+
+namespace GH161196 {
+
+template <typename> struct A {
+  static constexpr int digits = 0;
+};
+
+template <typename> struct B {
+  template <int, typename MaskInt = int, int = A<MaskInt>::digits>
+  static constexpr auto XBitMask = 0;
+};
+
+struct C {
+  using ReferenceHost = B<int>;
+  template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask;
+};
+
+void test() { (void)C::XBitMask<0>; }
+
+}
+#endif
+
 template<typename> class conditional {
 };
diff --git a/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp b/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp
index 097ada3..436dfb9 100644
--- a/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp
+++ b/clang/test/SemaCXX/invalid-requirement-requires-expr.cpp
@@ -17,8 +17,7 @@ constexpr bool A<x>::far() {
     b.data_member;
     requires A<x-1>::far(); // #Invalid
     // expected-error@#Invalid {{recursive template instantiation exceeded maximum depth}}
-    // expected-note@#Invalid {{in instantiation}}
-    // expected-note@#Invalid 2 {{while}}
+    // expected-note@#Invalid 3 {{while}}
     // expected-note@#Invalid {{contexts in backtrace}}
    // expected-note@#Invalid {{increase recursive template instantiation depth}}
 };
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 3f01247..d49330f 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -2038,6 +2038,49 @@ void is_implicit_lifetime(int n) {
   static_assert(__builtin_is_implicit_lifetime(int * __restrict));
 }
 
+namespace GH160610 {
+class NonAggregate {
+public:
+  NonAggregate() = default;
+
+  NonAggregate(const NonAggregate&) = delete;
+  NonAggregate& operator=(const NonAggregate&) = delete;
+private:
+  int num;
+};
+
+class DataMemberInitializer {
+public:
+  DataMemberInitializer() = default;
+
+  DataMemberInitializer(const DataMemberInitializer&) = delete;
+  DataMemberInitializer& operator=(const DataMemberInitializer&) = delete;
+private:
+  int num = 0;
+};
+
+class UserProvidedConstructor {
+public:
+  UserProvidedConstructor() {}
+
+  UserProvidedConstructor(const UserProvidedConstructor&) = delete;
+  UserProvidedConstructor& operator=(const UserProvidedConstructor&) = delete;
+};
+
+static_assert(__builtin_is_implicit_lifetime(NonAggregate));
+static_assert(!__builtin_is_implicit_lifetime(DataMemberInitializer));
+static_assert(!__builtin_is_implicit_lifetime(UserProvidedConstructor));
+
+#if __cplusplus >= 202002L
+template <typename T>
+class Tpl {
+  Tpl() requires false = default ;
+};
+static_assert(!__builtin_is_implicit_lifetime(Tpl<int>));
+
+#endif
+}
+
 void is_signed() {
   //static_assert(__is_signed(char));
diff --git a/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
new file mode 100644
index 0000000..dc56494
--- /dev/null
+++ b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
@@ -0,0 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa %s
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa %s
+
+void f() {
+  int n = 3;
+  __amdgpu_texture_t v = (__amdgpu_texture_t)0; // expected-error {{used type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+  int k = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_texture_t'}}
+  (void)(v + v); // expected-error {{invalid operands}}
+  __amdgpu_texture_t r;
+  int *p = (int*)r; // expected-error {{operand of type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+}
diff --git a/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..51b3f72
--- /dev/null
+++ b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
@@ -0,0 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s
+
+void foo() {
+#pragma omp target
+  {
+    int n = 5;
+    __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+    (void)(v + v); // expected-error {{invalid operands to binary expression}}
+  }
+}
diff --git a/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp b/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp
index 2b519e9..66fd1af 100644
--- a/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp
+++ b/clang/test/SemaTemplate/instantiation-depth-subst-2.cpp
@@ -2,5 +2,6 @@
 
 template<int N> struct S { };
 template<typename T> S<T() + T()> operator+(T, T); // expected-error {{instantiation exceeded maximum depth}} expected-note 2{{while substituting}}
+// expected-note@-1 {{use -ftemplate-depth=N to increase recursive template instantiation depth}}
 S<0> s;
 int k = s + s; // expected-note {{while substituting}}
diff --git a/clang/test/SemaTemplate/instantiation-depth-subst.cpp b/clang/test/SemaTemplate/instantiation-depth-subst.cpp
index 062a8ed..17944bc 100644
--- a/clang/test/SemaTemplate/instantiation-depth-subst.cpp
+++ b/clang/test/SemaTemplate/instantiation-depth-subst.cpp
@@ -3,7 +3,8 @@
 // PR9793
 template<typename T> auto f(T t) -> decltype(f(t)); // \
 // expected-error {{recursive template instantiation exceeded maximum depth of 2}} \
-// expected-note 2 {{while substituting}}
+// expected-note 2 {{while substituting}} \
+// expected-note {{use -ftemplate-depth=N to increase recursive template instantiation depth}}
 
 struct S {};
 int k = f(S{}); // expected-note {{while substituting}}
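
Both instantiation-depth hunks above only widen the expected notes to cover the new "use -ftemplate-depth=N" hint; the depth limit of 2 in the error text presumably comes from the tests' RUN lines, which these hunks do not show. A hedged sketch of the situation the hint addresses, mirroring the PR9793 pattern; the file name in the comment is hypothetical:

    // Substituting into the trailing return type recurses into f itself, so this
    // declaration only type-checks up to the configured instantiation depth.
    // When such recursion is intentional and bounded, the remedy the new note
    // suggests is raising the limit, e.g.: clang++ -ftemplate-depth=2048 deep.cpp
    template <typename T> auto f(T t) -> decltype(f(t));
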