Diffstat (limited to 'clang/test')
28 files changed, 2423 insertions, 266 deletions
diff --git a/clang/test/CIR/CodeGen/builtin_prefetech.c b/clang/test/CIR/CodeGen/builtin_prefetch.c index cfe85b9..cfe85b9 100644 --- a/clang/test/CIR/CodeGen/builtin_prefetech.c +++ b/clang/test/CIR/CodeGen/builtin_prefetch.c diff --git a/clang/test/CXX/drs/cwg0xx.cpp b/clang/test/CXX/drs/cwg0xx.cpp index 805be67..10a4f1d 100644 --- a/clang/test/CXX/drs/cwg0xx.cpp +++ b/clang/test/CXX/drs/cwg0xx.cpp @@ -90,6 +90,8 @@ namespace cwg5 { // cwg5: 3.1    const C c = e;  } // namespace cwg5 +// cwg6 is in cwg6.cpp +  namespace cwg7 { // cwg7: 3.4    class A { public: ~A(); };    class B : virtual private A {}; // #cwg7-B diff --git a/clang/test/CXX/drs/cwg28xx.cpp b/clang/test/CXX/drs/cwg28xx.cpp index a6b2b99..d0ee191 100644 --- a/clang/test/CXX/drs/cwg28xx.cpp +++ b/clang/test/CXX/drs/cwg28xx.cpp @@ -61,6 +61,24 @@ namespace cwg2819 { // cwg2819: 19 c++26  #endif  } // namespace cwg2819 +namespace cwg2823 { // cwg2823: no +#if __cplusplus >= 201103L +  constexpr int *p = 0; +  constexpr int *q1 = &*p; +  // expected-error@-1 {{constexpr variable 'q1' must be initialized by a constant expression}} +  //   expected-note@-2 {{dereferencing a null pointer is not allowed in a constant expression}} +  // FIXME: invalid: dereferencing a null pointer. +  constexpr int *q2 = &p[0]; + +  int arr[32]; +  constexpr int *r = arr; +  // FIXME: invalid: dereferencing a past-the-end pointer. +  constexpr int *s1 = &*(r + 32); +  // FIXME: invalid: dereferencing a past-the-end pointer. +  constexpr int *s2 = &r[32]; +#endif +} +  namespace cwg2847 { // cwg2847: 19 review 2024-03-01  #if __cplusplus >= 202002L diff --git a/clang/test/CXX/drs/cwg2xx.cpp b/clang/test/CXX/drs/cwg2xx.cpp index 37186e3..a4995dd 100644 --- a/clang/test/CXX/drs/cwg2xx.cpp +++ b/clang/test/CXX/drs/cwg2xx.cpp @@ -230,6 +230,38 @@ namespace cwg211 { // cwg211: 2.7    };  } // namespace cwg211 +namespace cwg212 { // cwg212: 2.7 +  template<typename T> struct Base; +  template<typename T> struct Derived; + +  int *overload(void*); +  float *overload(Base<int>*); +  double *overload(Base<long>*); + +  void f(Derived<int> *p) { +    // OK, calls void* overload. +    int *a = overload(p); + +    Base<int> *q = p; +    // expected-error@-1 {{cannot initialize a variable of type 'Base<int> *' with an lvalue of type 'Derived<int> *'}} +  } + +  template<typename T> struct Base {}; +  template<typename T> struct Derived : Base<T> {}; + +  void g(Derived<long> *p) { +    // OK, instantiates and calls Base<long>* overload. +    double *b = overload(p); +    (void)b; +  } + +  void h(Derived<float> *p) { +    // OK, instantiates and converts. +    Base<float> *q = p; +    (void)q; +  } +} +  namespace cwg213 { // cwg213: 2.7    template <class T> struct A : T {      void h(T t) { @@ -593,6 +625,9 @@ namespace cwg231 { // cwg231: 2.7    }  } // namespace cwg231 +// 232 is NAD; the desired behavior is described in 2823.
+// cwg232: dup 2823 +  // cwg234: na  // cwg235: na diff --git a/clang/test/CXX/drs/cwg6.cpp b/clang/test/CXX/drs/cwg6.cpp new file mode 100644 index 0000000..4752e72 --- /dev/null +++ b/clang/test/CXX/drs/cwg6.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK + +#if __cplusplus == 199711L +#define static_assert(expr) __extension__ _Static_assert(expr) +#define noexcept throw() +#endif + +namespace cwg6 { // cwg6: 2.7 +#if __cplusplus >= 201103L +struct Counter { +  int copies; +  constexpr Counter(int copies) : copies(copies) {} +  constexpr Counter(const Counter& other) : copies(other.copies + 1) {} +}; + +// Passing an lvalue by value makes a non-elidable copy. +constexpr int PassByValue(Counter c) { return c.copies; } +constexpr int PassByValue2(Counter c) { return PassByValue(c); } +constexpr int PassByValue3(Counter c) { return PassByValue2(c); } +static_assert(PassByValue(Counter(0)) == 0, "expect no copies"); +static_assert(PassByValue2(Counter(0)) == 1, "expect 1 copy"); +static_assert(PassByValue3(Counter(0)) == 2, "expect 2 copies"); +#endif + +struct A { +  A() noexcept; +  A(const A&) noexcept; +  ~A() noexcept; +}; + +inline void f(A a) noexcept {} + +// CHECK-LABEL: define {{.*}} @_ZN4cwg64callEv +void call() { +  A a; +  // We copy the parameter here, even though object is not mutated by f and +  // otherwise satisfies the criteria for the proposed CWG6 optimization. 
+  // CHECK: call {{.*}} @_ZN4cwg61AC1ERKS0_( +  // CHECK: call {{.*}} @_ZN4cwg61fENS_1AE( +  f(a); +  // CHECK: call {{.*}} @_ZN4cwg61AD1Ev( +  // CHECK: call {{.*}} @_ZN4cwg61AD1Ev( +} + +} // namespace cwg6 diff --git a/clang/test/CodeGen/AArch64/neon-across.c b/clang/test/CodeGen/AArch64/neon-across.c index aa0387d..aae5097 100644 --- a/clang/test/CodeGen/AArch64/neon-across.c +++ b/clang/test/CodeGen/AArch64/neon-across.c @@ -113,9 +113,8 @@ uint64_t test_vaddlvq_u32(uint32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXV_S8_I]]  //  int8_t test_vmaxv_s8(int8x8_t a) {    return vmaxv_s8(a); @@ -124,9 +123,8 @@ int8_t test_vmaxv_s8(int8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXV_S16_I]]  //  int16_t test_vmaxv_s16(int16x4_t a) {    return vmaxv_s16(a); @@ -135,9 +133,8 @@ int16_t test_vmaxv_s16(int16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXV_U8_I]]  //  uint8_t test_vmaxv_u8(uint8x8_t a) {    return vmaxv_u8(a); @@ -146,9 +143,8 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXV_U16_I]]  //  uint16_t test_vmaxv_u16(uint16x4_t a) {    return vmaxv_u16(a); @@ -157,9 +153,8 @@ uint16_t test_vmaxv_u16(uint16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXVQ_S8_I]]  //  int8_t test_vmaxvq_s8(int8x16_t a) {    return vmaxvq_s8(a); @@ -168,9 +163,8 @@ int8_t test_vmaxvq_s8(int8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // 
CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXVQ_S16_I]]  //  int16_t test_vmaxvq_s16(int16x8_t a) {    return vmaxvq_s16(a); @@ -179,7 +173,7 @@ int16_t test_vmaxvq_s16(int16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXVQ_S32_I]]  //  int32_t test_vmaxvq_s32(int32x4_t a) { @@ -189,9 +183,8 @@ int32_t test_vmaxvq_s32(int32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXVQ_U8_I]]  //  uint8_t test_vmaxvq_u8(uint8x16_t a) {    return vmaxvq_u8(a); @@ -200,9 +193,8 @@ uint8_t test_vmaxvq_u8(uint8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXVQ_U16_I]]  //  uint16_t test_vmaxvq_u16(uint16x8_t a) {    return vmaxvq_u16(a); @@ -211,7 +203,7 @@ uint16_t test_vmaxvq_u16(uint16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXVQ_U32_I]]  //  uint32_t test_vmaxvq_u32(uint32x4_t a) { @@ -221,9 +213,8 @@ uint32_t test_vmaxvq_u32(uint32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_s8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINV_S8_I]]  //  int8_t test_vminv_s8(int8x8_t a) {    return vminv_s8(a); @@ -232,9 +223,8 @@ int8_t test_vminv_s8(int8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_s16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 
[[TMP0]] +// CHECK-NEXT:    [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINV_S16_I]]  //  int16_t test_vminv_s16(int16x4_t a) {    return vminv_s16(a); @@ -243,9 +233,8 @@ int16_t test_vminv_s16(int16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_u8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINV_U8_I]]  //  uint8_t test_vminv_u8(uint8x8_t a) {    return vminv_u8(a); @@ -254,9 +243,8 @@ uint8_t test_vminv_u8(uint8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_u16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINV_U16_I]]  //  uint16_t test_vminv_u16(uint16x4_t a) {    return vminv_u16(a); @@ -265,9 +253,8 @@ uint16_t test_vminv_u16(uint16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINVQ_S8_I]]  //  int8_t test_vminvq_s8(int8x16_t a) {    return vminvq_s8(a); @@ -276,9 +263,8 @@ int8_t test_vminvq_s8(int8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINVQ_S16_I]]  //  int16_t test_vminvq_s16(int16x8_t a) {    return vminvq_s16(a); @@ -287,7 +273,7 @@ int16_t test_vminvq_s16(int16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINVQ_S32_I]]  //  int32_t test_vminvq_s32(int32x4_t a) { @@ -297,9 +283,8 @@ int32_t test_vminvq_s32(int32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]]) +// 
CHECK-NEXT:    ret i8 [[VMINVQ_U8_I]]  //  uint8_t test_vminvq_u8(uint8x16_t a) {    return vminvq_u8(a); @@ -308,9 +293,8 @@ uint8_t test_vminvq_u8(uint8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINVQ_U16_I]]  //  uint16_t test_vminvq_u16(uint16x8_t a) {    return vminvq_u16(a); @@ -319,7 +303,7 @@ uint16_t test_vminvq_u16(uint16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINVQ_U32_I]]  //  uint32_t test_vminvq_u32(uint32x4_t a) { @@ -329,9 +313,8 @@ uint32_t test_vminvq_u32(uint32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDV_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDV_S8_I]]  //  int8_t test_vaddv_s8(int8x8_t a) {    return vaddv_s8(a); @@ -340,9 +323,8 @@ int8_t test_vaddv_s8(int8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDV_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDV_S16_I]]  //  int16_t test_vaddv_s16(int16x4_t a) {    return vaddv_s16(a); @@ -351,9 +333,8 @@ int16_t test_vaddv_s16(int16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDV_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDV_U8_I]]  //  uint8_t test_vaddv_u8(uint8x8_t a) {    return vaddv_u8(a); @@ -362,9 +343,8 @@ uint8_t test_vaddv_u8(uint8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDV_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDV_U16_I]]  //  uint16_t test_vaddv_u16(uint16x4_t a) {    return vaddv_u16(a); @@ -373,9 
+353,8 @@ uint16_t test_vaddv_u16(uint16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDVQ_S8_I]]  //  int8_t test_vaddvq_s8(int8x16_t a) {    return vaddvq_s8(a); @@ -384,9 +363,8 @@ int8_t test_vaddvq_s8(int8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDVQ_S16_I]]  //  int16_t test_vaddvq_s16(int16x8_t a) {    return vaddvq_s16(a); @@ -395,7 +373,7 @@ int16_t test_vaddvq_s16(int16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDVQ_S32_I]]  //  int32_t test_vaddvq_s32(int32x4_t a) { @@ -405,9 +383,8 @@ int32_t test_vaddvq_s32(int32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDVQ_U8_I]]  //  uint8_t test_vaddvq_u8(uint8x16_t a) {    return vaddvq_u8(a); @@ -416,9 +393,8 @@ uint8_t test_vaddvq_u8(uint8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDVQ_U16_I]]  //  uint16_t test_vaddvq_u16(uint16x8_t a) {    return vaddvq_u16(a); @@ -427,7 +403,7 @@ uint16_t test_vaddvq_u16(uint16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDVQ_U32_I]]  //  uint32_t test_vaddvq_u32(uint32x4_t a) { diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 035e1ca..1c628bb 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ 
b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -12643,7 +12643,7 @@ uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {  // CHECK-LABEL: define dso_local i64 @test_vpaddd_s64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VPADDD_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VPADDD_S64_I]]  //  int64_t test_vpaddd_s64(int64x2_t a) { @@ -23227,7 +23227,7 @@ uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {  // CHECK-LABEL: define dso_local i64 @test_vpaddd_u64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VPADDD_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VPADDD_U64_I]]  //  uint64_t test_vpaddd_u64(uint64x2_t a) { @@ -23237,7 +23237,7 @@ uint64_t test_vpaddd_u64(uint64x2_t a) {  // CHECK-LABEL: define dso_local i64 @test_vaddvq_s64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VADDVQ_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VADDVQ_S64_I]]  //  int64_t test_vaddvq_s64(int64x2_t a) { @@ -23247,7 +23247,7 @@ int64_t test_vaddvq_s64(int64x2_t a) {  // CHECK-LABEL: define dso_local i64 @test_vaddvq_u64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VADDVQ_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VADDVQ_U64_I]]  //  uint64_t test_vaddvq_u64(uint64x2_t a) { @@ -23878,7 +23878,7 @@ float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {  // CHECK-LABEL: define dso_local i32 @test_vminv_s32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINV_S32_I]]  //  int32_t test_vminv_s32(int32x2_t a) { @@ -23888,7 +23888,7 @@ int32_t test_vminv_s32(int32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vminv_u32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINV_U32_I]]  //  uint32_t test_vminv_u32(uint32x2_t a) { @@ -23898,7 +23898,7 @@ uint32_t test_vminv_u32(uint32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vmaxv_s32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXV_S32_I]]  //  int32_t 
test_vmaxv_s32(int32x2_t a) { @@ -23908,7 +23908,7 @@ int32_t test_vmaxv_s32(int32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vmaxv_u32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXV_U32_I]]  //  uint32_t test_vmaxv_u32(uint32x2_t a) { @@ -23918,7 +23918,7 @@ uint32_t test_vmaxv_u32(uint32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vaddv_s32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VADDV_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDV_S32_I]]  //  int32_t test_vaddv_s32(int32x2_t a) { @@ -23928,7 +23928,7 @@ int32_t test_vaddv_s32(int32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vaddv_u32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VADDV_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDV_U32_I]]  //  uint32_t test_vaddv_u32(uint32x2_t a) { diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 62cd392..35fa65a 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) {    return _mm_insert_ps(x, y, 4);  } +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 
40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all +  __m128i test_mm_max_epi8(__m128i x, __m128i y) {    // CHECK-LABEL: test_mm_max_epi8    // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c new file mode 100644 index 0000000..0dbf81d --- /dev/null +++ b/clang/test/CodeGen/builtins-extended-image.c @@ -0,0 +1,1528 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts %s -emit-llvm -o - | FileCheck %s + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define 
dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr 
+// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr 
[[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP3]] +// +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) 
#[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    
[[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    
[[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef 
[[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP8]] +// +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] 
= alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr 
[[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], 
align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP11]] +// +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// 
+float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = 
alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr 
[[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// 
CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 
noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    
[[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP9]] +// +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP3]] +// +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> 
[[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) 
+// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], 
ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    
[[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP8]] +// +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32( +// CHECK-SAME: 
<4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr 
+// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP11]] +// +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    
[[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// 
CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, f32, f32, tex, 
vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    
[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], 
align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: 
   [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP9]] +// +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return 
__builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP4]] +// +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: 
   [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP5]] +// +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// 
CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP8]] +// +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call float 
@llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP5]] +// +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP6]] +// +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] 
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP9]] +// +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} diff --git a/clang/test/CodeGenCXX/ubsan-coroutines.cpp b/clang/test/CodeGenCXX/ubsan-coroutines.cpp index 04ab050..60c89a4 100644 --- a/clang/test/CodeGenCXX/ubsan-coroutines.cpp +++ b/clang/test/CodeGenCXX/ubsan-coroutines.cpp @@ -1,6 +1,7 @@  // This test merely verifies that emitting the object file does not cause a  // crash when the LLVM coroutines passes are run.  // RUN: %clang_cc1 -emit-obj -std=c++2a -fsanitize=null %s -o %t.o +// UNSUPPORTED: target={{.*}}-zos{{.*}}  namespace std {  template <typename R, typename... 
T> struct coroutine_traits { diff --git a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip index 571fba1..6dc57c4 100644 --- a/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip +++ b/clang/test/CodeGenHIP/maybe_undef-attr-verify.hip @@ -20,7 +20,7 @@  #define __maybe_undef __attribute__((maybe_undef))  #define WARP_SIZE 64 -static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE__; +static constexpr int warpSize = WARP_SIZE;  __device__ static inline unsigned int __lane_id() {      return  __builtin_amdgcn_mbcnt_hi( diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index d390418..31fd0e7 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -1,5 +1,5 @@  // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s  // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck -enable-var-scope %s  // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s  // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck -enable-var-scope %s @@ -48,7 +48,3 @@ void test_read_exec_lo(global uint* out) {  void test_read_exec_hi(global uint* out) {    *out = __builtin_amdgcn_read_exec_hi();  } - -#if __AMDGCN_WAVEFRONT_SIZE != 32 -#error Wrong wavesize detected -#endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index d851ec7..758b5aa 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -50,7 +50,3 @@ void test_read_exec_lo(global ulong* out) {  void test_read_exec_hi(global ulong* out) {    *out = __builtin_amdgcn_read_exec_hi();  } - -#if defined(__AMDGCN_WAVEFRONT_SIZE__) && __AMDGCN_WAVEFRONT_SIZE__ != 64 -#error Wrong wavesize detected -#endif diff --git a/clang/test/DebugInfo/Generic/bit-int.c b/clang/test/DebugInfo/Generic/bit-int.c index 94b9301..88ecc13 100644 --- a/clang/test/DebugInfo/Generic/bit-int.c +++ b/clang/test/DebugInfo/Generic/bit-int.c @@ -4,5 +4,5 @@  unsigned _BitInt(17) a;  _BitInt(2) b; -// CHECK: !DIBasicType(name: "_BitInt", size: 8, dataSize: 2, encoding: DW_ATE_signed) -// CHECK: !DIBasicType(name: "unsigned _BitInt", size: 32,  dataSize: 17, encoding: DW_ATE_unsigned) +// CHECK: !DIBasicType(name: "_BitInt(2)", size: 8, dataSize: 2, encoding: DW_ATE_signed) +// CHECK: !DIBasicType(name: "unsigned _BitInt(17)", size: 32,  dataSize: 17, encoding: DW_ATE_unsigned) diff --git a/clang/test/DebugInfo/Generic/macro-info.c b/clang/test/DebugInfo/Generic/macro-info.c new file mode 100644 index 0000000..ec49eb5 --- /dev/null +++ b/clang/test/DebugInfo/Generic/macro-info.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 %s -debug-info-kind=standalone -emit-llvm -o - | FileCheck %s + +#define GLOBAL(num) global## num +#define DECL_GLOBAL(x) int x +#define SAME_ORDER(x, y) x; y +#define SWAP_ORDER(x,y) y; x + + + 
+SAME_ORDER( +  int +// CHECK: DIGlobalVariable(name: "global",{{.*}} line: [[@LINE+1]] +    GLOBAL  // <- global +      () = 42, +  const char* s() { +// CHECK: DIGlobalVariable({{.*}}line: [[@LINE+1]],{{.*}} type: [[TYPEID:![0-9]+]] +    return "1234567890"; +  } +) + +SWAP_ORDER( +  int GLOBAL(  // <- global2 +    2) = 43, +// CHECK: DIGlobalVariable(name: "global3",{{.*}} line: [[@LINE+3]] +// CHECK: DIGlobalVariable(name: "global2",{{.*}} line: [[@LINE-3]] +  DECL_GLOBAL( +    GLOBAL(  // <- global3 +      3)) = 44 +); + + +DECL_GLOBAL( +// CHECK: DIGlobalVariable(name: "global4",{{.*}} line: [[@LINE+1]] +  GLOBAL(  // <- global4 +    4)); diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 9fda2f3..6d049e7 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -154,26 +154,10 @@  // ARCH-GCN-DAG: #define __[[CPU]]__ 1  // ARCH-GCN-DAG: #define __[[FAMILY]]__ 1  // ARCH-GCN-DAG: #define __amdgcn_processor__ "[[CPU]]" -// ARCH-GCN-DAG: #define __AMDGCN_WAVEFRONT_SIZE [[WAVEFRONT_SIZE]]  // ARCH-GCN-DAG: #define __GCC_DESTRUCTIVE_SIZE 128  // ARCH-GCN-DAG: #define __GCC_CONSTRUCTIVE_SIZE 128  // UNSAFEFPATOMIC-DAG: #define __AMDGCN_UNSAFE_FP_ATOMICS__ 1 -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ -// RUN:   %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ -// RUN:   %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ -// RUN:   -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ -// RUN:   -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE32 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mno-wavefrontsize64 \ -// RUN:   -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 \ -// RUN:   -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 -// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 -  // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 \  // RUN:   %s 2>&1 | FileCheck --check-prefix=CUMODE-ON %s  // RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mcumode \ diff --git a/clang/test/Driver/fat-archive-unbundle-ext.c b/clang/test/Driver/fat-archive-unbundle-ext.c index e797acc..d658ad05 100644 --- a/clang/test/Driver/fat-archive-unbundle-ext.c +++ b/clang/test/Driver/fat-archive-unbundle-ext.c @@ -1,5 +1,5 @@  // REQUIRES: x86-registered-target -// UNSUPPORTED: target={{.*-windows.*}}, target={{.*}}-macosx{{.*}}, target={{.*-darwin.*}}, target={{.*}}-aix{{.*}} +// UNSUPPORTED: target={{.*-windows.*}}, target={{.*}}-macosx{{.*}}, target={{.*-darwin.*}}, target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}}  // Generate dummy fat object  // RUN: %clang -O0 --target=%itanium_abi_triple %s -c -o %t.host.o diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip index 516e01a..4c460d5 100644 --- a/clang/test/Driver/hip-macros.hip +++ b/clang/test/Driver/hip-macros.hip @@ -1,27 +1,4 @@  // REQUIRES: amdgpu-registered-target -// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ -// RUN:   --cuda-device-only -nogpuinc -nogpulib \ -// RUN:   %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ -// RUN:   --cuda-device-only -nogpuinc 
-nogpulib \ -// RUN:   %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ -// RUN:   --cuda-device-only -nogpuinc -nogpulib \ -// RUN:   -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ -// RUN:   --cuda-device-only -nogpuinc -nogpulib \ -// RUN:   -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s -// RUN: %clang -E -dM --offload-arch=gfx906 -mno-wavefrontsize64 \ -// RUN:   --cuda-device-only -nogpuinc -nogpulib \ -// RUN:   -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \ -// RUN:   --cuda-device-only -nogpuinc -nogpulib \ -// RUN:   -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s -// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64 -// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32 -// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 -// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 -  // RUN: %clang -E -dM --offload-arch=gfx906 --cuda-device-only -nogpuinc -nogpulib \  // RUN:   %s 2>&1 | FileCheck --check-prefix=CUMODE-ON %s  // RUN: %clang -E -dM --offload-arch=gfx906 --cuda-device-only -nogpuinc -nogpulib -mcumode \ diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip deleted file mode 100644 index 8a60f5a..0000000 --- a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip +++ /dev/null @@ -1,115 +0,0 @@ -// REQUIRES: amdgpu-registered-target -// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s -// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s - -// Test that deprecation warnings for the wavefront size macro are emitted properly. 
- -#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__ - -#define DOUBLE_WRAPPED (WRAPPED) - -template <bool C, class T = void> struct my_enable_if {}; - -template <class T> struct my_enable_if<true, T> { -  typedef T type; -}; - -__attribute__((host, device)) void use(int, const char*); - -template<int N> __attribute__((host, device)) int templatify(int x) { -    return x + N; -} - -__attribute__((device)) const int GlobalConst = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -constexpr int GlobalConstExpr = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - -#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -int foo(void); -#endif - -__attribute__((device)) int device_var = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - -__attribute__((device)) -void device_fun() { -    use(__AMDGCN_WAVEFRONT_SIZE, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} -    use(__AMDGCN_WAVEFRONT_SIZE__, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(WRAPPED, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(DOUBLE_WRAPPED, "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(GlobalConst, "device function"); -    use(GlobalConstExpr, "device function"); -} - -__attribute__((global)) -void global_fun() { -    // no warnings expected -    use(__AMDGCN_WAVEFRONT_SIZE, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} -    use(__AMDGCN_WAVEFRONT_SIZE__, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(WRAPPED, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(DOUBLE_WRAPPED, "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "global function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -} - -int host_var = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -int host_var_alt = __AMDGCN_WAVEFRONT_SIZE; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} -int host_var_wrapped = WRAPPED; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -int host_var_double_wrapped = DOUBLE_WRAPPED; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} - -__attribute__((host)) -void host_fun() { -    use(__AMDGCN_WAVEFRONT_SIZE, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE' has been marked as deprecated}} -    use(__AMDGCN_WAVEFRONT_SIZE__, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(WRAPPED, "host function"); // 
expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(DOUBLE_WRAPPED, "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(GlobalConst, "host function"); -    use(GlobalConstExpr, "host function"); -} - -__attribute((host, device)) -void host_device_fun() { -    use(__AMDGCN_WAVEFRONT_SIZE__, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(WRAPPED, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(DOUBLE_WRAPPED, "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host device function"); // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -} - -template <unsigned int OuterWarpSize = __AMDGCN_WAVEFRONT_SIZE__> // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -class FunSelector { -public: -    template<unsigned int FunWarpSize = OuterWarpSize> -    __attribute__((device)) -    auto fun(void) -        -> typename my_enable_if<(FunWarpSize <= __AMDGCN_WAVEFRONT_SIZE__), void>::type // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    { -        use(1, "yay!"); -    } - -    template<unsigned int FunWarpSize = OuterWarpSize> -    __attribute__((device)) -    auto fun(void) -        -> typename my_enable_if<(FunWarpSize > __AMDGCN_WAVEFRONT_SIZE__), void>::type // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    { -        use(0, "nay!"); -    } -}; - -__attribute__((device)) -void device_fun_selector_user() { -    FunSelector<> f; -    f.fun<>(); -    f.fun<1>(); -    f.fun<1000>(); - -    my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type x = 42; // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -} - -__attribute__((device)) my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type DeviceFunTemplateRet(void) { // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    return 42; -} - -__attribute__((device)) int DeviceFunTemplateArg(my_enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE__), int>::type x) { // expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}} -    return x; -} - -// expected-note@* 0+ {{macro marked 'deprecated' here}} diff --git a/clang/test/Headers/cuda_with_openmp.cu b/clang/test/Headers/cuda_with_openmp.cu index efde4ec..8ea0de5 100644 --- a/clang/test/Headers/cuda_with_openmp.cu +++ b/clang/test/Headers/cuda_with_openmp.cu @@ -2,7 +2,7 @@  // Reported in https://bugs.llvm.org/show_bug.cgi?id=48014  ///==========================================================================/// -// REQUIRES: nvptx-registered-target +// REQUIRES: nvptx-registered-target, host-supports-cuda  // RUN: %clang -x cuda -fopenmp -c %s -o - --cuda-path=%S/../Driver/Inputs/CUDA/usr/local/cuda -nocudalib -isystem %S/Inputs/include -isystem %S/../../lib/Headers -fsyntax-only diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 30fb7ab..b059f18 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ 
b/clang/test/OpenMP/task_ast_print.cpp @@ -1,8 +1,10 @@  // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60  // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s  // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s  // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60  // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s  // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s  // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -ast-dump  %s | FileCheck %s --check-prefix=DUMP @@ -101,8 +103,8 @@ T tmain(T argc, T *argv) {    a = 2;  #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S<T>::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv)    foo(); -#pragma omp taskgroup task_reduction(-: argc) -#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) +#pragma omp taskgroup task_reduction(+: argc) +#pragma omp task if (C) mergeable priority(C) in_reduction(+: argc)    foo();    return 0;  } @@ -119,8 +121,8 @@ T tmain(T argc, T *argv) {  // CHECK-NEXT: a = 2;  // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<T>::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(T)])argv)  // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(+: argc)  // CHECK-NEXT: foo()  // CHECK: template<> int tmain<int, 5>(int argc, int *argv) {  // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -134,8 +136,8 @@ T tmain(T argc, T *argv) {  // CHECK-NEXT: a = 2;  // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<int>::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(int)])argv)  // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(+: argc)  // CHECK-NEXT: foo()  // CHECK: template<> long tmain<long, 1>(long argc, long *argv) {  // CHECK-NEXT: long b = argc, c, d, e, f, g; @@ -149,8 +151,8 @@ T tmain(T argc, T *argv) {  // CHECK-NEXT: a = 2;  // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<long>::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(long)])argv)  // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(+: argc)  // CHECK-NEXT: foo()  enum Enum {}; @@ -199,6 +201,14 @@ int main(int argc, char **argv) {  #pragma 
omp task depend(inout: omp_all_memory)    foo();    // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp task threadset(omp_pool) +#pragma omp task threadset(omp_team) +  foo(); +#endif +  // CHECK60: #pragma omp task threadset(omp_pool) +  // CHECK60: #pragma omp task threadset(omp_team) +  // CHECK60-NEXT: foo();    return tmain<int, 5>(b, &b) + tmain<long, 1>(x, &x);  } diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index c3e6d9e6b..ba8e694 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -41,6 +41,9 @@  // RUN: -emit-llvm -o - -DOMP51 | FileCheck %s \  // RUN: --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -Wno-vla -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6  // expected-no-diagnostics  #ifndef HEADER @@ -65,6 +68,7 @@ struct S {    S(const S &s) : a(s.a) {}    ~S() {}  }; +  int a;  int main() {    char b; @@ -147,6 +151,7 @@ int main() { +  // s1 = S(); @@ -215,6 +220,19 @@ void test_omp_all_memory()    }  }  #endif // OMP51 + +#ifdef OMP60 +void test_threadset() +{ +#pragma omp task threadset(omp_team) +  { +  } +#pragma omp task threadset(omp_pool) +  { +  } +} +#endif // OMP60 +  #endif  // CHECK1-LABEL: define {{[^@]+}}@main  // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { @@ -10243,3 +10261,18 @@ void test_omp_all_memory()  // CHECK4-51-NEXT:    call void @__cxx_global_var_init()  // CHECK4-51-NEXT:    ret void  // +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT:  entry: +// CHECK6-NEXT:       [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 +// CHECK6-NEXT:       [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 +// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT:       [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num, i32 1, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT:       getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %0, i32 0, i32 0 +// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT:       call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) +// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT:       [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT:       getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 +// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT:       call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) +// CHECK6-NEXT:       ret void diff --git a/clang/test/OpenMP/task_threadset_messages.cpp b/clang/test/OpenMP/task_threadset_messages.cpp new file mode 100755 index 0000000..f553a2d --- /dev/null +++ b/clang/test/OpenMP/task_threadset_messages.cpp @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp 
-fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +#ifdef OMP60 +struct ComplexStruct { +  int data[10]; +  struct InnerStruct { +    float value; +  } inner; +}; + +// Template class with member functions using 'threadset'. +template <typename T> +class TemplateClass { +public: +  void foo() { +    #pragma omp task threadset(omp_pool) +    { +      T temp; +    } +  } +  void bar() { +    #pragma omp taskloop threadset(omp_team) +    for (int i = 0; i < 10; ++i) {} +  } +}; + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in task directive. +void test_task_threadset_valid() { +  int a; +  #pragma omp task threadset(omp_pool) +  #pragma omp task threadset(omp_team) +  #pragma omp task threadset(omp_pool) if(1) +  #pragma omp task threadset(omp_team) priority(5) +  #pragma omp task threadset(omp_pool) depend(out: a) +  #pragma omp parallel +  { +    #pragma omp task threadset(omp_pool) +    { +      #pragma omp taskloop threadset(omp_team) +      for (int i = 0; i < 5; ++i) {} +    } +  } + +  TemplateClass<int> obj; +  obj.foo(); +  obj.bar(); +} + +// Invalid uses of 'threadset' with incorrect arguments in task directive. +void test_task_threadset_invalid_args() { +  #pragma omp task threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} +  #pragma omp task threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} +  #pragma omp task threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} +  #pragma omp task threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} +  {} +} + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in taskloop directive. +void test_taskloop_threadset_valid() { +  #pragma omp taskloop threadset(omp_pool) +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset(omp_team) +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset(omp_pool) grainsize(5) +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset(omp_team) num_tasks(2) +  for (int i = 0; i < 10; ++i) {} +} + +// Invalid uses of 'threadset' with incorrect arguments in taskloop directive. 
+void test_taskloop_threadset_invalid_args() { +  #pragma omp taskloop threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} +  for (int i = 0; i < 10; ++i) {} +} + +#else +void test_threadset_not_supported() { +  #pragma omp task threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} +  #pragma omp task threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} +  #pragma omp taskloop threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} +  for (int i = 0; i < 10; ++i) {} +  #pragma omp taskloop threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} +  for (int i = 0; i < 10; ++i) {} +} +#endif diff --git a/clang/test/OpenMP/taskloop_ast_print.cpp b/clang/test/OpenMP/taskloop_ast_print.cpp index 1b6d724..e4bf20a 100644 --- a/clang/test/OpenMP/taskloop_ast_print.cpp +++ b/clang/test/OpenMP/taskloop_ast_print.cpp @@ -1,8 +1,10 @@  // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60  // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s  // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s  // RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60  // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s  // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s  // expected-no-diagnostics @@ -87,6 +89,20 @@ int main(int argc, char **argv) {    // CHECK-NEXT: #pragma omp cancel taskgroup    // CHECK-NEXT: #pragma omp cancellation point taskgroup    // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp taskloop threadset(omp_team) +  for (int i = 0; i < 10; ++i) { +#pragma omp taskloop threadset(omp_pool) +  for (int j = 0; j < 10; ++j) { +    foo(); +  } +} +#endif + // CHECK60: #pragma omp taskloop threadset(omp_team) + // CHECK60-NEXT: for (int i = 0; i < 10; ++i) { + // CHECK60: #pragma omp taskloop threadset(omp_pool) + 
// CHECK60-NEXT: for (int j = 0; j < 10; ++j) { + // CHECK60-NEXT: foo();    return (tmain<int, 5>(argc) + tmain<char, 1>(argv[0][0]));  } diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index 69f8d3b..d119760 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -5,7 +5,12 @@  // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s  // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s  // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +  // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6  // expected-no-diagnostics  #ifndef HEADER  #define HEADER @@ -241,4 +246,52 @@ void taskloop_with_class() {    }  } +#ifdef OMP60 +void test_threadset() +{ +#pragma omp taskloop threadset(omp_team) +  for (int i = 0; i < 10; ++i) { +  } +#pragma omp taskloop threadset(omp_pool) +  for (int i = 0; i < 10; ++i) { +  } +} +#endif // OMP60 +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT:  entry: +// CHECK6-NEXT:       [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK6-NEXT:       %[[TMP:.*]] = alloca i32, align 4 +// CHECK6-NEXT:       [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK6-NEXT:       %[[TMP2:.*]] = alloca i32, align 4 +// CHECK6-NEXT:       %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT:       call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT:       %[[TID1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT:       %[[TID2:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID1:.*]], i32 0, i32 0 +// CHECK6-NEXT:       %[[TID3:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 5 +// CHECK6-NEXT:       store i64 0, ptr %[[TID3:.*]], align 8 +// CHECK6-NEXT:       %[[TID4:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 6 +// CHECK6-NEXT:       store i64 9, ptr %[[TID4:.*]], align 8 +// CHECK6-NEXT:       %[[TID5:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 7 +// CHECK6-NEXT:       store i64 1, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT:       %[[TID6:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 9 +// CHECK6-NEXT:       call void @llvm.memset.p0.i64(ptr align 8 %[[TID6:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT:       %[[TID7:.*]] = load i64, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT:       call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT:       call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT:       call void 
@__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT:       %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT:       %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 +// CHECK6-NEXT:       %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 +// CHECK6-NEXT:       store i64 0, ptr %[[TID10:.*]], align 8 +// CHECK6-NEXT:       %[[TID11:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 6 +// CHECK6-NEXT:       store i64 9, ptr %[[TID11:.*]], align 8 +// CHECK6-NEXT:       %[[TID12:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 7 +// CHECK6-NEXT:       store i64 1, ptr %[[TID12:.*]], align 8 +// CHECK6-NEXT:       %[[TID13:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 9 +// CHECK6-NEXT:       call void @llvm.memset.p0.i64(ptr align 8 [[TID13:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT:       %[[TID14:.*]] = load i64, ptr [[TID12:.*]], align 8 +// CHECK6-NEXT:       call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID8:.*]], i32 1, ptr %[[TID10:.*]], ptr %[[TID11:.*]], i64 %[[TID14:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT:       call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT:       ret void +  #endif diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index a3c3697..cdb4632 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4418,7 +4418,6 @@  // CHECK_AMDGCN_NONE-NOT: #define __HAS_FMAF__  // CHECK_AMDGCN_NONE-NOT: #define __HAS_FP64__  // CHECK_AMDGCN_NONE-NOT: #define __HAS_LDEXPF__ -// CHECK_AMDGCN_NONE-NOT: #define __AMDGCN_WAVEFRONT_SIZE__  // Begin r600 tests ---------------- @@ -4439,7 +4438,6 @@  // RUN: %clang -x hip -E -dM %s -o - 2>&1 --offload-host-only -nogpulib \  // RUN:     -nogpuinc --offload-arch=gfx803 -target x86_64-unknown-linux \  // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_HIP_HOST -// CHECK_HIP_HOST: #define __AMDGCN_WAVEFRONT_SIZE__ 64  // CHECK_HIP_HOST: #define __AMDGPU__ 1  // CHECK_HIP_HOST: #define __AMD__ 1 diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl new file mode 100644 index 0000000..47dbdd4 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 
vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(i32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f32_f32' must be a constant integer}} +} +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to 
'__builtin_amdgcn_image_sample_l_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f32_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(23, f32, tex, vec4i32, 0, i32, 11); //expected-error{{argument to 
'__builtin_amdgcn_image_sample_lz_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(i32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f16_f32' must be a constant integer}} +} + +half4 
test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f16_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float 
f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_f32_f32' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl new file mode 100644 index 0000000..e60f8c7 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify=GFX94 -S -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_r' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_g' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_b' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_a' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); 
//GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(105, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f32_f32' needs target feature extended-image-insts}} +} +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 
test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return 
__builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(105, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, 
f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_f32_f32' needs target feature extended-image-insts}} +} | 
