diff options
Diffstat (limited to 'clang/test/CodeGen')
| -rw-r--r-- | clang/test/CodeGen/AArch64/neon-across.c | 132 | ||||
| -rw-r--r-- | clang/test/CodeGen/AArch64/neon-intrinsics.c | 20 | ||||
| -rw-r--r-- | clang/test/CodeGen/X86/sse41-builtins.c | 10 | ||||
| -rw-r--r-- | clang/test/CodeGen/builtins-extended-image.c | 1528 | 
4 files changed, 1602 insertions, 88 deletions
| diff --git a/clang/test/CodeGen/AArch64/neon-across.c b/clang/test/CodeGen/AArch64/neon-across.c index aa0387d..aae5097 100644 --- a/clang/test/CodeGen/AArch64/neon-across.c +++ b/clang/test/CodeGen/AArch64/neon-across.c @@ -113,9 +113,8 @@ uint64_t test_vaddlvq_u32(uint32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXV_S8_I]]  //  int8_t test_vmaxv_s8(int8x8_t a) {    return vmaxv_s8(a); @@ -124,9 +123,8 @@ int8_t test_vmaxv_s8(int8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXV_S16_I]]  //  int16_t test_vmaxv_s16(int16x4_t a) {    return vmaxv_s16(a); @@ -135,9 +133,8 @@ int16_t test_vmaxv_s16(int16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXV_U8_I]]  //  uint8_t test_vmaxv_u8(uint8x8_t a) {    return vmaxv_u8(a); @@ -146,9 +143,8 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXV_U16_I]]  //  uint16_t test_vmaxv_u16(uint16x4_t a) {    return vmaxv_u16(a); @@ -157,9 +153,8 @@ uint16_t test_vmaxv_u16(uint16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXVQ_S8_I]]  //  int8_t test_vmaxvq_s8(int8x16_t a) {    return vmaxvq_s8(a); @@ -168,9 +163,8 @@ int8_t test_vmaxvq_s8(int8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXVQ_S16_I]]  //  int16_t test_vmaxvq_s16(int16x8_t a) {    return vmaxvq_s16(a); @@ -179,7 +173,7 @@ int16_t test_vmaxvq_s16(int16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXVQ_S32_I]]  //  int32_t test_vmaxvq_s32(int32x4_t a) { @@ -189,9 +183,8 @@ int32_t test_vmaxvq_s32(int32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMAXVQ_U8_I]]  //  uint8_t test_vmaxvq_u8(uint8x16_t a) {    return vmaxvq_u8(a); @@ -200,9 +193,8 @@ uint8_t test_vmaxvq_u8(uint8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMAXVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMAXVQ_U16_I]]  //  uint16_t test_vmaxvq_u16(uint16x8_t a) {    return vmaxvq_u16(a); @@ -211,7 +203,7 @@ uint16_t test_vmaxvq_u16(uint16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXVQ_U32_I]]  //  uint32_t test_vmaxvq_u32(uint32x4_t a) { @@ -221,9 +213,8 @@ uint32_t test_vmaxvq_u32(uint32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_s8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINV_S8_I]]  //  int8_t test_vminv_s8(int8x8_t a) {    return vminv_s8(a); @@ -232,9 +223,8 @@ int8_t test_vminv_s8(int8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_s16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINV_S16_I]]  //  int16_t test_vminv_s16(int16x4_t a) {    return vminv_s16(a); @@ -243,9 +233,8 @@ int16_t test_vminv_s16(int16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_u8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINV_U8_I]]  //  uint8_t test_vminv_u8(uint8x8_t a) {    return vminv_u8(a); @@ -254,9 +243,8 @@ uint8_t test_vminv_u8(uint8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminv_u16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINV_U16_I]]  //  uint16_t test_vminv_u16(uint16x4_t a) {    return vminv_u16(a); @@ -265,9 +253,8 @@ uint16_t test_vminv_u16(uint16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINVQ_S8_I]]  //  int8_t test_vminvq_s8(int8x16_t a) {    return vminvq_s8(a); @@ -276,9 +263,8 @@ int8_t test_vminvq_s8(int8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINVQ_S16_I]]  //  int16_t test_vminvq_s16(int16x8_t a) {    return vminvq_s16(a); @@ -287,7 +273,7 @@ int16_t test_vminvq_s16(int16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINVQ_S32_I]]  //  int32_t test_vminvq_s32(int32x4_t a) { @@ -297,9 +283,8 @@ int32_t test_vminvq_s32(int32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VMINVQ_U8_I]]  //  uint8_t test_vminvq_u8(uint8x16_t a) {    return vminvq_u8(a); @@ -308,9 +293,8 @@ uint8_t test_vminvq_u8(uint8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VMINVQ_U16_I]]  //  uint16_t test_vminvq_u16(uint16x8_t a) {    return vminvq_u16(a); @@ -319,7 +303,7 @@ uint16_t test_vminvq_u16(uint16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINVQ_U32_I]]  //  uint32_t test_vminvq_u32(uint32x4_t a) { @@ -329,9 +313,8 @@ uint32_t test_vminvq_u32(uint32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDV_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDV_S8_I]]  //  int8_t test_vaddv_s8(int8x8_t a) {    return vaddv_s8(a); @@ -340,9 +323,8 @@ int8_t test_vaddv_s8(int8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDV_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDV_S16_I]]  //  int16_t test_vaddv_s16(int16x4_t a) {    return vaddv_s16(a); @@ -351,9 +333,8 @@ int16_t test_vaddv_s16(int16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u8  // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDV_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDV_U8_I]]  //  uint8_t test_vaddv_u8(uint8x8_t a) {    return vaddv_u8(a); @@ -362,9 +343,8 @@ uint8_t test_vaddv_u8(uint8x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u16  // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDV_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDV_U16_I]]  //  uint16_t test_vaddv_u16(uint16x4_t a) {    return vaddv_u16(a); @@ -373,9 +353,8 @@ uint16_t test_vaddv_u16(uint16x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDVQ_S8_I]]  //  int8_t test_vaddvq_s8(int8x16_t a) {    return vaddvq_s8(a); @@ -384,9 +363,8 @@ int8_t test_vaddvq_s8(int8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDVQ_S16_I]]  //  int16_t test_vaddvq_s16(int16x8_t a) {    return vaddvq_s16(a); @@ -395,7 +373,7 @@ int16_t test_vaddvq_s16(int16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDVQ_S32_I]]  //  int32_t test_vaddvq_s32(int32x4_t a) { @@ -405,9 +383,8 @@ int32_t test_vaddvq_s32(int32x4_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u8  // CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 -// CHECK-NEXT:    ret i8 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]]) +// CHECK-NEXT:    ret i8 [[VADDVQ_U8_I]]  //  uint8_t test_vaddvq_u8(uint8x16_t a) {    return vaddvq_u8(a); @@ -416,9 +393,8 @@ uint8_t test_vaddvq_u8(uint8x16_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u16  // CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[A]]) -// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16 -// CHECK-NEXT:    ret i16 [[TMP0]] +// CHECK-NEXT:    [[VADDVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]]) +// CHECK-NEXT:    ret i16 [[VADDVQ_U16_I]]  //  uint16_t test_vaddvq_u16(uint16x8_t a) {    return vaddvq_u16(a); @@ -427,7 +403,7 @@ uint16_t test_vaddvq_u16(uint16x8_t a) {  // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u32  // CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  entry: -// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[A]]) +// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDVQ_U32_I]]  //  uint32_t test_vaddvq_u32(uint32x4_t a) { diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 035e1ca..1c628bb 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -12643,7 +12643,7 @@ uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {  // CHECK-LABEL: define dso_local i64 @test_vpaddd_s64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VPADDD_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VPADDD_S64_I]]  //  int64_t test_vpaddd_s64(int64x2_t a) { @@ -23227,7 +23227,7 @@ uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {  // CHECK-LABEL: define dso_local i64 @test_vpaddd_u64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VPADDD_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VPADDD_U64_I]]  //  uint64_t test_vpaddd_u64(uint64x2_t a) { @@ -23237,7 +23237,7 @@ uint64_t test_vpaddd_u64(uint64x2_t a) {  // CHECK-LABEL: define dso_local i64 @test_vaddvq_s64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VADDVQ_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VADDVQ_S64_I]]  //  int64_t test_vaddvq_s64(int64x2_t a) { @@ -23247,7 +23247,7 @@ int64_t test_vaddvq_s64(int64x2_t a) {  // CHECK-LABEL: define dso_local i64 @test_vaddvq_u64(  // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]]) +// CHECK-NEXT:    [[VADDVQ_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])  // CHECK-NEXT:    ret i64 [[VADDVQ_U64_I]]  //  uint64_t test_vaddvq_u64(uint64x2_t a) { @@ -23878,7 +23878,7 @@ float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {  // CHECK-LABEL: define dso_local i32 @test_vminv_s32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINV_S32_I]]  //  int32_t test_vminv_s32(int32x2_t a) { @@ -23888,7 +23888,7 @@ int32_t test_vminv_s32(int32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vminv_u32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMINV_U32_I]]  //  uint32_t test_vminv_u32(uint32x2_t a) { @@ -23898,7 +23898,7 @@ uint32_t test_vminv_u32(uint32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vmaxv_s32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXV_S32_I]]  //  int32_t test_vmaxv_s32(int32x2_t a) { @@ -23908,7 +23908,7 @@ int32_t test_vmaxv_s32(int32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vmaxv_u32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VMAXV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VMAXV_U32_I]]  //  uint32_t test_vmaxv_u32(uint32x2_t a) { @@ -23918,7 +23918,7 @@ uint32_t test_vmaxv_u32(uint32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vaddv_s32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VADDV_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDV_S32_I]]  //  int32_t test_vaddv_s32(int32x2_t a) { @@ -23928,7 +23928,7 @@ int32_t test_vaddv_s32(int32x2_t a) {  // CHECK-LABEL: define dso_local i32 @test_vaddv_u32(  // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {  // CHECK-NEXT:  [[ENTRY:.*:]] -// CHECK-NEXT:    [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[A]]) +// CHECK-NEXT:    [[VADDV_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]])  // CHECK-NEXT:    ret i32 [[VADDV_U32_I]]  //  uint32_t test_vaddv_u32(uint32x2_t a) { diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 62cd392..35fa65a 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) {    return _mm_insert_ps(x, y, 4);  } +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // Insert Y[0] into X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all +  __m128i test_mm_max_epi8(__m128i x, __m128i y) {    // CHECK-LABEL: test_mm_max_epi8    // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c new file mode 100644 index 0000000..0dbf81d --- /dev/null +++ b/clang/test/CodeGen/builtins-extended-image.c @@ -0,0 +1,1528 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts %s -emit-llvm -o - | FileCheck %s + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP3]] +// +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP8]] +// +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP11]] +// +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x float> [[TMP9]] +// +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP3]] +// +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP8]] +// +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP11]] +// +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret <4 x half> [[TMP9]] +// +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP4]] +// +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP5]] +// +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP8]] +// +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP5]] +// +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP6]] +// +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT:  [[ENTRY:.*:]] +// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT:    [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT:    [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT:    store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT:    [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT:    ret float [[TMP9]] +// +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + +  return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} | 
