// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --version 4 // RUN: %clang_cc1 -O0 -triple i686-pc-darwin -emit-llvm -o - %s | FileCheck -check-prefix=X86 %s // RUN: %clang_cc1 -O0 -triple amdgcn -emit-llvm -o - %s | FileCheck -check-prefix=AMDGCN %s #pragma OPENCL EXTENSION __cl_clang_function_pointers : enable typedef int int2 __attribute__((ext_vector_type(2))); typedef struct { int cells[9]; } Mat3X3; typedef struct { int cells[16]; } Mat4X4; typedef struct { int cells[1024]; } Mat32X32; typedef struct { int cells[4096]; } Mat64X64; struct StructOneMember { int2 x; }; struct StructTwoMember { int2 x; int2 y; }; struct LargeStructOneMember { int2 x[100]; }; struct LargeStructTwoMember { int2 x[40]; int2 y[20]; }; Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { Mat4X4 out; return out; } Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { Mat64X64 out; return out; } void FuncOneMember(struct StructOneMember u) { u.x = (int2)(0, 0); } void FuncOneLargeMember(struct LargeStructOneMember u) { u.x[0] = (int2)(0, 0); } void FuncTwoMember(struct StructTwoMember u) { u.y = (int2)(0, 0); } void FuncLargeTwoMember(struct LargeStructTwoMember u) { u.y[0] = (int2)(0, 0); } __attribute__((noinline)) kernel void callee_kern(global int *A){ *A = 1; } __attribute__((optnone)) kernel void callee_kern_with_optnone_attribute(global int *A){ *A = 1; } __attribute__((always_inline)) kernel void callee_kern_with_alwaysinline_attribute(global int *A){ *A = 1; } kernel void callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } kernel void callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out) { out[0] = foo_large(in[1]); } kernel void KernelOneMember(struct StructOneMember u) { FuncOneMember(u); } kernel void KernelLargeOneMember(struct LargeStructOneMember u) { FuncOneLargeMember(u); } kernel void KernelTwoMember(struct StructTwoMember u) { FuncTwoMember(u); } kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) { FuncLargeTwoMember(u); } __attribute__((noinline)) kernel void ext_callee_kern(global int *A); kernel void ext_callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out); kernel void ext_callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out); kernel void ext_KernelOneMember(struct StructOneMember u); kernel void ext_KernelLargeOneMember(struct LargeStructOneMember u); kernel void ext_KernelTwoMember(struct StructTwoMember u); kernel void ext_KernelLargeTwoMember(struct LargeStructTwoMember u); kernel void caller_kern(global int* A, global Mat3X3 *mat3X3, global Mat4X4 *mat4X4, global Mat32X32 *mat32X32, global Mat64X64 *mat64X64){ callee_kern(A); ext_callee_kern(A); callee_kern_with_optnone_attribute(A); callee_kern_with_alwaysinline_attribute(A); callee_kern_Mat3X3(mat3X3, mat4X4); callee_kern_Mat32X32(mat32X32, mat64X64); ext_callee_kern_Mat3X3(mat3X3, mat4X4); ext_callee_kern_Mat32X32(mat32X32, mat64X64); } kernel void caller_kern2(struct StructOneMember structOneMem, global struct StructOneMember* global_structOneMem, struct StructTwoMember structTwoMem){ KernelOneMember(structOneMem); ext_KernelOneMember(structOneMem); KernelTwoMember(structTwoMem); ext_KernelTwoMember(structTwoMem); } kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct LargeStructTwoMember largeStructTwoMem){ KernelLargeOneMember(largeStructOneMem); KernelLargeTwoMember(largeStructTwoMem); ext_KernelLargeOneMember(largeStructOneMem); ext_KernelLargeTwoMember(largeStructTwoMem); } // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @foo( // X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] { // X86-NEXT: entry: // X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @foo_large( // X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] { // X86-NEXT: entry: // X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @FuncOneMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @FuncOneLargeMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @FuncTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @FuncLargeTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8 // X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @callee_kern( // X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6:[0-9]+]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern( // X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: store i32 1, ptr [[TMP0]], align 4 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @callee_kern_with_optnone_attribute( // X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr noundef align 4 [[TMP0]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute( // X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: store i32 1, ptr [[TMP0]], align 4 // X86-NEXT: ret void // // // X86: Function Attrs: alwaysinline convergent norecurse nounwind // X86-LABEL: define spir_kernel void @callee_kern_with_alwaysinline_attribute( // X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR_I:%.*]] = alloca ptr, align 4 // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: store ptr [[TMP0]], ptr [[A_ADDR_I]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4 // X86-NEXT: store i32 1, ptr [[TMP1]], align 4 // X86-NEXT: ret void // // // X86: Function Attrs: alwaysinline convergent norecurse nounwind // X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_with_alwaysinline_attribute( // X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: store i32 1, ptr [[TMP0]], align 4 // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @callee_kern_Mat3X3( // X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { // X86-NEXT: entry: // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 // X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_Mat3X3( // X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { // X86-NEXT: entry: // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4 // X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 // X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr [[TMP0]], i32 0 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr [[TMP1]], i32 1 // X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]] // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @callee_kern_Mat32X32( // X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { // X86-NEXT: entry: // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 // X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_Mat32X32( // X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { // X86-NEXT: entry: // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4 // X86-NEXT: store ptr [[IN]], ptr [[IN_ADDR]], align 4 // X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr [[TMP0]], i32 0 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32:%.*]], ptr [[TMP1]], i32 1 // X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]] // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @KernelOneMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_KernelOneMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) // X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @KernelLargeOneMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeOneMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) // X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @KernelTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_KernelTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) // X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @KernelLargeTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_KernelLargeTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) // X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @caller_kern( // X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META18:![0-9]+]] !kernel_arg_access_qual [[META19:![0-9]+]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21:![0-9]+]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: store ptr [[MAT3X3]], ptr [[MAT3X3_ADDR]], align 4 // X86-NEXT: store ptr [[MAT4X4]], ptr [[MAT4X4_ADDR]], align 4 // X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 // X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 // X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern( // X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] { // X86-NEXT: entry: // X86-NEXT: [[A_ADDR_I:%.*]] = alloca ptr, align 4 // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: store ptr [[MAT3X3]], ptr [[MAT3X3_ADDR]], align 4 // X86-NEXT: store ptr [[MAT4X4]], ptr [[MAT4X4_ADDR]], align 4 // X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6]] // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR6]] // X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr noundef align 4 [[TMP2]]) #[[ATTR6]] // X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // X86-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_I]], align 4 // X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4 // X86-NEXT: store i32 1, ptr [[TMP4]], align 4 // X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 // X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP5]], ptr noundef align 4 [[TMP6]]) #[[ATTR6]] // X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP7]], ptr noundef align 4 [[TMP8]]) #[[ATTR6]] // X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 // X86-NEXT: [[TMP10:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP9]], ptr noundef align 4 [[TMP10]]) #[[ATTR6]] // X86-NEXT: [[TMP11:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: [[TMP12:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP11]], ptr noundef align 4 [[TMP12]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @caller_kern2( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[STRUCTONEMEM:%.*]], ptr noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[STRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { // X86-NEXT: entry: // X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern2( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]], ptr noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP1:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { // X86-NEXT: entry: // X86-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 // X86-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 // X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTONEMEM]], ptr align 4 [[TMP0]], i32 8, i1 false) // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 16, i1 false) // X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 // X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]] // X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]] // X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @caller_kern3( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[LARGESTRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[LARGESTRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { // X86-NEXT: entry: // X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern3( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP1:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META26]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { // X86-NEXT: entry: // X86-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 // X86-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTONEMEM]], ptr align 4 [[TMP0]], i32 800, i1 false) // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 480, i1 false) // X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR6]] // X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR6]] // X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo( // AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0 // AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4 // AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4 // AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]] // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @foo_large( // AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false) // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @FuncOneMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @FuncOneLargeMember( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @FuncTwoMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8 // AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR7:[0-9]+]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_with_optnone_attribute( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: alwaysinline convergent norecurse nounwind // AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_with_alwaysinline_attribute( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR_I:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[TMP0]], ptr addrspace(5) [[A_ADDR_I]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR_I]], align 8 // AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP1]], align 4 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: alwaysinline convergent norecurse nounwind // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_with_alwaysinline_attribute( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4 // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat3X3( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_Mat3X3( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META10]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 // AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 // AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat32X32( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_Mat32X32( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META8]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META11]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) // AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR7]] // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelOneMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember( // AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeOneMember( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember( // AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) // AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8 // AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelTwoMember( // AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8 // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 // AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember( // AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_KernelLargeTwoMember( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) // AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META18:![0-9]+]] !kernel_arg_access_qual [[META19:![0-9]+]] !kernel_arg_type [[META20:![0-9]+]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21:![0-9]+]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT3X3]], ptr addrspace(5) [[MAT3X3_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT4X4]], ptr addrspace(5) [[MAT4X4_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 // AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 // AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]], ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]], ptr addrspace(1) noundef align 4 [[TMP4]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[A_ADDR_I:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT32X32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT64X64_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT3X3]], ptr addrspace(5) [[MAT3X3_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT4X4]], ptr addrspace(5) [[MAT4X4_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr addrspace(1) noundef align 4 [[TMP2]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[TMP3]], ptr addrspace(5) [[A_ADDR_I]], align 8 // AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR_I]], align 8 // AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4 // AMDGCN-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 // AMDGCN-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP5]], ptr addrspace(1) noundef align 4 [[TMP6]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP7]], ptr addrspace(1) noundef align 4 [[TMP8]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 // AMDGCN-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP9]], ptr addrspace(1) noundef align 4 [[TMP10]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP11]], ptr addrspace(1) noundef align 4 [[TMP12]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern2( // AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[STRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) // AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 // AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 // AMDGCN-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP5]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP2]], ptr addrspace(1) noundef align 8 [[TMP1]], <2 x i32> [[TMP4]], <2 x i32> [[TMP6]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern2( // AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], <2 x i32> [[STRUCTTWOMEM_COERCE0:%.*]], <2 x i32> [[STRUCTTWOMEM_COERCE1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 // AMDGCN-NEXT: store <2 x i32> [[STRUCTTWOMEM_COERCE0]], ptr addrspace(5) [[TMP0]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: store <2 x i32> [[STRUCTTWOMEM_COERCE1]], ptr addrspace(5) [[TMP1]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP2]]) #[[ATTR7]] // AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE2]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(<2 x i32> [[TMP3]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 // AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8 // AMDGCN-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP10]], align 8 // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(<2 x i32> [[TMP9]], <2 x i32> [[TMP11]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern3( // AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]], ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP1]], i64 480, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern3( // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP1:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META26]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(5) align 8 [[TMP1]], i64 480, i1 false) // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR7]] // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR7]] // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR7]] // AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // //. // X86: [[META4]] = !{i32 1} // X86: [[META5]] = !{!"none"} // X86: [[META6]] = !{!"int*"} // X86: [[META7]] = !{!""} // X86: [[META8]] = !{i32 1, i32 1} // X86: [[META9]] = !{!"none", !"none"} // X86: [[META10]] = !{!"Mat3X3*", !"Mat4X4*"} // X86: [[META11]] = !{!"", !""} // X86: [[META12]] = !{!"Mat32X32*", !"Mat64X64*"} // X86: [[META13]] = !{i32 0} // X86: [[META14]] = !{!"struct StructOneMember"} // X86: [[META15]] = !{!"struct LargeStructOneMember"} // X86: [[META16]] = !{!"struct StructTwoMember"} // X86: [[META17]] = !{!"struct LargeStructTwoMember"} // X86: [[META18]] = !{i32 1, i32 1, i32 1, i32 1, i32 1} // X86: [[META19]] = !{!"none", !"none", !"none", !"none", !"none"} // X86: [[META20]] = !{!"int*", !"Mat3X3*", !"Mat4X4*", !"Mat32X32*", !"Mat64X64*"} // X86: [[META21]] = !{!"", !"", !"", !"", !""} // X86: [[META22]] = !{i32 0, i32 1, i32 0} // X86: [[META23]] = !{!"none", !"none", !"none"} // X86: [[META24]] = !{!"struct StructOneMember", !"struct StructOneMember*", !"struct StructTwoMember"} // X86: [[META25]] = !{!"", !"", !""} // X86: [[META26]] = !{i32 0, i32 0} // X86: [[META27]] = !{!"struct LargeStructOneMember", !"struct LargeStructTwoMember"} //. // AMDGCN: [[META4]] = !{i32 1} // AMDGCN: [[META5]] = !{!"none"} // AMDGCN: [[META6]] = !{!"int*"} // AMDGCN: [[META7]] = !{!""} // AMDGCN: [[META8]] = !{i32 1, i32 1} // AMDGCN: [[META9]] = !{!"none", !"none"} // AMDGCN: [[META10]] = !{!"Mat3X3*", !"Mat4X4*"} // AMDGCN: [[META11]] = !{!"", !""} // AMDGCN: [[META12]] = !{!"Mat32X32*", !"Mat64X64*"} // AMDGCN: [[META13]] = !{i32 0} // AMDGCN: [[META14]] = !{!"struct StructOneMember"} // AMDGCN: [[META15]] = !{!"struct LargeStructOneMember"} // AMDGCN: [[META16]] = !{!"struct StructTwoMember"} // AMDGCN: [[META17]] = !{!"struct LargeStructTwoMember"} // AMDGCN: [[META18]] = !{i32 1, i32 1, i32 1, i32 1, i32 1} // AMDGCN: [[META19]] = !{!"none", !"none", !"none", !"none", !"none"} // AMDGCN: [[META20]] = !{!"int*", !"Mat3X3*", !"Mat4X4*", !"Mat32X32*", !"Mat64X64*"} // AMDGCN: [[META21]] = !{!"", !"", !"", !"", !""} // AMDGCN: [[META22]] = !{i32 0, i32 1, i32 0} // AMDGCN: [[META23]] = !{!"none", !"none", !"none"} // AMDGCN: [[META24]] = !{!"struct StructOneMember", !"struct StructOneMember*", !"struct StructTwoMember"} // AMDGCN: [[META25]] = !{!"", !"", !""} // AMDGCN: [[META26]] = !{i32 0, i32 0} // AMDGCN: [[META27]] = !{!"struct LargeStructOneMember", !"struct LargeStructTwoMember"} //.