diff options
Diffstat (limited to 'llvm/include')
-rw-r--r-- | llvm/include/llvm/Analysis/MemoryProfileInfo.h | 8 | ||||
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsNVVM.td | 139 | ||||
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsSPIRV.td | 8 |
3 files changed, 143 insertions, 12 deletions
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index 571caf9..be690a4 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -59,6 +59,14 @@ LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type); /// True if the AllocTypes bitmask contains just a single type. LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes); +/// Removes any existing "ambiguous" memprof attribute. Called before we apply a +/// specific allocation type such as "cold", "notcold", or "hot". +LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB); + +/// Adds an "ambiguous" memprof attribute to call with a matched allocation +/// profile but that we haven't yet been able to disambiguate. +LLVM_ABI void addAmbiguousAttribute(CallBase *CB); + /// Class to build a trie of call stack contexts for a particular profiled /// allocation call, along with their associated allocation types. /// The allocation will be at the root of the trie, which is then used to diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 9cfab26..3af1750 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -272,6 +272,10 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType, bit IsSparse = fals !eq(gft,"m16n8k16:d:f32") : !listsplat(llvm_float_ty, 4), !eq(gft,"m16n8k4:c:f32") : !listsplat(llvm_float_ty, 4), !eq(gft,"m16n8k4:d:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k32:c:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k32:c:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k32:d:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k32:d:f32") : !listsplat(llvm_float_ty, 4), // wmma fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16 // All other supported geometries use the same fragment format for f32 and @@ -298,6 +302,21 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType, bit IsSparse = fals !eq(gft,"m8n8k4:c:f64") : !listsplat(llvm_double_ty, 2), !eq(gft,"m8n8k4:d:f64") : !listsplat(llvm_double_ty, 2), + !eq(gft,"m16n8k4:a:f64") : !listsplat(llvm_double_ty, 2), + !eq(gft,"m16n8k4:b:f64") : [llvm_double_ty], + !eq(gft,"m16n8k4:c:f64") : !listsplat(llvm_double_ty, 4), + !eq(gft,"m16n8k4:d:f64") : !listsplat(llvm_double_ty, 4), + + !eq(gft,"m16n8k8:a:f64") : !listsplat(llvm_double_ty, 4), + !eq(gft,"m16n8k8:b:f64") : !listsplat(llvm_double_ty, 2), + !eq(gft,"m16n8k8:c:f64") : !listsplat(llvm_double_ty, 4), + !eq(gft,"m16n8k8:d:f64") : !listsplat(llvm_double_ty, 4), + + !eq(gft,"m16n8k16:a:f64") : !listsplat(llvm_double_ty, 8), + !eq(gft,"m16n8k16:b:f64") : !listsplat(llvm_double_ty, 4), + !eq(gft,"m16n8k16:c:f64") : !listsplat(llvm_double_ty, 4), + !eq(gft,"m16n8k16:d:f64") : !listsplat(llvm_double_ty, 4), + // wmma bf16 -> s32 @ m16n16k16/m8n32k16/m32n8k16 !eq(gft,"m16n16k16:a:bf16") : !listsplat(llvm_i32_ty, 4), !eq(gft,"m16n16k16:b:bf16") : !listsplat(llvm_i32_ty, 4), @@ -378,6 +397,26 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType, bit IsSparse = fals !eq(gft,"m16n8k64:c:s32") : !listsplat(llvm_i32_ty, 4), !eq(gft,"m16n8k64:d:s32") : !listsplat(llvm_i32_ty, 4), + // mma e4m3/e5m2 -> f16/f32 @ m16n8k16 + !eq(gft,"m16n8k16:a:e4m3") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k16:a:e5m2") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k16:b:e4m3") : [llvm_i32_ty], + !eq(gft,"m16n8k16:b:e5m2") : [llvm_i32_ty], + // mma e4m3/e5m2/e3m2/e2m3/e2m1 -> f32 @ m16n8k32 + !eq(gft,"m16n8k32:a:e4m3") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:a:e5m2") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:a:e3m2") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:a:e2m3") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:a:e2m1") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:b:e4m3") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:b:e5m2") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:b:e3m2") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:b:e2m3") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:b:e2m1") : !listsplat(llvm_i32_ty, 2), + // mma e2m1 -> f32 @m16n8k64 + !eq(gft,"m16n8k64:a:e2m1") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k64:b:e2m1") : !listsplat(llvm_i32_ty, 2), + // wmma/mma b1 -> s32 @ m8n8k128(b1) !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty], !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty], @@ -468,7 +507,7 @@ class WMMA_NAME<string ALayout, string BLayout, int Satfinite, string Rnd, strin # !if(Satfinite, "_satfinite", ""); } -class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op, +class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op, string Kind, WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> { string signature = MMA_SIGNATURE<A, B, C, D>.ret; string record = "int_nvvm_mma" @@ -476,6 +515,7 @@ class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op, # "_" # A.geom # "_" # ALayout # "_" # BLayout + # !if(!ne(Kind, ""), !strconcat("_", !subst("::", "_", Kind)), "") # !if(Satfinite, "_satfinite", "") # signature; } @@ -601,7 +641,7 @@ class NVVM_MMA_OPS { ["m16n8k16", "m16n8k8"], ["bf16"], [], ["f32"], []>.ret; list<list<WMMA_REGS>> f64_mma_ops = MMA_OPS< - ["m8n8k4"], + ["m8n8k4", "m16n8k4", "m16n8k8", "m16n8k16"], ["f64"], [], ["f64"], []>.ret; list<list<WMMA_REGS>> fp_mma_ops = MMA_OPS< ["m8n8k4", "m16n8k8", "m16n8k16"], @@ -609,6 +649,18 @@ class NVVM_MMA_OPS { list<list<WMMA_REGS>> int_mma_ops = MMA_OPS< ["m8n8k16", "m16n8k16", "m16n8k32"], ["s8", "u8"], ["s8", "u8"], ["s32"], []>.ret; + // m16n8k32 fp8 variants are intersected with f8f6f4 variants + // and processed there + list<list<WMMA_REGS>> fp8_mma_ops = MMA_OPS< + ["m16n8k16"], + ["e4m3", "e5m2"], ["e4m3", "e5m2"], + ["f16", "f32"], ["f16", "f32"]>.ret; + // it also contains e4m3/e5m2 from fp8 variants + list<list<WMMA_REGS>> f8f6f4_mma_ops = MMA_OPS< + ["m16n8k32"], + ["e4m3", "e5m2", "e3m2", "e2m3", "e2m1"], + ["e4m3", "e5m2", "e3m2", "e2m3", "e2m1"], + ["f16", "f32"], ["f16", "f32"]>.ret; list<list<WMMA_REGS>> subint_mma_ops = MMA_OPS< ["m8n8k32", "m16n8k32", "m16n8k64"], ["s4", "u4"], ["s4", "u4"], ["s32"], []>.ret; @@ -617,7 +669,8 @@ class NVVM_MMA_OPS { ["b1"], [], ["s32"], []>.ret; list<list<WMMA_REGS>> all_mma_ops = !listconcat( tf32_mma_ops, bf16_mma_ops, f64_mma_ops, - fp_mma_ops, int_mma_ops, subint_mma_ops, bit_mma_ops); + fp_mma_ops, fp8_mma_ops, f8f6f4_mma_ops, + int_mma_ops, subint_mma_ops, bit_mma_ops); list<list<WMMA_REGS>> bf16_mma_sp_ops = MMA_OPS< ["m16n8k16", "m16n8k32"], @@ -770,7 +823,8 @@ class NVVM_MMA_B1OPS<list<WMMA_REGS> frags> { // if NVVM_MMA_SUPPORTED<...>.ret then // def : FOO<>; // The record will only be defined for supported ops. // -class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf> { +class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, + string kind, int satf> { // MMA ops check both layouts. string layout = layout_a # ":" # layout_b; string a_type = frags[0].ptx_elt_type; @@ -805,10 +859,31 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b !or(!ne(a_type, b_type), !ne(c_type, d_type))): false, - // m16n8k8 requires C and D to be the same type. - !and(!eq(geom, "m16n8k8"), + // m16n8k16/m16n8k32 requires C and D to be the same type + !and(!or(!eq(geom, "m16n8k16"), + !eq(geom, "m16n8k32")), !ne(c_type, d_type)): false, + // Limit kind to valid types and geometries + !and(!ne(kind, ""), + !or(!ne(geom, "m16n8k32"), + !and(!ne(a_type, "e4m3"), + !ne(a_type, "e5m2"), + !ne(a_type, "e3m2"), + !ne(a_type, "e2m3"), + !ne(a_type, "e2m1")))): false, + + // Limit m16n8k16/m16n8k32 with no kind to valid types + !and(!eq(kind, ""), + !or(!eq(geom, "m16n8k16"), + !eq(geom, "m16n8k32")), + !or(!eq(a_type, "e3m2"), + !eq(a_type, "e2m3"), + !eq(a_type, "e2m1"), + !eq(b_type, "e3m2"), + !eq(b_type, "e2m3"), + !eq(b_type, "e2m1"))): false, + // All other are OK. true: true ); @@ -882,9 +957,10 @@ class NVVM_MMA_SP_SUPPORTED<list<WMMA_REGS> frags, string metadata, !eq(a_type, "tf32")), !ne(a_type, b_type)): false, - // m16n8k16 and m16n8k32 requires C and D to be the same type. + // m16n8k16, m16n8k32 and m16n8k64 requires C and D to be the same type. !and(!or(!eq(geom, "m16n8k16"), - !eq(geom, "m16n8k32")), + !eq(geom, "m16n8k32"), + !eq(geom, "m16n8k64")), !ne(c_type, d_type)): false, !and(!eq(kind, ""), @@ -1493,6 +1569,18 @@ let TargetPrefix = "nvvm" in { } } + // RS rounding mode (Stochastic Rounding) conversions for f16x2, bf16x2 types + // The last i32 operand provides the random bits for the conversion + foreach relu = ["", "_relu"] in { + foreach satfinite = ["", "_satfinite"] in { + def int_nvvm_ff2f16x2_rs # relu # satfinite : NVVMBuiltin, + PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>; + + def int_nvvm_ff2bf16x2_rs # relu # satfinite : NVVMBuiltin, + PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>; + } + } + foreach satfinite = ["", "_satfinite"] in { def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin, PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>; @@ -1515,6 +1603,15 @@ let TargetPrefix = "nvvm" in { PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; } } + + // RS rounding mode (Stochastic Rounding) conversions for f8x4 types + // The last i32 operand provides the random bits for the conversion + foreach type = ["e4m3x4", "e5m2x4"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_f32x4_to_ # type # _rs # relu # _satfinite : NVVMBuiltin, + PureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>; + } + } // FP4 conversions. foreach relu = ["", "_relu"] in { @@ -1524,6 +1621,13 @@ let TargetPrefix = "nvvm" in { def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin, PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; } + + // RS rounding mode (Stochastic Rounding) conversions for f4x4 type + // The last i32 operand provides the random bits for the conversion + foreach relu = ["", "_relu"] in { + def int_nvvm_f32x4_to_e2m1x4_rs # relu # _satfinite : NVVMBuiltin, + PureIntrinsic<[llvm_i16_ty], [llvm_v4f32_ty, llvm_i32_ty]>; + } // FP6 conversions. foreach type = ["e2m3x2", "e3m2x2"] in { @@ -1535,6 +1639,15 @@ let TargetPrefix = "nvvm" in { PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; } } + + // RS rounding mode (Stochastic Rounding) conversions for f6x4 types + // The last i32 operand provides the random bits for the conversion + foreach type = ["e2m3x4", "e3m2x4"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_f32x4_to_ # type # _rs # relu # _satfinite : NVVMBuiltin, + PureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>; + } + } // UE8M0x2 conversions. foreach rmode = ["_rz", "_rp"] in { @@ -2215,10 +2328,12 @@ foreach layout_a = ["row", "col"] in { foreach satf = [0, 1] in { foreach op = NVVM_MMA_OPS.all_mma_ops in { foreach b1op = NVVM_MMA_B1OPS<op>.ret in { - if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then { - def MMA_NAME<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>.record - : NVVM_MMA<op[0], op[1], op[2], op[3]>; - } + foreach kind = ["", "kind::f8f6f4"] in { + if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, kind, satf>.ret then { + def MMA_NAME<layout_a, layout_b, satf, b1op, kind, op[0], op[1], op[2], op[3]>.record + : NVVM_MMA<op[0], op[1], op[2], op[3]>; + } + } // kind } // b1op } // op } // satf diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 823c491..66e24fa 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -150,6 +150,14 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [IntrNoMem]>; + def int_spv_resource_counterhandlefromimplicitbinding + : DefaultAttrsIntrinsic<[llvm_any_ty], + [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_spv_resource_counterhandlefrombinding + : DefaultAttrsIntrinsic<[llvm_any_ty], + [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; |