diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 40 |
1 file changed, 40 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1b748b7..e8fda82 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -300,6 +300,12 @@ def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
+let AddedComplexity = 1, Predicates = [HasVLX] in {
+  def AVX512_128_SETALLONES : I<0, Pseudo, (outs VR128X:$dst), (ins),
+                "", [(set VR128X:$dst, (v4i32 immAllOnesV))]>;
+  def AVX512_256_SETALLONES : I<0, Pseudo, (outs VR256X:$dst), (ins),
+                "", [(set VR256X:$dst, (v8i32 immAllOnesV))]>;
+}
 }
 
 let Predicates = [HasAVX512] in {
@@ -3161,6 +3167,12 @@ multiclass avx512_mask_setop_w<SDPatternOperator Val> {
 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
 
+// 8-bit (VK8, v8i1) all-zeros/all-ones mask set operations, gated on HasDQI.
+let Predicates = [HasDQI] in {
+  defm KSET0B : avx512_mask_setop<VK8, v8i1, immAllZerosV>;
+  defm KSET1B : avx512_mask_setop<VK8, v8i1, immAllOnesV>;
+}
+
 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
@@ -3173,6 +3185,34 @@ let Predicates = [HasAVX512] in {
   def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
 }
 
+// With AVX512DQ, select the 8-bit KSET0B/KSET1B directly for v8i1 constants to
+// avoid setting upper bits.
+let Predicates = [HasDQI] in {
+  def : Pat<(v8i1 immAllZerosV), (KSET0B)>;
+  def : Pat<(v8i1 immAllOnesV), (KSET1B)>;
+}
+
+// Lower bitconverts of all-ones constants (and i16 255) to KSET1* (kxnor).
+let Predicates = [HasDQI] in {
+  def : Pat<(v8i1(bitconvert(i8 255))), (KSET1B)>;
+  def : Pat<(v16i1(bitconvert(i16 255))), (COPY_TO_REGCLASS(KSET1B), VK16)>;
+}
+let Predicates = [HasBWI] in {
+  def : Pat<(v32i1(bitconvert(i32 -1))), (KSET1D)>;
+  def : Pat<(v64i1(bitconvert(i64 -1))), (KSET1Q)>;
+}
+// Submask patterns: only the low 8/16/32 bits set within a v32i1/v64i1 mask.
+let Predicates = [HasBWI, HasDQI] in {
+  // v32i1 submasks: low 8 bits (KSET1B) or low 16 bits (KSET1W)
+  def : Pat<(v32i1(bitconvert(i32 255))), (COPY_TO_REGCLASS(KSET1B), VK32)>;
+  def : Pat<(v32i1(bitconvert(i32 65535))), (COPY_TO_REGCLASS(KSET1W), VK32)>;
+  // v64i1 submasks: low 8 (KSET1B), low 16 (KSET1W), or low 32 (KSET1D) bits
+  def : Pat<(v64i1(bitconvert(i64 255))), (COPY_TO_REGCLASS(KSET1B), VK64)>;
+  def : Pat<(v64i1(bitconvert(i64 65535))), (COPY_TO_REGCLASS(KSET1W), VK64)>;
+  def : Pat<(v64i1(bitconvert(i64 4294967295))), (COPY_TO_REGCLASS(KSET1D),
+                                                  VK64)>;
+}
+
 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                              RegisterClass RC, ValueType VT> {
|
