aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86InstrAVX512.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td40
1 files changed, 40 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1b748b7..e8fda82 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -300,6 +300,12 @@ def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
+let AddedComplexity = 1, Predicates = [HasVLX] in { // VLX all-ones pseudos
+ def AVX512_128_SETALLONES : I<0, Pseudo, (outs VR128X:$dst), (ins),
+ "", [(set VR128X:$dst, (v4i32 immAllOnesV))]>;
+ def AVX512_256_SETALLONES : I<0, Pseudo, (outs VR256X:$dst), (ins),
+ "", [(set VR256X:$dst, (v8i32 immAllOnesV))]>;
+}
}
let Predicates = [HasAVX512] in {
@@ -3161,6 +3167,12 @@ multiclass avx512_mask_setop_w<SDPatternOperator Val> {
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
+// 8-bit (VK8, v8i1) all-zeros/all-ones mask set operations, gated on HasDQI
+let Predicates = [HasDQI] in {
+ defm KSET0B : avx512_mask_setop<VK8, v8i1, immAllZerosV>;
+ defm KSET1B : avx512_mask_setop<VK8, v8i1, immAllOnesV>;
+}
+
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
@@ -3173,6 +3185,34 @@ let Predicates = [HasAVX512] in {
def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
+// With AVX512DQ, select KSET0B/KSET1B directly for v8i1 all-zeros/all-ones
+// constants (8-bit operations) to avoid setting upper bits
+let Predicates = [HasDQI] in {
+ def : Pat<(v8i1 immAllZerosV), (KSET0B)>;
+ def : Pat<(v8i1 immAllOnesV), (KSET1B)>;
+}
+
+// Optimize bitconvert of all-ones (and i16 255, low byte) constants to use kxnor
+let Predicates = [HasDQI] in {
+ def : Pat<(v8i1(bitconvert(i8 255))), (KSET1B)>;
+ def : Pat<(v16i1(bitconvert(i16 255))), (COPY_TO_REGCLASS(KSET1B), VK16)>;
+}
+let Predicates = [HasBWI] in { // all-ones i32/i64 -> KSET1D/KSET1Q
+ def : Pat<(v32i1(bitconvert(i32 -1))), (KSET1D)>;
+ def : Pat<(v64i1(bitconvert(i64 -1))), (KSET1Q)>;
+}
+// Submask patterns: only the low 8/16/32 bits set in a v32i1/v64i1 mask
+let Predicates = [HasBWI, HasDQI] in {
+ // v32i1 submasks: low 8 bits (255) and low 16 bits (65535)
+ def : Pat<(v32i1(bitconvert(i32 255))), (COPY_TO_REGCLASS(KSET1B), VK32)>;
+ def : Pat<(v32i1(bitconvert(i32 65535))), (COPY_TO_REGCLASS(KSET1W), VK32)>;
+ // v64i1 submasks: low 8, 16 and 32 (4294967295) bits
+ def : Pat<(v64i1(bitconvert(i64 255))), (COPY_TO_REGCLASS(KSET1B), VK64)>;
+ def : Pat<(v64i1(bitconvert(i64 65535))), (COPY_TO_REGCLASS(KSET1W), VK64)>;
+ def : Pat<(v64i1(bitconvert(i64 4294967295))), (COPY_TO_REGCLASS(KSET1D),
+ VK64)>;
+}
+
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT> {