aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorAlexander Ivchenko <alexander.ivchenko@intel.com>2013-09-11 07:32:30 +0000
committerKirill Yukhin <kyukhin@gcc.gnu.org>2013-09-11 07:32:30 +0000
commit85a772214402901c17d47196add4c508e25866c2 (patch)
treeb6da176aa4314e7d30c478bc8a2b2861c1b8d757 /gcc/config
parent7e570821527734991f2468a15aaec706e23e9e92 (diff)
downloadgcc-85a772214402901c17d47196add4c508e25866c2.zip
gcc-85a772214402901c17d47196add4c508e25866c2.tar.gz
gcc-85a772214402901c17d47196add4c508e25866c2.tar.bz2
constraints.md (k): New.
* config/i386/constraints.md (k): New. (Yk): Ditto. * config/i386/i386.c (const regclass_map): Add new mask registers. (dbx_register_map): Ditto. (dbx64_register_map): Ditto. (svr4_dbx_register_map): Ditto. (ix86_conditional_register_usage): Squash mask registers if AVX512F is disabled. (ix86_preferred_reload_class): Disable constants for mask registers. (ix86_secondary_reload): Do spill of mask register using 32-bit insn. (ix86_hard_regno_mode_ok): Support new mask registers. (x86_order_regs_for_local_alloc): Ditto. * config/i386/i386.h (FIRST_PSEUDO_REGISTER): Update. (FIXED_REGISTERS): Add new mask registers. (CALL_USED_REGISTERS): Ditto. (REG_ALLOC_ORDER): Ditto. (VALID_MASK_REG_MODE): New. (FIRST_MASK_REG): Ditto. (LAST_MASK_REG): Ditto. (reg_class): Add MASK_EVEX_REGS, MASK_REGS. (MAYBE_MASK_CLASS_P): New. (REG_CLASS_NAMES): Add MASK_EVEX_REGS, MASK_REGS. (REG_CLASS_CONTENTS): Ditto. (MASK_REGNO_P): New. (ANY_MASK_REG_P): Ditto. (HI_REGISTER_NAMES): Add new mask registers. * config/i386/i386.md (MASK0_REG, MASK1_REG, MASK2_REG, MASK3_REG, MASK4_REG, MASK5_REG, MASK6_REG, MASK7_REG): Constants for new mask registers. (attribute "type"): Add mskmov, msklog. (attribute "length_immediate"): Support them. (attribute "memory"): Ditto. (attribute "prefix_0f"): Ditto. (*movhi_internal): Support new mask registers. (*movqi_internal): Ditto. (define_split): Split out clobber pattern if it is a logic insn on mask registers. (*k<logic><mode>): New. (*andhi_1): Extend to support mask regs. (*andqi_1): Extend to support mask regs. (kandn<mode>): New. (define_split): Split and-not to and and not if operands are not mask regs. (*<code><mode>_1): Separate HI mode to new pattern... (*<code>hi_1): This. (*<code>qi_1): Extend to support mask regs. (kxnor<mode>): New. (kortestzhi): Ditto. (kortestchi): Ditto. (kunpckhi): Ditto. (*one_cmpl<mode>2_1): Remove HImode and handle it... (*one_cmplhi2_1): ...Here, now with mask registers support. (*one_cmplqi2_1): Support new mask registers. 
(HI/QImode arithmetics splitter): Don't split if mask registers are used. (HI/QImode not splitter): Ditto. * config/i386/predicates.md (mask_reg_operand): New. (general_reg_operand): Ditto. Co-Authored-By: Andrey Turetskiy <andrey.turetskiy@intel.com> Co-Authored-By: Anna Tikhonova <anna.tikhonova@intel.com> Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com> Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com> Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com> Co-Authored-By: Maxim Kuznetsov <maxim.kuznetsov@intel.com> Co-Authored-By: Michael Zolotukhin <michael.v.zolotukhin@intel.com> Co-Authored-By: Sergey Lega <sergey.s.lega@intel.com> From-SVN: r202491
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/constraints.md8
-rw-r--r--gcc/config/i386/i386.c38
-rw-r--r--gcc/config/i386/i386.h38
-rw-r--r--gcc/config/i386/i386.md286
-rw-r--r--gcc/config/i386/predicates.md9
5 files changed, 317 insertions, 62 deletions
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 28e626f..92e0c05 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -19,7 +19,7 @@
;;; Unused letters:
;;; B H T
-;;; h jk
+;;; h j
;; Integer register constraints.
;; It is not necessary to define 'r' here.
@@ -78,6 +78,12 @@
"TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
"Second from top of 80387 floating-point stack (@code{%st(1)}).")
+(define_register_constraint "k" "TARGET_AVX512F ? MASK_EVEX_REGS : NO_REGS"
+"@internal Any mask register that can be used as predicate, i.e. k1-k7.")
+
+(define_register_constraint "Yk" "TARGET_AVX512F ? MASK_REGS : NO_REGS"
+"@internal Any mask register.")
+
;; Vector registers (also used for plain floating point nowadays).
(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS"
"Any MMX register.")
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fe9a714..72549e9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2032,6 +2032,9 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ /* Mask registers. */
+ MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
+ MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
};
/* The "default" register map used in 32bit mode. */
@@ -2047,6 +2050,7 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* The "default" register map used in 64bit mode. */
@@ -2062,6 +2066,7 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
+ 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
@@ -2129,6 +2134,7 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* Define parameter passing and return registers. */
@@ -4224,8 +4230,13 @@ ix86_conditional_register_usage (void)
/* If AVX512F is disabled, squash the registers. */
if (! TARGET_AVX512F)
- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+ {
+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+ }
}
@@ -33918,10 +33929,12 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
return regclass;
/* Force constants into memory if we are loading a (nonzero) constant into
- an MMX or SSE register. This is because there are no MMX/SSE instructions
- to load from a constant. */
+ an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
+ instructions to load from a constant. */
if (CONSTANT_P (x)
- && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
+ && (MAYBE_MMX_CLASS_P (regclass)
+ || MAYBE_SSE_CLASS_P (regclass)
+ || MAYBE_MASK_CLASS_P (regclass)))
return NO_REGS;
/* Prefer SSE regs only, if we can use them for math. */
@@ -34025,10 +34038,11 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
- if (!TARGET_64BIT
- && !in_p && mode == QImode
- && INTEGER_CLASS_P (rclass)
- && MAYBE_NON_Q_CLASS_P (rclass))
+ if (mode == QImode
+ && (MAYBE_MASK_CLASS_P (rclass)
+ || (!TARGET_64BIT && !in_p
+ && INTEGER_CLASS_P (rclass)
+ && MAYBE_NON_Q_CLASS_P (rclass))))
{
int regno;
@@ -34450,6 +34464,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
return false;
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
+ if (MASK_REGNO_P (regno))
+ return VALID_MASK_REG_MODE (mode);
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
@@ -35259,6 +35275,10 @@ x86_order_regs_for_local_alloc (void)
for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
+ /* Mask register. */
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e820aa6..709d3ed 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -893,7 +893,7 @@ enum target_cpu_default
eliminated during reloading in favor of either the stack or frame
pointer. */
-#define FIRST_PSEUDO_REGISTER 69
+#define FIRST_PSEUDO_REGISTER 77
/* Number of hardware registers that go into the DWARF-2 unwind info.
If not defined, equals FIRST_PSEUDO_REGISTER. */
@@ -923,7 +923,9 @@ enum target_cpu_default
/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \
0, 0, 0, 0, 0, 0, 0, 0, \
/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \
- 0, 0, 0, 0, 0, 0, 0, 0 }
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* k0, k1, k2, k3, k4, k5, k6, k7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
/* 1 for registers not available across function calls.
These must include the FIXED_REGISTERS and also any
@@ -955,7 +957,9 @@ enum target_cpu_default
/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \
6, 6, 6, 6, 6, 6, 6, 6, \
/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \
- 6, 6, 6, 6, 6, 6, 6, 6 }
+ 6, 6, 6, 6, 6, 6, 6, 6, \
+ /* k0, k1, k2, k3, k4, k5, k6, k7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
/* Order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS. List frame pointer
@@ -971,7 +975,7 @@ enum target_cpu_default
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \
- 63, 64, 65, 66, 67, 68 }
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76 }
/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
to be rearranged based on a particular function. When using sse math,
@@ -1068,6 +1072,8 @@ enum target_cpu_default
|| (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
|| (MODE) == V16SFmode)
+#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
+
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
@@ -1093,8 +1099,10 @@ enum target_cpu_default
(CC_REGNO_P (REGNO) ? VOIDmode \
: (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode \
: (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false) \
- : (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode \
- : (MODE) == QImode && !(TARGET_64BIT || QI_REGNO_P (REGNO)) ? SImode \
+ : (MODE) == HImode && !(TARGET_PARTIAL_REG_STALL \
+ || MASK_REGNO_P (REGNO)) ? SImode \
+ : (MODE) == QImode && !(TARGET_64BIT || QI_REGNO_P (REGNO) \
+ || MASK_REGNO_P (REGNO)) ? SImode \
: (MODE))
/* The only ABI that saves SSE registers across calls is Win64 (thus no
@@ -1141,6 +1149,9 @@ enum target_cpu_default
#define FIRST_EXT_REX_SSE_REG (LAST_REX_SSE_REG + 1) /*53*/
#define LAST_EXT_REX_SSE_REG (FIRST_EXT_REX_SSE_REG + 15) /*68*/
+#define FIRST_MASK_REG (LAST_EXT_REX_SSE_REG + 1) /*69*/
+#define LAST_MASK_REG (FIRST_MASK_REG + 7) /*76*/
+
/* Override this in other tm.h files to cope with various OS lossage
requiring a frame pointer. */
#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
@@ -1229,6 +1240,8 @@ enum reg_class
FLOAT_INT_REGS,
INT_SSE_REGS,
FLOAT_INT_SSE_REGS,
+ MASK_EVEX_REGS,
+ MASK_REGS,
ALL_REGS, LIM_REG_CLASSES
};
@@ -1250,6 +1263,8 @@ enum reg_class
reg_classes_intersect_p ((CLASS), ALL_SSE_REGS)
#define MAYBE_MMX_CLASS_P(CLASS) \
reg_classes_intersect_p ((CLASS), MMX_REGS)
+#define MAYBE_MASK_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), MASK_REGS)
#define Q_CLASS_P(CLASS) \
reg_class_subset_p ((CLASS), Q_REGS)
@@ -1282,6 +1297,8 @@ enum reg_class
"FLOAT_INT_REGS", \
"INT_SSE_REGS", \
"FLOAT_INT_SSE_REGS", \
+ "MASK_EVEX_REGS", \
+ "MASK_REGS", \
"ALL_REGS" }
/* Define which registers fit in which classes. This is an initializer
@@ -1319,7 +1336,9 @@ enum reg_class
{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \
{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \
{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \
-{ 0xffffffff,0xffffffff, 0x1f } \
+ { 0x0, 0x0,0x1fc0 }, /* MASK_EVEX_REGS */ \
+ { 0x0, 0x0,0x1fe0 }, /* MASK_REGS */ \
+{ 0xffffffff,0xffffffff,0x1fff } \
}
/* The same information, inverted:
@@ -1377,6 +1396,8 @@ enum reg_class
: (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
: (FIRST_EXT_REX_SSE_REG + (N) - 16))
+#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
+#define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define SSE_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
@@ -1933,7 +1954,8 @@ do { \
"xmm16", "xmm17", "xmm18", "xmm19", \
"xmm20", "xmm21", "xmm22", "xmm23", \
"xmm24", "xmm25", "xmm26", "xmm27", \
- "xmm28", "xmm29", "xmm30", "xmm31" }
+ "xmm28", "xmm29", "xmm30", "xmm31", \
+ "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" }
#define REGISTER_NAMES HI_REGISTER_NAMES
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3307b08..013673a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -328,6 +328,14 @@
(XMM29_REG 66)
(XMM30_REG 67)
(XMM31_REG 68)
+ (MASK0_REG 69)
+ (MASK1_REG 70)
+ (MASK2_REG 71)
+ (MASK3_REG 72)
+ (MASK4_REG 73)
+ (MASK5_REG 74)
+ (MASK6_REG 75)
+ (MASK7_REG 76)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -360,7 +368,7 @@
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
- lwp,
+ lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -379,7 +387,7 @@
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
- sseshuf,sseshuf1,ssemuladd,sse4arg")
+ sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
@@ -390,7 +398,7 @@
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
- bitmanip,imulx")
+ bitmanip,imulx,msklog,mskmov")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
@@ -451,7 +459,7 @@
;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
(if_then_else
- (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
(eq_attr "unit" "sse,mmx"))
(const_int 1)
(const_int 0)))
@@ -651,7 +659,7 @@
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
- mmx,mmxmov,mmxcmp,mmxcvt")
+ mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
(match_operand 2 "memory_operand"))
(const_string "load")
(and (eq_attr "type" "icmov,ssemuladd,sse4arg")
@@ -695,7 +703,7 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
+ avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
(const_string "base"))
(define_attr "enabled" ""
@@ -718,6 +726,7 @@
(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
(eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
(eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
+ (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
(eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
(eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
@@ -2213,8 +2222,8 @@
(const_string "SI")))])
(define_insn "*movhi_internal"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m")
- (match_operand:HI 1 "general_operand" "r ,rn,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,Yk,Yk,rm")
+ (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,Yk,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2223,6 +2232,16 @@
/* movzwl is faster than movw on p2 due to partial word stalls,
though not as fast as an aligned movl. */
return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MSKMOV:
+ switch (which_alternative)
+ {
+ case 4: return "kmovw\t{%k1, %0|%0, %k1}";
+ case 5: return "kmovw\t{%1, %0|%0, %1}";
+ case 6: return "kmovw\t{%1, %k0|%k0, %1}";
+ default: gcc_unreachable ();
+ }
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2240,11 +2259,17 @@
(and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand"))
(const_string "imov")
+ (eq_attr "alternative" "4,5,6")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "4,5,6")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
@@ -2269,8 +2294,8 @@
;; register stall machines with, where we use QImode instructions, since
;; partial register stall can be caused there. Then we use movzx.
(define_insn "*movqi_internal"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
- (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,Yk,Yk,r")
+ (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn,r ,Yk,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2278,6 +2303,16 @@
case TYPE_IMOVX:
gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+
+ case TYPE_MSKMOV:
+ switch (which_alternative)
+ {
+ case 7: return "kmovw\t{%k1, %0|%0, %k1}";
+ case 8: return "kmovw\t{%1, %0|%0, %1}";
+ case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+ default: gcc_unreachable ();
+ }
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2297,11 +2332,17 @@
(const_string "imov")
(eq_attr "alternative" "3,5")
(const_string "imovx")
+ (eq_attr "alternative" "7,8,9")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4,5")
(const_string "SI")
@@ -7494,6 +7535,26 @@
operands[3] = gen_lowpart (QImode, operands[3]);
})
+(define_split
+ [(set (match_operand:SWI12 0 "mask_reg_operand")
+ (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand")
+ (match_operand:SWI12 2 "mask_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F && reload_completed"
+ [(set (match_dup 0)
+ (any_logic:SWI12 (match_dup 1)
+ (match_dup 2)))])
+
+(define_insn "*k<logic><mode>"
+ [(set (match_operand:SWI12 0 "mask_reg_operand" "=Yk")
+ (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "Yk")
+ (match_operand:SWI12 2 "mask_reg_operand" "Yk")))]
+ "TARGET_AVX512F"
+ "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "mode" "<MODE>")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers. If this is considered useful,
;; it should be done with splitters.
@@ -7617,9 +7678,9 @@
(set_attr "mode" "SI")])
(define_insn "*andhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya")
- (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
- (match_operand:HI 2 "general_operand" "rn,rm,L")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya,!Yk")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm,Yk")
+ (match_operand:HI 2 "general_operand" "rn,rm,L,Yk")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, HImode, operands)"
{
@@ -7628,34 +7689,38 @@
case TYPE_IMOVX:
return "#";
+ case TYPE_MSKLOG:
+ return "kandw\t{%2, %1, %0|%0, %1, %2}";
+
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
return "and{w}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "type" "alu,alu,imovx")
- (set_attr "length_immediate" "*,*,0")
+ [(set_attr "type" "alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,0,*")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
(match_operand 1 "ext_QIreg_operand"))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI,HI,SI")])
+ (set_attr "mode" "HI,HI,SI,HI")])
;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
- (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!Yk")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,Yk")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn,Yk")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, QImode, operands)"
"@
and{b}\t{%2, %0|%0, %2}
and{b}\t{%2, %0|%0, %2}
- and{l}\t{%k2, %k0|%k0, %k2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI,QI,SI")])
+ and{l}\t{%k2, %k0|%k0, %k2}
+ kandw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,alu,msklog")
+ (set_attr "mode" "QI,QI,SI,HI")])
(define_insn "*andqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -7668,6 +7733,40 @@
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
+(define_insn "kandn<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=r,&r,!Yk")
+ (and:SWI12
+ (not:SWI12
+ (match_operand:SWI12 1 "register_operand" "r,0,Yk"))
+ (match_operand:SWI12 2 "register_operand" "r,r,Yk")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F"
+ "@
+ andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
+ #
+ kandnw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "bmi,*,avx512f")
+ (set_attr "type" "bitmanip,*,msklog")
+ (set_attr "prefix" "*,*,vex")
+ (set_attr "btver2_decode" "direct,*,*")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:SWI12 0 "general_reg_operand")
+ (and:SWI12
+ (not:SWI12
+ (match_dup 0))
+ (match_operand:SWI12 1 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F && !TARGET_BMI && reload_completed"
+ [(set (match_dup 0)
+ (not:HI (match_dup 0)))
+ (parallel [(set (match_dup 0)
+ (and:HI (match_dup 0)
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
;; Turn *anddi_1 into *andsi_1_zext if possible.
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -7999,29 +8098,44 @@
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,rm")
- (any_or:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>")))
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
+ (any_or:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+(define_insn "*<code>hi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!Yk")
+ (any_or:HI
+ (match_operand:HI 1 "nonimmediate_operand" "%0,0,Yk")
+ (match_operand:HI 2 "general_operand" "<g>,r<i>,Yk")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, HImode, operands)"
+ "@
+ <logic>{w}\t{%2, %0|%0, %2}
+ <logic>{w}\t{%2, %0|%0, %2}
+ k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,msklog")
+ (set_attr "mode" "HI")])
+
;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*<code>qi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
- (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!Yk")
+ (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,Yk")
+ (match_operand:QI 2 "general_operand" "qmn,qn,rn,Yk")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, QImode, operands)"
"@
<logic>{b}\t{%2, %0|%0, %2}
<logic>{b}\t{%2, %0|%0, %2}
- <logic>{l}\t{%k2, %k0|%k0, %k2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI,QI,SI")])
+ <logic>{l}\t{%k2, %k0|%k0, %k2}
+ k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,alu,msklog")
+ (set_attr "mode" "QI,QI,SI,HI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
@@ -8071,6 +8185,74 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+(define_insn "kxnor<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=r,!Yk")
+ (not:SWI12
+ (xor:SWI12
+ (match_operand:SWI12 1 "register_operand" "0,Yk")
+ (match_operand:SWI12 2 "register_operand" "r,Yk"))))]
+ "TARGET_AVX512F"
+ "@
+ #
+ kxnorw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "*,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:SWI12 0 "general_reg_operand")
+ (not:SWI12
+ (xor:SWI12
+ (match_dup 0)
+ (match_operand:SWI12 1 "general_reg_operand"))))]
+ "TARGET_AVX512F && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (xor:HI (match_dup 0)
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0)
+ (not:HI (match_dup 0)))]
+ "")
+
+(define_insn "kortestzhi"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int 0)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "kortestchi"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int -1)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCCmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "kunpckhi"
+ [(set (match_operand:HI 0 "register_operand" "=Yk")
+ (ior:HI
+ (ashift:HI
+ (match_operand:HI 1 "register_operand" "Yk")
+ (const_int 8))
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "Yk"))))]
+ "TARGET_AVX512F"
+ "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
@@ -8640,23 +8822,38 @@
"ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
- (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
"not{<imodesuffix>}\t%0"
[(set_attr "type" "negnot")
(set_attr "mode" "<MODE>")])
+(define_insn "*one_cmplhi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,!Yk")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "0,Yk")))]
+ "ix86_unary_operator_ok (NOT, HImode, operands)"
+ "@
+ not{w}\t%0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512f")
+ (set_attr "type" "negnot,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "HI")])
+
;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
- (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!Yk")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,Yk")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
"@
not{b}\t%0
- not{l}\t%k0"
- [(set_attr "type" "negnot")
- (set_attr "mode" "QI,SI")])
+ not{l}\t%k0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,*,avx512f")
+ (set_attr "type" "negnot,negnot,msklog")
+ (set_attr "prefix" "*,*,vex")
+ (set_attr "mode" "QI,SI,QI")])
;; ??? Currently never generated - xor is used instead.
(define_insn "*one_cmplsi2_1_zext"
@@ -16423,11 +16620,11 @@
})
;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
-
+;; Do not split instructions with mask registers.
(define_split
- [(set (match_operand 0 "register_operand")
+ [(set (match_operand 0 "general_reg_operand")
(match_operator 3 "promotable_binary_operator"
- [(match_operand 1 "register_operand")
+ [(match_operand 1 "general_reg_operand")
(match_operand 2 "aligned_operand")]))
(clobber (reg:CC FLAGS_REG))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
@@ -16522,9 +16719,10 @@
operands[1] = gen_lowpart (SImode, operands[1]);
})
+;; Do not split instructions with mask regs.
(define_split
- [(set (match_operand 0 "register_operand")
- (not (match_operand 1 "register_operand")))]
+ [(set (match_operand 0 "general_reg_operand")
+ (not (match_operand 1 "general_reg_operand")))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
&& (GET_MODE (operands[0]) == HImode
|| (GET_MODE (operands[0]) == QImode
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3959c38..18f425c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -32,6 +32,11 @@
(and (match_code "reg")
(not (match_test "ANY_FP_REGNO_P (REGNO (op))"))))
+;; True if the operand is a GENERAL class register.
+(define_predicate "general_reg_operand"
+ (and (match_code "reg")
+ (match_test "GENERAL_REG_P (op)")))
+
;; Return true if OP is a register operand other than an i387 fp register.
(define_predicate "register_and_not_fp_reg_operand"
(and (match_code "reg")
@@ -52,6 +57,10 @@
(and (match_code "reg")
(match_test "EXT_REX_SSE_REGNO_P (REGNO (op))")))
+;; True if the operand is an AVX-512 mask register.
+(define_predicate "mask_reg_operand"
+ (and (match_code "reg")
+ (match_test "MASK_REGNO_P (REGNO (op))")))
;; True if the operand is a Q_REGS class register.
(define_predicate "q_regs_operand"