diff options
author | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2014-08-14 13:18:42 +0000 |
---|---|---|
committer | Kirill Yukhin <kyukhin@gcc.gnu.org> | 2014-08-14 13:18:42 +0000 |
commit | 9e4a4dd6518a86c94278a978412e162c4b770c7b (patch) | |
tree | 33cf0a1877fdbac992df7ad08b2c28b523fe991f /gcc | |
parent | 7d39012c24ded7a9d1d4eb1fa2916181dac3ada2 (diff) | |
download | gcc-9e4a4dd6518a86c94278a978412e162c4b770c7b.zip gcc-9e4a4dd6518a86c94278a978412e162c4b770c7b.tar.gz gcc-9e4a4dd6518a86c94278a978412e162c4b770c7b.tar.bz2 |
i386.c (print_reg): Correctly print 64-bit mask registers.
gcc/
	* config/i386/i386.c (print_reg): Correctly print 64-bit mask
registers.
	(inline_secondary_memory_needed): Allow 64-bit wide mask registers.
(ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and
	xmm/ymm16+ when available.
* config/i386/i386.h
(HARD_REGNO_NREGS): Add mask regs.
(VALID_AVX512F_REG_MODE): Ditto.
	(VALID_AVX512VL_128_REG_MODE): Define.
(VALID_MASK_AVX512BW_MODE): Ditto.
(reg_class) (MASK_REG_P(X)): Define.
* config/i386/i386.md: Do not split long moves with mask register,
	use kmovb if avx512bw is available.
(movdi_internal): Handle mask registers.
Co-Authored-By: Andrey Turetskiy <andrey.turetskiy@intel.com>
Co-Authored-By: Anna Tikhonova <anna.tikhonova@intel.com>
Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com>
Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com>
Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
Co-Authored-By: Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Co-Authored-By: Michael Zolotukhin <michael.v.zolotukhin@intel.com>
From-SVN: r213962
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 24 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 13 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 40 |
4 files changed, 82 insertions, 14 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index db09e06..a65af97 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2014-08-14 Alexander Ivchenko <alexander.ivchenko@intel.com> + Maxim Kuznetsov <maxim.kuznetsov@intel.com> + Anna Tikhonova <anna.tikhonova@intel.com> + Ilya Tocar <ilya.tocar@intel.com> + Andrey Turetskiy <andrey.turetskiy@intel.com> + Ilya Verbin <ilya.verbin@intel.com> + Kirill Yukhin <kirill.yukhin@intel.com> + Michael Zolotukhin <michael.v.zolotukhin@intel.com> + + * config/i386/i386.c (print_reg): Сorrectly print 64-bit mask + registers. + (inline_secondary_memory_needed): Allow 64 bit wide mask registers. + (ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and + xmm/ymm16+ when availble. + * config/i386/i386.h + (HARD_REGNO_NREGS): Add mask regs. + (VALID_AVX512F_REG_MODE): Ditto. + (VALID_AVX512F_REG_MODE) : Define. + (VALID_MASK_AVX512BW_MODE): Ditto. + (reg_class) (MASK_REG_P(X)): Define. + * config/i386/i386.md: Do not split long moves with mask register, + use kmovb if avx512bw is availible. + (movdi_internal): Handle mask registers. + 2014-08-14 Richard Biener <rguenther@suse.de> PR tree-optimization/62081 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c77e8a6..5a3b67a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -14701,7 +14701,7 @@ print_reg (rtx x, int code, FILE *file) case 8: case 4: case 12: - if (! ANY_FP_REG_P (x)) + if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x)) putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); /* FALLTHRU */ case 16: @@ -37393,6 +37393,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) return true; + /* Between mask and general, we have moves no larger than word size. */ + if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2)) + && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) + return true; + /* ??? This is a lie. 
We do have moves between mmx/general, and for mmx/sse2. But by saying we need secondary memory we discourage the register allocator from using the mmx registers unless needed. */ @@ -37698,7 +37703,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) if (STACK_REGNO_P (regno)) return VALID_FP_MODE_P (mode); if (MASK_REGNO_P (regno)) - return VALID_MASK_REG_MODE (mode); + return (VALID_MASK_REG_MODE (mode) + || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode))); if (SSE_REGNO_P (regno)) { /* We implement the move patterns for all vector modes into and @@ -37715,6 +37721,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) || VALID_AVX512F_SCALAR_MODE (mode))) return true; + /* TODO check for QI/HI scalars. */ + /* AVX512VL allows sse regs16+ for 128/256 bit modes. */ + if (TARGET_AVX512VL + && (mode == OImode + || mode == TImode + || VALID_AVX256_REG_MODE (mode) + || VALID_AVX512VL_128_REG_MODE (mode))) + return true; + /* xmm16-xmm31 are only available for AVX-512. */ if (EXT_REX_SSE_REGNO_P (regno)) return false; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8677e6b..c2f0cee 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1054,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); applied to them. */ #define HARD_REGNO_NREGS(REGNO, MODE) \ - (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \ + (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) \ + || MMX_REGNO_P (REGNO) || MASK_REGNO_P (REGNO) \ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \ : ((MODE) == XFmode \ ? (TARGET_64BIT ? 
2 : 3) \ @@ -1085,7 +1086,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_AVX512F_REG_MODE(MODE) \ ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ - || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode) + || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \ + || (MODE) == V4TImode) + +#define VALID_AVX512VL_128_REG_MODE(MODE) \ + ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \ + || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode) #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ @@ -1132,6 +1138,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode) +#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode) + /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ #define HARD_REGNO_MODE_OK(REGNO, MODE) \ @@ -1454,6 +1462,7 @@ enum reg_class : (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \ : (FIRST_EXT_REX_SSE_REG + (N) - 16)) +#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X))) #define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG) #define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X))) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3cb8b67..4867e7e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -952,6 +952,9 @@ ;; Instruction suffix for integer modes. (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) +;; Instruction suffix for masks. 
+(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")]) + ;; Pointer size prefix for integer modes (Intel asm dialect) (define_mode_attr iptrsize [(QI "BYTE") (HI "WORD") @@ -2022,13 +2025,16 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi") + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))] + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { + case TYPE_MSKMOV: + return "kmovq\t{%1, %0|%0, %1}"; + case TYPE_MULTI: return "#"; @@ -2099,7 +2105,7 @@ [(set (attr "isa") (cond [(eq_attr "alternative" "0,1") (const_string "nox64") - (eq_attr "alternative" "2,3,4,5,10,11,16,18") + (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23") (const_string "x64") (eq_attr "alternative" "17") (const_string "x64_sse4") @@ -2118,6 +2124,8 @@ (const_string "ssemov") (eq_attr "alternative" "19,20") (const_string "ssecvt") + (eq_attr "alternative" "21,22,23,24") + (const_string "mskmov") (match_operand 1 "pic_32bit_operand") (const_string "lea") ] @@ -2179,16 +2187,20 @@ [(set (match_operand:DI 0 "nonimmediate_operand") (match_operand:DI 1 "general_operand"))] "!TARGET_64BIT && reload_completed - && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) - && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" + && !(MMX_REG_P (operands[0]) + || SSE_REG_P (operands[0]) + || MASK_REG_P (operands[0])) + && !(MMX_REG_P (operands[1]) + || SSE_REG_P (operands[1]) + || MASK_REG_P (operands[1]))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi") + "=r,m 
,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))] + "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2199,6 +2211,9 @@ return standard_sse_constant_opcode (insn, operands[1]); + case TYPE_MSKMOV: + return "kmovd\t{%1, %0|%0, %1}"; + case TYPE_SSEMOV: switch (get_attr_mode (insn)) { @@ -2262,6 +2277,8 @@ (const_string "sselog1") (eq_attr "alternative" "7,8,9,10,12") (const_string "ssemov") + (eq_attr "alternative" "13,14") + (const_string "mskmov") (match_operand 1 "pic_32bit_operand") (const_string "lea") ] @@ -2410,9 +2427,12 @@ case TYPE_MSKMOV: switch (which_alternative) { - case 7: return "kmovw\t{%k1, %0|%0, %k1}"; - case 8: return "kmovw\t{%1, %0|%0, %1}"; - case 9: return "kmovw\t{%1, %k0|%k0, %1}"; + case 7: return TARGET_AVX512BW ? "kmovb\t{%k1, %0|%0, %k1}" + : "kmovw\t{%k1, %0|%0, %k1}"; + case 8: return TARGET_AVX512BW ? "kmovb\t{%1, %0|%0, %1}" + : "kmovw\t{%1, %0|%0, %1}"; + case 9: return TARGET_AVX512BW ? "kmovb\t{%1, %k0|%k0, %1}" + : "kmovw\t{%1, %k0|%k0, %1}"; default: gcc_unreachable (); } |