diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 25 | ||||
-rw-r--r-- | gcc/common/config/i386/i386-common.c | 23 | ||||
-rw-r--r-- | gcc/config.gcc | 6 | ||||
-rw-r--r-- | gcc/config/i386/cpuid.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.c | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386-c.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 14 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.opt | 6 | ||||
-rw-r--r-- | gcc/config/i386/immintrin.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 16 | ||||
-rw-r--r-- | gcc/config/i386/vpclmulqdqintrin.h | 108 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 9 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-1.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512-check.h | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c | 60 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/i386.exp | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-13.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-23.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vpclmulqdq.c | 20 |
23 files changed, 349 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9284a8c..c907770 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2017-12-20 Julia Koval <julia.koval@intel.com> + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_VPCLMULQDQ_SET, + OPTION_MASK_ISA_VPCLMULQDQ_UNSET): New. + (ix86_handle_option): Handle -mvpclmulqdq, move cx6 to flags2. + * config.gcc: Include vpclmulqdqintrin.h. + * config/i386/cpuid.h: Handle bit_VPCLMULQDQ. + * config/i386/driver-i386.c (host_detect_local_cpu): Handle + -mvpclmulqdq. + * config/i386/i386-builtin.def (__builtin_ia32_vpclmulqdq_v2di, + __builtin_ia32_vpclmulqdq_v4di, __builtin_ia32_vpclmulqdq_v8di): New. + * config/i386/i386-c.c (__VPCLMULQDQ__): New. + * config/i386/i386.c (isa2_opts): Add -mcx16. + (isa_opts): Add -mpclmulqdq, remove -mcx16. + (ix86_option_override_internal): Move mcx16 to flags2. + (ix86_valid_target_attribute_inner_p): Add vpclmulqdq. + (ix86_expand_builtin): Handle OPTION_MASK_ISA_VPCLMULQDQ. + * config/i386/i386.h (TARGET_VPCLMULQDQ, TARGET_VPCLMULQDQ_P): New. + * config/i386/i386.opt: Add mvpclmulqdq, move mcx16 to flags2. + * config/i386/immintrin.h: Include vpclmulqdqintrin.h. + * config/i386/sse.md (vpclmulqdq_<mode>): New pattern. + * config/i386/vpclmulqdqintrin.h (_mm512_clmulepi64_epi128, + _mm_clmulepi64_epi128, _mm256_clmulepi64_epi128): New intrinsics. + * doc/invoke.texi: Add -mvpclmulqdq. + 2017-12-20 Tom de Vries <tom@codesourcery.com> PR middle-end/83423 diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 575a914..00eb017 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_IBT_SET OPTION_MASK_ISA_IBT #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA_VAES_SET OPTION_MASK_ISA_VAES +#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ /* Define a set of ISAs which aren't available when a given ISA is disabled. MMX and SSE ISAs are handled separately. */ @@ -214,6 +215,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_IBT_UNSET OPTION_MASK_ISA_IBT #define OPTION_MASK_ISA_SHSTK_UNSET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA_VAES_UNSET OPTION_MASK_ISA_VAES +#define OPTION_MASK_ISA_VPCLMULQDQ_UNSET OPTION_MASK_ISA_VPCLMULQDQ /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. */ @@ -554,6 +556,19 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mvpclmulqdq: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_VPCLMULQDQ_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_VPCLMULQDQ_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_VPCLMULQDQ_UNSET; + } + return true; + case OPT_mavx5124fmaps: if (value) { @@ -889,13 +904,13 @@ ix86_handle_option (struct gcc_options *opts, case OPT_mcx16: if (value) { - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_CX16_SET; } else { - opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_CX16_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_CX16_UNSET; } return true; diff --git a/gcc/config.gcc b/gcc/config.gcc index e208d00..4c2b382 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -381,7 +381,8 @@ i[34567]86-*-*) clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h - avx512vnnivlintrin.h gfniintrin.h vaesintrin.h" + avx512vnnivlintrin.h gfniintrin.h vaesintrin.h + vpclmulqdqintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -408,7 +409,8 @@ x86_64-*-*) clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h - avx512vnnivlintrin.h gfniintrin.h vaesintrin.h" + avx512vnnivlintrin.h gfniintrin.h vaesintrin.h + vpclmulqdqintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 41369c2..37f3e1a 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -102,6 +102,7 @@ #define bit_GFNI (1 << 8) #define bit_VAES (1 << 9) #define bit_AVX512VNNI (1 << 11) +#define bit_VPCLMULQDQ (1 << 10) #define bit_AVX512VPOPCNTDQ (1 << 14) #define bit_RDPID (1 << 22) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 013107a..99826fd 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -420,6 +420,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_gfni = 0, has_avx512vbmi2 = 0; unsigned int has_ibt = 0, has_shstk = 0; unsigned int has_avx512vnni = 0, has_vaes = 0; + unsigned int has_vpclmulqdq = 0; bool arch; @@ -513,6 +514,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_rdpid = ecx & bit_RDPID; has_gfni = ecx & bit_GFNI; has_vaes = ecx & bit_VAES; + has_vpclmulqdq = ecx & bit_VPCLMULQDQ; has_avx5124vnniw = edx & bit_AVX5124VNNIW; has_avx5124fmaps = edx & bit_AVX5124FMAPS; @@ -1080,6 +1082,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *ibt = has_ibt ? " -mibt" : " -mno-ibt"; const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk"; const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes"; + const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, @@ -1090,7 +1093,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) xsavec, xsaves, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, - avx512vbmi2, avx512vnni, vaes, NULL); + avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL); } done: diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index e3b12bd..7d65b0b 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2414,6 +2414,11 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, CODE_FOR_vgf2p8mulb_v32q BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, CODE_FOR_vgf2p8mulb_v16qi_mask, "__builtin_ia32_vgf2p8mulb_v16qi_mask", IX86_BUILTIN_VGF2P8MULB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) +/* VPCLMULQDQ */ +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) + /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 072e49b..de1b0e2 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -486,6 +486,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, } if (isa_flag2 & OPTION_MASK_ISA_VAES) def_or_undef (parse_in, "__VAES__"); + if (isa_flag & OPTION_MASK_ISA_VPCLMULQDQ) + def_or_undef (parse_in, "__VPCLMULQDQ__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 48d5640..ef321d3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2751,6 +2751,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, ISAs come first. Target string will be displayed in the same order. */ static struct ix86_target_opts isa2_opts[] = { + { "-mcx16", OPTION_MASK_ISA_CX16 }, { "-mmpx", OPTION_MASK_ISA_MPX }, { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 }, { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI }, @@ -2765,6 +2766,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, }; static struct ix86_target_opts isa_opts[] = { + { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ }, { "-mgfni", OPTION_MASK_ISA_GFNI }, { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, @@ -2811,7 +2813,6 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, { "-mtbm", OPTION_MASK_ISA_TBM }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, - { "-mcx16", OPTION_MASK_ISA_CX16 }, { "-msahf", OPTION_MASK_ISA_SAHF }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mcrc32", OPTION_MASK_ISA_CRC32 }, @@ -3998,8 +3999,8 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; if (processor_alias_table[i].flags & PTA_CX16 - && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16; + && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; @@ -5330,6 +5331,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("ibt", OPT_mibt), IX86_ATTR_ISA ("shstk", OPT_mshstk), IX86_ATTR_ISA ("vaes", OPT_mvaes), + IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -35376,10 +35378,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, at all, -m64 is a whole TU option. */ if (((ix86_builtins_isa[fcode].isa & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX - | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI)) + | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI + | OPTION_MASK_ISA_VPCLMULQDQ)) && !(ix86_builtins_isa[fcode].isa & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX - | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI) + | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI + | OPTION_MASK_ISA_VPCLMULQDQ) & ix86_isa_flags)) || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL) && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 01fd6ce..7da8573 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -111,6 +111,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_GFNI_P(x) TARGET_ISA_GFNI_P(x) #define TARGET_VAES TARGET_ISA_VAES #define TARGET_VAES_P(x) TARGET_ISA_VAES_P(x) +#define TARGET_VPCLMULQDQ TARGET_ISA_VPCLMULQDQ +#define TARGET_VPCLMULQDQ_P(x) TARGET_ISA_VPCLMULQDQ_P(x) #define TARGET_BMI TARGET_ISA_BMI #define TARGET_BMI_P(x) TARGET_ISA_BMI_P(x) #define TARGET_BMI2 TARGET_ISA_BMI2 diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 04e391d..0e58d38 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -789,6 +789,10 @@ mvaes Target Report Mask(ISA_VAES) Var(ix86_isa_flags2) Save Support VAES built-in functions and code generation. +mvpclmulqdq +Target Report Mask(ISA_VPCLMULQDQ) Var(ix86_isa_flags) Save +Support VPCLMULQDQ built-in functions and code generation. + mbmi Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save Support BMI built-in functions and code generation. @@ -854,7 +858,7 @@ Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save Support TBM built-in functions and code generation. mcx16 -Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save +Target Report Mask(ISA_CX16) Var(ix86_isa_flags2) Save Support code generation of cmpxchg16b instruction. msahf diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index a6e27dd..7fcaa69 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -104,6 +104,8 @@ #include <vaesintrin.h> +#include <vpclmulqdqintrin.h> + #ifndef __RDRND__ #pragma GCC push_options #pragma GCC target("rdrnd") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c1469f4..20e7b16 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -178,6 +178,9 @@ UNSPEC_VAESDECLAST UNSPEC_VAESENC UNSPEC_VAESENCLAST + + ;; For VPCLMULQDQ support + UNSPEC_VPCLMULQDQ ]) (define_c_enum "unspecv" [ @@ -340,6 +343,9 @@ (define_mode_iterator VI8 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) +(define_mode_iterator VI8_FVL + [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VI8_AVX512VL [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) @@ -20498,3 +20504,13 @@ "TARGET_VAES" "vaesenclast\t{%2, %1, %0|%0, %1, %2}" ) + +(define_insn "vpclmulqdq_<mode>" + [(set (match_operand:VI8_FVL 0 "register_operand" "=v") + (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v") + (match_operand:VI8_FVL 2 "vector_operand" "vm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_VPCLMULQDQ))] + "TARGET_VPCLMULQDQ" + "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "mode" "DI")]) diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h new file mode 100644 index 0000000..483e160 --- /dev/null +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -0,0 +1,108 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _VPCLMULQDQINTRIN_H_INCLUDED +#define _VPCLMULQDQINTRIN_H_INCLUDED + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512f") +#define __DISABLE_VPCLMULQDQF__ +#endif /* __VPCLMULQDQF__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C) +{ + return (__m512i) __builtin_ia32_vpclmulqdq_v8di ((__v8di)__A, + (__v8di) __B, __C); +} +#else +#define _mm512_clmulepi64_epi128(A, B, C) \ + ((__m512i) __builtin_ia32_vpclmulqdq_v8di ((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQF__ +#undef __DISABLE_VPCLMULQDQF__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQF__ */ + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512vl") +#define __DISABLE_VPCLMULQDQVL__ +#endif /* __VPCLMULQDQVL__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_clmulepi64_epi128 (__m128i __A, __m128i __B, const int __C) +{ + return (__m128i) __builtin_ia32_vpclmulqdq_v2di ((__v2di)__A, + (__v2di) __B, __C); +} +#else +#define _mm_clmulepi64_epi128(A, B, C) \ + ((__m128i) __builtin_ia32_vpclmulqdq_v2di ((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQVL__ +#undef __DISABLE_VPCLMULQDQVL__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQVL__ */ + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512vl") +#define __DISABLE_VPCLMULQDQ__ +#endif /* __VPCLMULQDQ__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_clmulepi64_epi128 (__m256i __A, __m256i __B, const int __C) +{ + return (__m256i) __builtin_ia32_vpclmulqdq_v4di ((__v4di)__A, + (__v4di) __B, __C); +} +#else +#define _mm256_clmulepi64_epi128(A, B, C) \ + ((__m256i) __builtin_ia32_vpclmulqdq_v4di ((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQ__ +#undef __DISABLE_VPCLMULQDQ__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQ__ */ + + +#endif /* _VPCLMULQDQINTRIN_H_INCLUDED */ + diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2049c27..cde0c73 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1208,6 +1208,7 @@ See RS/6000 and PowerPC Options. -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol -mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes @gol -mcet -mibt -mshstk -mforce-indirect-call -mavx512vbmi2 @gol +-mvpclmulqdq @gol -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol @@ -26160,13 +26161,17 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mvaes @opindex mvaes +@need 200 +@itemx -mvpclmulqdq +@opindex mvpclmulqdq These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES, FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2, -GFNI, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. Each has a -corresponding @option{-mno-} option to disable use of these instructions. +GFNI, VPCLMULQDQ, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. +Each has a corresponding @option{-mno-} option to disable use of these +instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cb9e012..aaa8c05 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2017-12-20 Julia Koval <julia.koval@intel.com> + + * gcc.target/i386/avx-1.c: Handle new intrinsics. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/avx512-check.h: Handle bit_VPCLMULQDQ. + * gcc.target/i386/avx512f-vpclmulqdq-2.c: New test. + * gcc.target/i386/avx512vl-vpclmulqdq-2.c: Ditto. + * gcc.target/i386/vpclmulqdq.c: Ditto. + * gcc.target/i386/i386.exp (check_effective_target_vpclmulqdq): New. + 2017-12-19 Martin Sebor <msebor@redhat.com> PR c++/83394 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index bbb4ae2..db77244 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -655,6 +655,11 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + #include <wmmintrin.h> #include <immintrin.h> #include <mm3dnow.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 8ea8751..2d174f9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -87,6 +87,9 @@ main () #ifdef VAES && (ecx & bit_VAES) #endif +#ifdef VPCLMULQDQ + && (ecx & bit_VPCLMULQDQ) +#endif && avx512f_os_support ()) { DO_TEST (); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c new file mode 100644 index 0000000..fe746a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f -mvpclmulqdq" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-require-effective-target vpclmulqdq } */ + +#define AVX512F + +#define VPCLMULQDQ +#include "avx512f-helper.h" + +#define SIZE (AVX512F_LEN / 64) + +#include "avx512f-mask-type.h" + +static void +CALC (unsigned long long *r, unsigned long long *s1, unsigned long long *s2, unsigned char imm) +{ + for (int len = 0; len < SIZE/2; len++) + { + unsigned long long src1, src2; + src1 = (imm & 1) ? s1[len*2 + 1] : s1[len*2]; + src2 = ((imm >> 4) & 1) ? s2[len*2 + 1] : s2[len*2]; + for (int i = 0; i < 64; i++) + { + if ((src1 >> i) & 1) + { + if (i) + r[len*2 + 1] ^= src2 >> (64 - i); + r[len*2] ^= src2 << i; + } + } + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_q) res, src1, src2; + unsigned long long res_ref[SIZE]; + unsigned char imm = 1; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 0xFFFFFFFFF + i; + src2.a[i] = 0xFFFFFFFFF + i*i; + } + + for (i = 0; i < SIZE; i++) + { + res.a[i] = 0; + res_ref[i] = 0; + } + + CALC (res_ref, src1.a, src2.a, imm); + res.x = INTRINSIC (_clmulepi64_epi128) (src1.x, src2.x, imm); + + if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c new file mode 100644 index 0000000..61288a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -mvpclmulqdq" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target vpclmulqdq } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512f-vpclmulqdq-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512f-vpclmulqdq-2.c" diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index bebc6dd..0b53023 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -470,6 +470,19 @@ proc check_effective_target_avx512vaes { } { } "-mvaes" ] } +# Return 1 if vpclmulqdq instructions can be compiled. +proc check_effective_target_vpclmulqdq { } { + return [check_no_compiler_messages vpclmulqdq object { + typedef long long __v4di __attribute__ ((__vector_size__ (32))); + + __v4di + _mm256_clmulepi64_epi128 (__v4di __A, __v4di __B) + { + return (__v4di) __builtin_ia32_vpclmulqdq_v4di (__A, __B, 0); + } + } "-mvpclmulqdq -mavx512vl" ] +} + # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 89feeca..62f87f0 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -672,4 +672,9 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index c1ae48b..65f6ccf 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -633,7 +633,6 @@ #define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E) #define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E) - /* avx512vbmi2intrin.h */ #define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1) #define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E) __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E) @@ -672,6 +671,11 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2") +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq") #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c new file mode 100644 index 0000000..0ce1a06 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <x86intrin.h> + +volatile __m512i x1, x2; +volatile __m256i x3, x4; +volatile __m128i x5, x6; + +void extern +avx512vl_test (void) +{ + x1 = _mm512_clmulepi64_epi128(x1, x2, 3); + x3 = _mm256_clmulepi64_epi128(x3, x4, 3); + x5 = _mm_clmulepi64_epi128(x5, x6, 3); +} + |