diff options
Diffstat (limited to 'gcc')
39 files changed, 762 insertions, 29 deletions
diff --git a/gcc/builtins.cc b/gcc/builtins.cc index b4c4fd0..504b31f 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -1296,8 +1296,8 @@ expand_builtin_prefetch (tree exp) } else op1 = expand_normal (arg1); - /* Argument 1 must be either zero or one. */ - if (INTVAL (op1) != 0 && INTVAL (op1) != 1) + /* Argument 1 must be 0, 1 or 2. */ + if (INTVAL (op1) < 0 || INTVAL (op1) > 2) { warning (0, "invalid second argument to %<__builtin_prefetch%>;" " using zero"); diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index b128c31..14af8bf 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -918,6 +918,8 @@ get_available_features (struct __processor_model *cpu_model, set_feature (FEATURE_RAOINT); if (edx & bit_USER_MSR) set_feature (FEATURE_USER_MSR); + if (eax & bit_MOVRS) + set_feature (FEATURE_MOVRS); if (avx_usable) { if (eax & bit_AVXVNNI) diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 771bde2..86cacf5 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -136,6 +136,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_TRANSPOSE) #define OPTION_MASK_ISA2_AMX_FP8_SET \ (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_FP8) +#define OPTION_MASK_ISA2_MOVRS_SET OPTION_MASK_ISA2_MOVRS /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */ @@ -334,6 +335,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AMX_TF32_UNSET OPTION_MASK_ISA2_AMX_TF32 #define OPTION_MASK_ISA2_AMX_TRANSPOSE_UNSET OPTION_MASK_ISA2_AMX_TRANSPOSE #define OPTION_MASK_ISA2_AMX_FP8_UNSET OPTION_MASK_ISA2_AMX_FP8 +#define OPTION_MASK_ISA2_MOVRS_UNSET OPTION_MASK_ISA2_MOVRS /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. */ @@ -1480,6 +1482,20 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mmovrs: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_MOVRS_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_MOVRS_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_MOVRS_UNSET; + opts->x_ix86_isa_flags2_explicit |= + OPTION_MASK_ISA2_MOVRS_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 68d6325..f170aaf 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -273,6 +273,7 @@ enum processor_features FEATURE_AMX_TF32, FEATURE_AMX_TRANSPOSE, FEATURE_AMX_FP8, + FEATURE_MOVRS, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index ccd0d19..d454173 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -195,4 +195,5 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("amx-transpose", FEATURE_AMX_TRANSPOSE, P_NONE, "-mamx-transpose") ISA_NAMES_TABLE_ENTRY("amx-fp8", FEATURE_AMX_FP8, P_NONE, "-mamx-fp8") + ISA_NAMES_TABLE_ENTRY("movrs", FEATURE_MOVRS, P_NONE, "-mmovrs") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index 6538c84..a34fb8a 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -459,7 +459,7 @@ i[34567]86-*-* | x86_64-*-*) avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h avx10_2copyintrin.h amxavx512intrin.h amxtf32intrin.h - amxtransposeintrin.h amxfp8intrin.h" + amxtransposeintrin.h amxfp8intrin.h movrsintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index e3dda94..6bed441 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -138,6 +138,7 @@ #define bit_AMX_FP16 (1 << 21) #define bit_HRESET (1 << 22) #define bit_AVXIFMA (1 << 23) +#define bit_MOVRS (1 << 31) /* %edx */ #define bit_AVXVNNIINT8 (1 << 4) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index c603423..fff29b8 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1510,3 +1510,9 @@ DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT) # SM4 builtins DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI) + +# MOVRS builtins +DEF_FUNCTION_TYPE (CHAR, PCCHAR) +DEF_FUNCTION_TYPE (SHORT, PCSHORT) +DEF_FUNCTION_TYPE (INT, PCINT) +DEF_FUNCTION_TYPE (INT64, PCINT64) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 3958027..c484e6d 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -505,6 +505,24 @@ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide2 BDESC (0, OPTION_MASK_ISA2_PREFETCHI, CODE_FOR_prefetchi, "__builtin_ia32_prefetchi", IX86_BUILTIN_PREFETCHI, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT) BDESC (0, 0, CODE_FOR_nothing, "__builtin_ia32_prefetch", IX86_BUILTIN_PREFETCH, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT_INT_INT) +/* MOVRS */ +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrsqi, "__builtin_ia32_movrsqi", IX86_BUILTIN_MOVRSQI, UNKNOWN, (int) CHAR_FTYPE_PCCHAR) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrshi, "__builtin_ia32_movrshi", IX86_BUILTIN_MOVRSHI, UNKNOWN, (int) SHORT_FTYPE_PCSHORT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrssi, "__builtin_ia32_movrssi", IX86_BUILTIN_MOVRSSI, UNKNOWN, (int) INT_FTYPE_PCINT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrsdi, "__builtin_ia32_movrsdi", IX86_BUILTIN_MOVRSDI, UNKNOWN, (int) INT64_FTYPE_PCINT64) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsbv64qi_mask, "__builtin_ia32_vmovrsb512_mask", IX86_BUILTIN_VMOVRSB_512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsdv16si_mask, "__builtin_ia32_vmovrsd512_mask", IX86_BUILTIN_VMOVRSD_512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsqv8di_mask, "__builtin_ia32_vmovrsq512_mask", IX86_BUILTIN_VMOVRSQ_512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrswv32hi_mask, "__builtin_ia32_vmovrsw512_mask", IX86_BUILTIN_VMOVRSW_512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsbv32qi_mask, "__builtin_ia32_vmovrsb256_mask", IX86_BUILTIN_VMOVRSB_256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsdv8si_mask, "__builtin_ia32_vmovrsd256_mask", IX86_BUILTIN_VMOVRSD_256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsqv4di_mask, "__builtin_ia32_vmovrsq256_mask", IX86_BUILTIN_VMOVRSQ_256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrswv16hi_mask, "__builtin_ia32_vmovrsw256_mask", IX86_BUILTIN_VMOVRSW_256, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsbv16qi_mask, "__builtin_ia32_vmovrsb128_mask", IX86_BUILTIN_VMOVRSB_128, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsdv4si_mask, "__builtin_ia32_vmovrsd128_mask", IX86_BUILTIN_VMOVRSD_128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsqv2di_mask, "__builtin_ia32_vmovrsq128_mask", IX86_BUILTIN_VMOVRSQ_128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrswv8hi_mask, "__builtin_ia32_vmovrsw128_mask", IX86_BUILTIN_VMOVRSW_128, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI) + BDESC_END (SPECIAL_ARGS, PURE_ARGS) /* AVX */ diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index f18aa15..31f7e6f 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -749,6 +749,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AMX_TRANSPOSE__"); if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP8) def_or_undef (parse_in, "__AMX_FP8__"); + if (isa_flag2 & OPTION_MASK_ISA2_MOVRS) + def_or_undef (parse_in, "__MOVRS__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 768987c..515334a 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -13027,6 +13027,10 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, klass = load; memory = 0; break; + case CHAR_FTYPE_PCCHAR: + case SHORT_FTYPE_PCSHORT: + case INT_FTYPE_PCINT: + case INT64_FTYPE_PCINT64: case UINT64_FTYPE_PUNSIGNED: case V2DI_FTYPE_PV2DI: case V4DI_FTYPE_PV4DI: diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 1d1ef15..643cc3e 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -127,3 +127,4 @@ DEF_PTA(AMX_AVX512) DEF_PTA(AMX_TF32) DEF_PTA(AMX_TRANSPOSE) DEF_PTA(AMX_FP8) +DEF_PTA(MOVRS) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index d3b26e2..0d0a50a 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -267,7 +267,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 }, { "-mamx-tf32", OPTION_MASK_ISA2_AMX_TF32 }, { "-mamx-transpose", OPTION_MASK_ISA2_AMX_TRANSPOSE }, - { "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 } + { "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 }, + { "-mmovrs", OPTION_MASK_ISA2_MOVRS } }; static struct ix86_target_opts isa_opts[] = { @@ -1140,6 +1141,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32), IX86_ATTR_ISA ("amx-transpose", OPT_mamx_transpose), IX86_ATTR_ISA ("amx-fp8", OPT_mamx_fp8), + IX86_ATTR_ISA ("movrs", OPT_mmovrs), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index fb6aaa8..effab29 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -364,6 +364,9 @@ ;; For AMX-TILE UNSPECV_LDTILECFG UNSPECV_STTILECFG + + ;; For MOVRS support + UNSPECV_MOVRS ]) ;; Constants to represent rounding modes in the ROUND instruction @@ -28528,19 +28531,21 @@ [(prefetch (match_operand 0 "address_operand") (match_operand:SI 1 "const_int_operand") (match_operand:SI 2 "const_int_operand"))] - "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW" + "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW + || TARGET_MOVRS" { - bool write = operands[1] != const0_rtx; + int write = INTVAL (operands[1]); int locality = INTVAL (operands[2]); gcc_assert (IN_RANGE (locality, 0, 3)); + gcc_assert (IN_RANGE (write, 0, 2)); /* Use 3dNOW prefetch in case we are asking for write prefetch not supported by SSE counterpart (non-SSE2 athlon machines) or the SSE prefetch is not available (K6 machines). Otherwise use SSE prefetch as it allows specifying of locality. */ - if (write) + if (write == 1) { if (TARGET_PRFCHW) operands[2] = GEN_INT (3); @@ -28548,11 +28553,24 @@ operands[2] = GEN_INT (3); else if (TARGET_PREFETCH_SSE) operands[1] = const0_rtx; - else + else if (write == 0) { gcc_assert (TARGET_3DNOW); operands[2] = GEN_INT (3); } + else + { + if (TARGET_MOVRS) + ; + else if (TARGET_PREFETCH_SSE) + operands[1] = const0_rtx; + else + { + gcc_assert (TARGET_3DNOW); + operands[1] = const0_rtx; + operands[2] = GEN_INT (3); + } + } } else { @@ -28623,6 +28641,18 @@ (symbol_ref "memory_address_length (operands[0], false)")) (set_attr "memory" "none")]) +(define_insn "*prefetch_rst2" + [(prefetch (match_operand 0 "address_operand" "p") + (const_int 2) + (const_int 1))] + "TARGET_MOVRS" + "prefetchrst2\t%a0" + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "prefetch") + (set (attr "length_address") + (symbol_ref "memory_address_length (operands[0], false)")) + (set_attr "memory" "none")]) + (define_insn "sse4_2_crc32<mode>" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI @@ -29671,6 +29701,17 @@ (set_attr "prefix" "maybe_evex") (set_attr "memory" "store")]) +(define_insn "movrs<mode>" + [(set (match_operand:SWI1248x 0 "register_operand" "=r") + (unspec_volatile:SWI1248x + [(match_operand:SWI1248x 1 "memory_operand" "m")] + UNSPECV_MOVRS))] + "TARGET_MOVRS && TARGET_64BIT" + "movrs<imodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "prefix" "orig") + (set_attr "type" "other") + (set_attr "mode" "<MODE>")]) + (include "mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index a833f47..83e4e91 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1406,3 +1406,7 @@ Support AMX-TRANSPOSE built-in functions and code generation. mamx-fp8 Target Mask(ISA2_AMX_FP8) Var(ix86_isa_flags2) Save Support AMX-FP8 built-in functions and code generation. + +mmovrs +Target Mask(ISA2_MOVRS) Var(ix86_isa_flags2) Save +Support MOVRS built-in functions and code generation. diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls index 425ee27..4a30a87 100644 --- a/gcc/config/i386/i386.opt.urls +++ b/gcc/config/i386/i386.opt.urls @@ -625,3 +625,6 @@ UrlSuffix(gcc/x86-Options.html#index-mamx-transpose) mamx-fp8 UrlSuffix(gcc/x86-Options.html#index-mamx-fp8) +mmovrs +UrlSuffix(gcc/x86-Options.html#index-mmovrs) + diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 7e0f137..ebe0443 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -170,4 +170,6 @@ #include <avx10_2copyintrin.h> +#include <movrsintrin.h> + #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/movrsintrin.h b/gcc/config/i386/movrsintrin.h new file mode 100644 index 0000000..b89ce1c --- /dev/null +++ b/gcc/config/i386/movrsintrin.h @@ -0,0 +1,453 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +# error "Never use <movrsintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _MOVRSINTRIN_H_INCLUDED +#define _MOVRSINTRIN_H_INCLUDED + +#ifndef __MOVRS__ +#pragma GCC push_options +#pragma GCC target("movrs") +#define __DISABLE_MOVRS__ +#endif /* __MOVRS__ */ + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_prefetchrs (void* __P) +{ + __builtin_ia32_prefetch (__P, 2, 1, 0 /* _MM_HINT_RST2 */); +} + +#ifdef __x86_64__ + +extern __inline char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_movrs_i8 (void const * __P) +{ + return (char) __builtin_ia32_movrsqi ((const char *) __P); +} + +extern __inline short +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_movrs_i16 (void const * __P) +{ + return (short) __builtin_ia32_movrshi ((const short *) __P); +} + +extern __inline int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_movrs_i32 (void const * __P) +{ + return (int) __builtin_ia32_movrssi ((const int *) __P); +} + +extern __inline long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_movrs_i64 (void const * __P) +{ + return (long long) __builtin_ia32_movrsdi ((const long long *) __P); +} + +#endif /* __x86_64__ */ + +#ifdef __DISABLE_MOVRS__ +#undef __DISABLE_MOVRS__ +#pragma GCC pop_options +#endif /* __DISABLE_MOVRS__ */ + +#ifdef __x86_64__ + +#if !defined (__AVX10_2_256__) || !defined (__MOVRS__) +#pragma GCC push_options +#pragma GCC target("avx10.2,movrs") +#define __DISABLE_MOVRS_AVX10_2__ +#endif /* __MOVRS_AVX10_2__ */ + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadrs_epi8 (void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_loadrs_epi8 (__m256i __D, __mmask32 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A, + (__v32qi) __D, + (__mmask32) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_loadrs_epi8 (__mmask32 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadrs_epi32 (void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_loadrs_epi32 (__m256i __D, __mmask8 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A, + (__v8si) __D, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_loadrs_epi32 (__mmask8 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadrs_epi64 (void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_loadrs_epi64 (__m256i __D, __mmask8 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A, + (__v4di) __D, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_loadrs_epi64 (__mmask8 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadrs_epi16 (void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_loadrs_epi16 (__m256i __D, __mmask16 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A, + (__v16hi) __D, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_loadrs_epi16 (__mmask16 __U, void const *__A) +{ + return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadrs_epi8 (void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_loadrs_epi8 (__m128i __D, __mmask16 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A, + (__v16qi) __D, + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_loadrs_epi8 (__mmask16 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadrs_epi32 (void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_loadrs_epi32 (__m128i __D, __mmask8 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A, + (__v4si) __D, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_loadrs_epi32 (__mmask8 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadrs_epi64 (void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_loadrs_epi64 (__m128i __D, __mmask8 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A, + (__v2di) __D, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_loadrs_epi64 (__mmask8 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadrs_epi16 (void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_loadrs_epi16 (__m128i __D, __mmask8 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A, + (__v8hi) __D, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_loadrs_epi16 (__mmask8 __U, void const *__A) +{ + return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +#ifdef __DISABLE_MOVRS_AVX10_2__ +#undef __DISABLE_MOVRS_AVX10_2__ +#pragma GCC pop_options +#endif /* __DISABLE_MOVRS_AVX10_2__ */ + +#if !defined (__AVX10_2_512__) || !defined (__MOVRS__) +#pragma GCC push_options +#pragma GCC target("avx10.2-512,movrs") +#define __DISABLE_MOVRS_AVX10_2_512__ +#endif /* __MOVRS_AVX10_2_512__ */ + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadrs_epi8 (void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadrs_epi8 (__m512i __D, __mmask64 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A, + (__v64qi) __D, + (__mmask64) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadrs_epi8 (__mmask64 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadrs_epi32 (void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A, + (__v16si) _mm512_setzero_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadrs_epi32 (__m512i __D, __mmask16 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A, + (__v16si) __D, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadrs_epi32 (__mmask16 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadrs_epi64 (void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadrs_epi64 (__m512i __D, __mmask8 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A, + (__v8di) __D, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadrs_epi64 (__mmask8 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadrs_epi16 (void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_loadrs_epi16 (__m512i __D, __mmask32 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A, + (__v32hi) __D, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_loadrs_epi16 (__mmask32 __U, void const *__A) +{ + return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +#ifdef __DISABLE_MOVRS_AVX10_2_512__ +#undef __DISABLE_MOVRS_AVX10_2_512__ +#pragma GCC pop_options +#endif /* __DISABLE_MOVRS_AVX10_2_512__ */ + +#endif /* __x86_64__ */ + +#endif /* _MOVRSINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 319c3c7..22c6c81 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -251,6 +251,9 @@ UNSPEC_UFIX_SATURATION UNSPEC_MINMAXNEPBF16 UNSPEC_MINMAX + + ;; For MOVRS suppport + UNSPEC_VMOVRS ]) (define_c_enum "unspecv" [ @@ -508,6 +511,12 @@ (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) +(define_mode_iterator VI1248_AVX10_2 + [(V64QI "TARGET_AVX10_2_512") V32QI V16QI + (V32HI "TARGET_AVX10_2_512") V16HI V8HI + (V16SI "TARGET_AVX10_2_512") V8SI V4SI + (V8DI "TARGET_AVX10_2_512") V4DI V2DI]) + (define_mode_iterator VF_AVX512VL [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) @@ -32562,3 +32571,15 @@ "vminmax<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}" [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) + +(define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>" + [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v") + (unspec:VI1248_AVX10_2 + [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")] + UNSPEC_VMOVRS))] + "TARGET_AVX10_2_256 && TARGET_MOVRS" + "vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "load") + (set_attr "mode" "<sseinsnmode>")]) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 7f10f96..0864b2b 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -38,6 +38,7 @@ enum _mm_hint { _MM_HINT_IT0 = 19, _MM_HINT_IT1 = 18, + _MM_HINT_RST2 = 9, /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */ _MM_HINT_ET0 = 7, _MM_HINT_T0 = 3, @@ -52,12 +53,12 @@ enum _mm_hint extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_prefetch (const void *__P, enum _mm_hint __I) { - __builtin_ia32_prefetch (__P, (__I & 0x4) >> 2, + __builtin_ia32_prefetch (__P, (__I & 0xC) >> 2, __I & 0x3, (__I & 0x10) >> 4); } #else #define _mm_prefetch(P, I) \ - __builtin_ia32_prefetch ((P), ((I) & 0x4) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4) + __builtin_ia32_prefetch ((P), ((I) & 0xC) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4) #endif #ifndef __SSE__ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 0d5b6d1..d851f30 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7564,6 +7564,11 @@ Enable/disable the generation of the AMX-TRANSPOSE instructions. @itemx no-amx-fp8 Enable/disable the generation of the AMX-FP8 instructions. +@cindex @code{target("movrs")} function attribute, x86 +@item movrs +@itemx no-movrs +Enable/disable the generation of the MOVRS instructions. + @cindex @code{target("cld")} function attribute, x86 @item cld @itemx no-cld diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 87d3744..dc7a34b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1486,7 +1486,7 @@ See RS/6000 and PowerPC Options. -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf -musermsr -mavx10.1 -mavx10.1-256 -mavx10.1-512 -mevex512 -mavx10.2 -mavx10.2-256 --mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose +-mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mmovrs -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops -minline-stringops-dynamically -mstringop-strategy=@var{alg} -mkl -mwidekl @@ -35686,6 +35686,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mamx-fp8 @opindex mamx-fp8 +@need 200 +@opindex mmovrs +@itemx -mmovrs These switches enable the use of instructions in the MMX, SSE, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG, @@ -35697,7 +35700,7 @@ AVX512VPOPCNTDQ, AVX512VNNI, SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4, APX_F, USER_MSR, AVX10.1, AVX10.2, AMX-AVX512, AMX-TF32, AMX-TRANSPOSE, -AMX-FP8 or CLDEMOTE extended instruction sets. Each has a corresponding +AMX-FP8, MOVRS or CLDEMOTE extended instruction sets. Each has a corresponding @option{-mno-} option to disable use of these instructions. These extensions are also available as built-in functions: see diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi index 0cb36aa..5debd62 100644 --- a/gcc/doc/rtl.texi +++ b/gcc/doc/rtl.texi @@ -3472,11 +3472,12 @@ position of @var{base}, @var{min} and @var{max} to the containing insn and of @var{min} and @var{max} to @var{base}. See rtl.def for details. @findex prefetch -@item (prefetch:@var{m} @var{addr} @var{rw} @var{locality}) +@item (prefetch:@var{m} @var{addr} @var{rws} @var{locality} @var{cache}) Represents prefetch of memory at address @var{addr}. -Operand @var{rw} is 1 if the prefetch is for data to be written, 0 otherwise; -targets that do not support write prefetches should treat this as a normal -prefetch. +Operand @var{rws} is 0 if the prefetch is for data to be read, 1 for being +written; 2 if read shared; +targets that do not support write or read shared prefetches should treat this +as a normal prefetch. Operand @var{locality} specifies the amount of temporal locality; 0 if there is none or 1, 2, or 3 for increasing levels of temporal locality; targets that do not support locality hints should ignore this. diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index b627c3c..10fd1b9 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2698,6 +2698,9 @@ Target supporting hardware divmod insn or divmod libcall for SImode. @item hard_float Target supports FPU instructions. +@item movrs +Target supports the execution of @code{movrs} instructions. + @item non_strict_align Target does not require strict alignment. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 8d348d9..ddfdab4 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ /* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index accb881..ad77fc7 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ /* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! hostedlib } } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, diff --git a/gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-1.c b/gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-1.c index 46e16b8..03620c6 100644 --- a/gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-1.c @@ -10,11 +10,12 @@ void exit (int); #define MODERATE_TEMPORAL_LOCALITY 1 #define HIGH_TEMPORAL_LOCALITY 3 +#define READ_SHARED 2 #define WRITE_ACCESS 1 #define READ_ACCESS 0 enum locality { none, low, moderate, high }; -enum rw { read, write }; +enum rws { read, write, read-shared }; int arr[10]; diff --git a/gcc/testsuite/gcc.dg/builtin-prefetch-1.c b/gcc/testsuite/gcc.dg/builtin-prefetch-1.c index 11beb4e..7f75353 100644 --- a/gcc/testsuite/gcc.dg/builtin-prefetch-1.c +++ b/gcc/testsuite/gcc.dg/builtin-prefetch-1.c @@ -8,7 +8,7 @@ extern void exit (int); enum locality { none, low, moderate, high, bogus }; -enum rw { read, write }; +enum rws { read, write, read-shared }; int arr[10]; @@ -29,7 +29,7 @@ void bad (int *p) { __builtin_prefetch (p, -1, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */ - __builtin_prefetch (p, 2, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */ + __builtin_prefetch (p, 3, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */ __builtin_prefetch (p, bogus, 0); /* { dg-warning "invalid second argument to '__builtin_prefetch'; using zero" } */ __builtin_prefetch (p, 0, -1); /* { dg-warning "invalid third argument to '__builtin_prefetch'; using zero" } */ __builtin_prefetch (p, 0, 4); /* { dg-warning "invalid third argument to '__builtin_prefetch'; using zero" } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index b954374..d013dc7 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mprefetchi -mavx10.2-512" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mprefetchi -mavx10.2-512 -mmovrs" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/avx-2.c b/gcc/testsuite/gcc.target/i386/avx-2.c index 3f4d735..f1c46f5 100644 --- a/gcc/testsuite/gcc.target/i386/avx-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mmovrs" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-movrs-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-movrs-1.c new file mode 100644 index 0000000..9166f2e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-movrs-1.c @@ -0,0 +1,40 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -mmovrs -O2" } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %zmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ + +#include <immintrin.h> + +__m512i *px; +volatile __m512i x; +volatile __mmask64 m1; +volatile __mmask16 m2; +volatile __mmask8 m3; +volatile __mmask32 m4; + +void extern +avx512movrs_test (void) +{ + x = _mm512_loadrs_epi8(px); + x = _mm512_mask_loadrs_epi8(x, m1, px); + x = _mm512_maskz_loadrs_epi8(m1, px); + x = _mm512_loadrs_epi32(px); + x = _mm512_mask_loadrs_epi32(x, m2, px); + x = _mm512_maskz_loadrs_epi32(m2, px); + x = _mm512_loadrs_epi64(px); + x = _mm512_mask_loadrs_epi64(x, m3, px); + x = _mm512_maskz_loadrs_epi64(m3, px); + x = _mm512_loadrs_epi16(px); + x = _mm512_mask_loadrs_epi16(x, m4, px); + x = _mm512_maskz_loadrs_epi16(m4, px); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-movrs-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-movrs-1.c new file mode 100644 index 0000000..f8c2f30 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-movrs-1.c @@ -0,0 +1,67 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -mmovrs -O2" } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %ymm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsb\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsd\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsq\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}" 2 } } */ +/* { dg-final { scan-assembler-times "vmovrsw\[ \\t\]\+\\(%r.x\\), %xmm\[0-9\]+{%k\[1-7\]}{z}" 1 } } */ + +#include <immintrin.h> + +__m256i *px1; +volatile __m256i x1; +__m128i *px2; +volatile __m128i x2; +volatile __mmask32 m1; +volatile __mmask8 m2; +volatile __mmask16 m3; + + +void extern +avx512movrs_test (void) +{ + x1 = _mm256_loadrs_epi8(px1); + x1 = _mm256_mask_loadrs_epi8(x1, m1, px1); + x1 = _mm256_maskz_loadrs_epi8(m1, px1); + x1 = _mm256_loadrs_epi32(px1); + x1 = _mm256_mask_loadrs_epi32(x1, m2, px1); + x1 = _mm256_maskz_loadrs_epi32(m2, px1); + x1 = _mm256_loadrs_epi64(px1); + x1 = _mm256_mask_loadrs_epi64(x1, m2, px1); + x1 = _mm256_maskz_loadrs_epi64(m2, px1); + x1 = _mm256_loadrs_epi16(px1); + x1 = _mm256_mask_loadrs_epi16(x1, m3, px1); + x1 = _mm256_maskz_loadrs_epi16(m3, px1); + + x2 = _mm_loadrs_epi8(px2); + x2 = _mm_mask_loadrs_epi8(x2, m3, px2); + x2 = _mm_maskz_loadrs_epi8(m3, px2); + x2 = _mm_loadrs_epi32(px2); + x2 = _mm_mask_loadrs_epi32(x2, m2, px2); + x2 = _mm_maskz_loadrs_epi32(m2, px2); + x2 = _mm_loadrs_epi64(px2); + x2 = _mm_mask_loadrs_epi64(x2, m2, px2); + x2 = _mm_maskz_loadrs_epi64(m2, px2); + x2 = _mm_loadrs_epi16(px2); + x2 = _mm_mask_loadrs_epi16(x2, m2, px2); + x2 = _mm_maskz_loadrs_epi16(m2, px2); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 4aa6d7e..5fdc754 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -93,6 +93,7 @@ extern void test_amx_avx512 (void) __attribute__((__target__("amx-avx512"))); extern void test_amx_tf32 (void) __attribute__((__target__("amx-tf32"))); extern void test_amx_transpose (void) __attribute__((__target__("amx-transpose"))); extern void test_amx_fp8 (void) __attribute__((__target__("amx-fp8"))); +extern void test_movrs (void) __attribute__((__target__("movrs"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq"))); @@ -187,6 +188,7 @@ extern void test_no_amx_avx512 (void) __attribute__((__target__("no-amx-avx512" extern void test_no_amx_tf32 (void) __attribute__((__target__("no-amx-tf32"))); extern void test_no_amx_transpose (void) __attribute__((__target__("no-amx-transpose"))); extern void test_no_amx_fp8 (void) __attribute__((__target__("no-amx-fp8"))); +extern void test_no_movrs (void) __attribute__((__target__("no-movrs"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/movrs-1.c b/gcc/testsuite/gcc.target/i386/movrs-1.c new file mode 100644 index 0000000..b62dc23 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/movrs-1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-mmovrs -O2" } */ +/* { dg-final { scan-assembler-times "movrsb\[ \\t\]\+\\(%r.x\\), %.l" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movrsw\[ \\t\]\+\\(%r.x\\), %.x" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movrsl\[ \\t\]\+\\(%r.x\\), %e.x" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movrsq\[ \\t\]\+\\(%r.x\\), %r.x" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "prefetchrst2\[ \\t\]" 1 } } */ + + +#include <immintrin.h> + +volatile char x1; +volatile short x2; +volatile int x3; +volatile long long x4; +char * p1; +short * p2; +int * p3; +long long * p4; + + +void extern +movrs_test (void) +{ + _m_prefetchrs (p1); +#ifdef __x86_64__ + x1 = _movrs_i8 (p1); + x2 = _movrs_i16 (p2); + x3 = _movrs_i32 (p3); + x4 = _movrs_i64 (p4); +#endif +} diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index f9568c9..53480d6 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index ff4ac6a..f0acb38 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 2146cf5..96f1412 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 684ebfc..6a4f48f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs") #endif #include <immintrin.h> test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 8f3c379..8fffacb 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -1082,6 +1082,6 @@ #define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs") #include <x86intrin.h> |