| | | |
|---|---|---|
| author | Ian Lance Taylor <iant@golang.org> | 2021-09-17 08:46:39 -0700 |
| committer | Ian Lance Taylor <iant@golang.org> | 2021-09-17 08:46:39 -0700 |
| commit | a0791d0ed4f147ef347e83f4aedc7ad03f1a2008 (patch) | |
| tree | 7b3526910798e4cff7a7200d684383046bac6225 /gcc | |
| parent | e252b51ccde010cbd2a146485d8045103cd99533 (diff) | |
| parent | 89be17a1b231ade643f28fbe616d53377e069da8 (diff) | |
| download | gcc-a0791d0ed4f147ef347e83f4aedc7ad03f1a2008.zip gcc-a0791d0ed4f147ef347e83f4aedc7ad03f1a2008.tar.gz gcc-a0791d0ed4f147ef347e83f4aedc7ad03f1a2008.tar.bz2 | |
Merge from trunk revision 89be17a1b231ade643f28fbe616d53377e069da8.
Diffstat (limited to 'gcc')
349 files changed, 21079 insertions, 1669 deletions
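Among the changes merged below are new AVX512-FP16 conversion and math intrinsics (for example `_mm512_cvtepi32_ph` and `_mm256_sqrt_ph` in the gcc/ChangeLog entries). As an illustrative sketch only, not part of this commit, here is typical usage of two of those intrinsics; the wrapper function name and the build flags are assumptions:

```c
/* Hypothetical usage sketch (not from this merge): convert sixteen packed
   32-bit integers to _Float16 and take the element-wise square root, using
   intrinsics named in the gcc/ChangeLog entries below.
   Assumed build line: gcc -O2 -mavx512fp16 -mavx512vl demo.c  */
#include <immintrin.h>

__m256h
int32_to_half_sqrt (__m512i v)
{
  __m256h h = _mm512_cvtepi32_ph (v);  /* 16 x int32 -> 16 x _Float16 */
  return _mm256_sqrt_ph (h);           /* per-element square root */
}
```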
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6c470c8..bb5576a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,893 @@ +2021-09-16 Bill Schmidt <wschmidt@linux.ibm.com> + + * config/rs6000/rs6000-c.c (rs6000-builtins.h): New include. + (altivec_resolve_new_overloaded_builtin): New forward decl. + (rs6000_new_builtin_type_compatible): New function. + (altivec_resolve_overloaded_builtin): Call + altivec_resolve_new_overloaded_builtin. + (altivec_build_new_resolved_builtin): New function. + (altivec_resolve_new_overloaded_builtin): Likewise. + * config/rs6000/rs6000-call.c (rs6000_new_builtin_is_supported): + Likewise. + * config/rs6000/rs6000-gen-builtins.c (write_decls): Remove _p from + name of rs6000_new_builtin_is_supported. + +2021-09-16 UroÅ¡ Bizjak <ubizjak@gmail.com> + + * config/i386/i386-protos.h (ix86_decompose_address): + Change return type to bool. + * config/i386/i386.c (ix86_decompose_address): Ditto. + +2021-09-16 Tobias Burnus <tobias@codesourcery.com> + + PR target/102353 + * config/rs6000/t-rs6000 (build/rs6000-gen-builtins.o, build/rbtree.o): + Added 'build/' to target, use build/%.o rule. + (build/rs6000-gen-builtins$(build_exeext)): Add 'build/' and + '$(build_exeext)' to target and 'build/' for the *.o files. + (rs6000-builtins.c): Update for those changes; run rs6000-gen-builtins + with $(RUN_GEN). + +2021-09-16 Martin Jambor <mjambor@suse.cz> + + * cgraph.c (cgraph_node::dump): Do not check caller count sums if + the body has been removed. Remove trailing whitespace. + +2021-09-16 Richard Biener <rguenther@suse.de> + + PR middle-end/102360 + * internal-fn.c (expand_DEFERRED_INIT): Make pattern-init + of non-memory more robust. + +2021-09-16 Daniel Cederman <cederman@gaisler.com> + + * config/sparc/sparc-opts.h (enum sparc_processor_type): Add LEON5 + * config/sparc/sparc.c (struct processor_costs): Add LEON5 costs + (leon5_adjust_cost): Increase cost of store with data dependency + on ALU instruction and FPU anti-dependencies. + (sparc_option_override): Add LEON5 costs + (sparc_adjust_cost): Add LEON5 cost adjustments + * config/sparc/sparc.h: Add LEON5 + * config/sparc/sparc.md: Include LEON5 scheduling information + * config/sparc/sparc.opt: Add LEON5 + * doc/invoke.texi: Add LEON5 + * config/sparc/leon5.md: New file. + +2021-09-16 Daniel Cederman <cederman@gaisler.com> + + * config/sparc/sparc.md (stack_protect_set32): Add NOP to prevent + sensitive sequence for B2BST errata workaround. + +2021-09-16 Daniel Cederman <cederman@gaisler.com> + + * config/sparc/sparc.c (sparc_do_work_around_errata): Do not begin + functions with atomic instruction in the UT700 errata workaround. + +2021-09-16 Daniel Cederman <cederman@gaisler.com> + + * config/sparc/sparc.c (next_active_non_empty_insn): New function + that returns next active non empty assembly instruction. + (sparc_do_work_around_errata): Use new function. + +2021-09-16 Daniel Cederman <cederman@gaisler.com> + + * config/sparc/sparc.c (store_insn_p): Add predicate for store + attributes. + (load_insn_p): Add predicate for load attributes. + (sparc_do_work_around_errata): Use new predicates. + +2021-09-16 Andreas Larsson <andreas@gaisler.com> + + * config/sparc/sparc.c (dump_target_flag_bits): Print bit names for + LEON and LEON3. + +2021-09-16 Martin Liska <mliska@suse.cz> + + * config/mips/netbsd.h: Fix typo in name of a macro. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + PR middle-end/102080 + * match.pd: Check mask type when doing cond_op related gimple + simplification. 
+ * tree.c (is_truth_type_for): New function. + * tree.h (is_truth_type_for): New declaration. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h (_mm512_cvtepi32_ph): New + intrinsic. + (_mm512_mask_cvtepi32_ph): Likewise. + (_mm512_maskz_cvtepi32_ph): Likewise. + (_mm512_cvt_roundepi32_ph): Likewise. + (_mm512_mask_cvt_roundepi32_ph): Likewise. + (_mm512_maskz_cvt_roundepi32_ph): Likewise. + (_mm512_cvtepu32_ph): Likewise. + (_mm512_mask_cvtepu32_ph): Likewise. + (_mm512_maskz_cvtepu32_ph): Likewise. + (_mm512_cvt_roundepu32_ph): Likewise. + (_mm512_mask_cvt_roundepu32_ph): Likewise. + (_mm512_maskz_cvt_roundepu32_ph): Likewise. + (_mm512_cvtepi64_ph): Likewise. + (_mm512_mask_cvtepi64_ph): Likewise. + (_mm512_maskz_cvtepi64_ph): Likewise. + (_mm512_cvt_roundepi64_ph): Likewise. + (_mm512_mask_cvt_roundepi64_ph): Likewise. + (_mm512_maskz_cvt_roundepi64_ph): Likewise. + (_mm512_cvtepu64_ph): Likewise. + (_mm512_mask_cvtepu64_ph): Likewise. + (_mm512_maskz_cvtepu64_ph): Likewise. + (_mm512_cvt_roundepu64_ph): Likewise. + (_mm512_mask_cvt_roundepu64_ph): Likewise. + (_mm512_maskz_cvt_roundepu64_ph): Likewise. + (_mm512_cvtepi16_ph): Likewise. + (_mm512_mask_cvtepi16_ph): Likewise. + (_mm512_maskz_cvtepi16_ph): Likewise. + (_mm512_cvt_roundepi16_ph): Likewise. + (_mm512_mask_cvt_roundepi16_ph): Likewise. + (_mm512_maskz_cvt_roundepi16_ph): Likewise. + (_mm512_cvtepu16_ph): Likewise. + (_mm512_mask_cvtepu16_ph): Likewise. + (_mm512_maskz_cvtepu16_ph): Likewise. + (_mm512_cvt_roundepu16_ph): Likewise. + (_mm512_mask_cvt_roundepu16_ph): Likewise. + (_mm512_maskz_cvt_roundepu16_ph): Likewise. + * config/i386/avx512fp16vlintrin.h (_mm_cvtepi32_ph): New + intrinsic. + (_mm_mask_cvtepi32_ph): Likewise. + (_mm_maskz_cvtepi32_ph): Likewise. + (_mm256_cvtepi32_ph): Likewise. + (_mm256_mask_cvtepi32_ph): Likewise. + (_mm256_maskz_cvtepi32_ph): Likewise. + (_mm_cvtepu32_ph): Likewise. + (_mm_mask_cvtepu32_ph): Likewise. + (_mm_maskz_cvtepu32_ph): Likewise. + (_mm256_cvtepu32_ph): Likewise. + (_mm256_mask_cvtepu32_ph): Likewise. + (_mm256_maskz_cvtepu32_ph): Likewise. + (_mm_cvtepi64_ph): Likewise. + (_mm_mask_cvtepi64_ph): Likewise. + (_mm_maskz_cvtepi64_ph): Likewise. + (_mm256_cvtepi64_ph): Likewise. + (_mm256_mask_cvtepi64_ph): Likewise. + (_mm256_maskz_cvtepi64_ph): Likewise. + (_mm_cvtepu64_ph): Likewise. + (_mm_mask_cvtepu64_ph): Likewise. + (_mm_maskz_cvtepu64_ph): Likewise. + (_mm256_cvtepu64_ph): Likewise. + (_mm256_mask_cvtepu64_ph): Likewise. + (_mm256_maskz_cvtepu64_ph): Likewise. + (_mm_cvtepi16_ph): Likewise. + (_mm_mask_cvtepi16_ph): Likewise. + (_mm_maskz_cvtepi16_ph): Likewise. + (_mm256_cvtepi16_ph): Likewise. + (_mm256_mask_cvtepi16_ph): Likewise. + (_mm256_maskz_cvtepi16_ph): Likewise. + (_mm_cvtepu16_ph): Likewise. + (_mm_mask_cvtepu16_ph): Likewise. + (_mm_maskz_cvtepu16_ph): Likewise. + (_mm256_cvtepu16_ph): Likewise. + (_mm256_mask_cvtepu16_ph): Likewise. + (_mm256_maskz_cvtepu16_ph): Likewise. + * config/i386/i386-builtin-types.def: Add corresponding builtin types. + * config/i386/i386-builtin.def: Add corresponding new builtins. + * config/i386/i386-expand.c + (ix86_expand_args_builtin): Handle new builtin types. + (ix86_expand_round_builtin): Ditto. + * config/i386/i386-modes.def: Declare V2HF and V6HF. + * config/i386/sse.md (VI2H_AVX512VL): New. + (qq2phsuff): Ditto. + (sseintvecmode): Add HF vector modes. + (avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><round_name>): + New. 
+ (avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>): Ditto. + (*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>): Ditto. + (avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask): Ditto. + (*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask): Ditto. + (*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask_1): Ditto. + (avx512fp16_vcvt<floatsuffix>qq2ph_v2di): Ditto. + (*avx512fp16_vcvt<floatsuffix>qq2ph_v2di): Ditto. + (avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask): Ditto. + (*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask): Ditto. + (*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1): Ditto. + * config/i386/subst.md (round_qq2phsuff): New subst_attr. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h (_mm512_cvtph_epi32): + New intrinsic/ + (_mm512_mask_cvtph_epi32): Likewise. + (_mm512_maskz_cvtph_epi32): Likewise. + (_mm512_cvt_roundph_epi32): Likewise. + (_mm512_mask_cvt_roundph_epi32): Likewise. + (_mm512_maskz_cvt_roundph_epi32): Likewise. + (_mm512_cvtph_epu32): Likewise. + (_mm512_mask_cvtph_epu32): Likewise. + (_mm512_maskz_cvtph_epu32): Likewise. + (_mm512_cvt_roundph_epu32): Likewise. + (_mm512_mask_cvt_roundph_epu32): Likewise. + (_mm512_maskz_cvt_roundph_epu32): Likewise. + (_mm512_cvtph_epi64): Likewise. + (_mm512_mask_cvtph_epi64): Likewise. + (_mm512_maskz_cvtph_epi64): Likewise. + (_mm512_cvt_roundph_epi64): Likewise. + (_mm512_mask_cvt_roundph_epi64): Likewise. + (_mm512_maskz_cvt_roundph_epi64): Likewise. + (_mm512_cvtph_epu64): Likewise. + (_mm512_mask_cvtph_epu64): Likewise. + (_mm512_maskz_cvtph_epu64): Likewise. + (_mm512_cvt_roundph_epu64): Likewise. + (_mm512_mask_cvt_roundph_epu64): Likewise. + (_mm512_maskz_cvt_roundph_epu64): Likewise. + (_mm512_cvtph_epi16): Likewise. + (_mm512_mask_cvtph_epi16): Likewise. + (_mm512_maskz_cvtph_epi16): Likewise. + (_mm512_cvt_roundph_epi16): Likewise. + (_mm512_mask_cvt_roundph_epi16): Likewise. + (_mm512_maskz_cvt_roundph_epi16): Likewise. + (_mm512_cvtph_epu16): Likewise. + (_mm512_mask_cvtph_epu16): Likewise. + (_mm512_maskz_cvtph_epu16): Likewise. + (_mm512_cvt_roundph_epu16): Likewise. + (_mm512_mask_cvt_roundph_epu16): Likewise. + (_mm512_maskz_cvt_roundph_epu16): Likewise. + * config/i386/avx512fp16vlintrin.h (_mm_cvtph_epi32): + New intrinsic. + (_mm_mask_cvtph_epi32): Likewise. + (_mm_maskz_cvtph_epi32): Likewise. + (_mm256_cvtph_epi32): Likewise. + (_mm256_mask_cvtph_epi32): Likewise. + (_mm256_maskz_cvtph_epi32): Likewise. + (_mm_cvtph_epu32): Likewise. + (_mm_mask_cvtph_epu32): Likewise. + (_mm_maskz_cvtph_epu32): Likewise. + (_mm256_cvtph_epu32): Likewise. + (_mm256_mask_cvtph_epu32): Likewise. + (_mm256_maskz_cvtph_epu32): Likewise. + (_mm_cvtph_epi64): Likewise. + (_mm_mask_cvtph_epi64): Likewise. + (_mm_maskz_cvtph_epi64): Likewise. + (_mm256_cvtph_epi64): Likewise. + (_mm256_mask_cvtph_epi64): Likewise. + (_mm256_maskz_cvtph_epi64): Likewise. + (_mm_cvtph_epu64): Likewise. + (_mm_mask_cvtph_epu64): Likewise. + (_mm_maskz_cvtph_epu64): Likewise. + (_mm256_cvtph_epu64): Likewise. + (_mm256_mask_cvtph_epu64): Likewise. + (_mm256_maskz_cvtph_epu64): Likewise. + (_mm_cvtph_epi16): Likewise. + (_mm_mask_cvtph_epi16): Likewise. + (_mm_maskz_cvtph_epi16): Likewise. + (_mm256_cvtph_epi16): Likewise. + (_mm256_mask_cvtph_epi16): Likewise. + (_mm256_maskz_cvtph_epi16): Likewise. + (_mm_cvtph_epu16): Likewise. + (_mm_mask_cvtph_epu16): Likewise. + (_mm_maskz_cvtph_epu16): Likewise. + (_mm256_cvtph_epu16): Likewise. + (_mm256_mask_cvtph_epu16): Likewise. 
+ (_mm256_maskz_cvtph_epu16): Likewise. + * config/i386/i386-builtin-types.def: Add new builtin types. + * config/i386/i386-builtin.def: Add new builtins. + * config/i386/i386-expand.c + (ix86_expand_args_builtin): Handle new builtin types. + (ix86_expand_round_builtin): Ditto. + * config/i386/sse.md (sseintconvert): New. + (ssePHmode): Ditto. + (UNSPEC_US_FIX_NOTRUNC): Ditto. + (sseintconvertsignprefix): Ditto. + (avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><mask_name><round_name>): + Ditto. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h: (_mm_cvtsi16_si128): + New intrinsic. + (_mm_cvtsi128_si16): Likewise. + (_mm_mask_load_sh): Likewise. + (_mm_maskz_load_sh): Likewise. + (_mm_mask_store_sh): Likewise. + (_mm_move_sh): Likewise. + (_mm_mask_move_sh): Likewise. + (_mm_maskz_move_sh): Likewise. + * config/i386/i386-builtin-types.def: Add corresponding builtin types. + * config/i386/i386-builtin.def: Add corresponding new builtins. + * config/i386/i386-expand.c + (ix86_expand_special_args_builtin): Handle new builtin types. + (ix86_expand_vector_init_one_nonzero): Adjust for FP16 target. + * config/i386/sse.md (VI2F): New mode iterator. + (vec_set<mode>_0): Use new mode iterator. + (avx512f_mov<ssescalarmodelower>_mask): Adjust for HF vector mode. + (avx512f_store<mode>_mask): Ditto. + +2021-09-16 Kewen Lin <linkw@linux.ibm.com> + + * config/rs6000/rs6000.opt (-mtoc-fusion): Remove. + +2021-09-15 David Edelsohn <dje.gcc@gmail.com> + + * config/rs6000/rs6000.c (rs6000_xcoff_encode_section_info): + Proceed if no symbol summary or the symbol alias flag is false. + +2021-09-15 Jakub Jelinek <jakub@redhat.com> + + PR c++/88578 + PR c++/102295 + * varasm.c (output_constructor_regular_field): Instead of assertion + that array_size_for_constructor result is equal to size of + TREE_TYPE (local->val) in bytes, assert that the type size is greater + or equal to array_size_for_constructor result and use type size as + fieldsize. + +2021-09-15 Martin Liska <mliska@suse.cz> + + PR target/102351 + * config/i386/vxworks.h: Use new macro TARGET_CPU_P. + +2021-09-15 Martin Liska <mliska@suse.cz> + + PR target/102349 + * config/rs6000/rs6000.c (rs6000_xcoff_encode_section_info): + Check that we have a symbol summary for a symbol. + +2021-09-15 Richard Biener <rguenther@suse.de> + + PR target/102348 + * config/rs6000/lynx.h: Remove undef of PREFERRED_DEBUGGING_TYPE + to inherit from elfos.h + +2021-09-15 liuhongt <hongtao.liu@intel.com> + + PR target/102327 + * config/i386/i386-expand.c + (ix86_expand_vector_init_interleave): Use puncklwd to pack 2 + HFmodes. + (ix86_expand_vector_set): Use blendw instead of pinsrw. + * config/i386/i386.c (ix86_can_change_mode_class): Adjust for + AVX512FP16 which supports 16bit vector load. + * config/i386/sse.md (avx512bw_interleave_highv32hi<mask_name>): + Rename to .. + (avx512bw_interleave_high<mode><mask_name>): .. this, and + extend to V32HFmode. + (avx2_interleave_highv16hi<mask_name>): Rename to .. + (avx2_interleave_high<mode><mask_name>): .. this, and extend + to V16HFmode. + (vec_interleave_highv8hi<mask_name>): Rename to .. + (vec_interleave_high<mode><mask_name>): .. this, and extend to V8HFmode. + (<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>): + Rename to .. + (<mask_codefor>avx512bw_interleave_low<mode><mask_name>): + this, and extend to V32HFmode. + (avx2_interleave_lowv16hi<mask_name>): Rename to .. + (avx2_interleave_low<mode><mask_name>): .. this, and extend to V16HFmode. 
+ (vec_interleave_lowv8hi<mask_name>): Rename to .. + (vec_interleave_low<mode><mask_name>): .. this, and extend to V8HFmode. + (sse4_1_pblendw): Rename to .. + (sse4_1_pblend<blendsuf>): .. this, and extend to V8HFmode. + (avx2_pblendph): New define_expand. + (<sse2p4_1>_pinsr<ssemodesuffix>): Refactor, use + sseintmodesuffix instead of ssemodesuffix. + (blendsuf): New mode attr. + +2021-09-15 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (dr_misalignment): Move out of line. + (dr_target_alignment): New. + (DR_TARGET_ALIGNMENT): Wrap dr_target_alignment. + (set_dr_target_alignment): New. + (SET_DR_TARGET_ALIGNMENT): Wrap set_dr_target_alignment. + * tree-vect-data-refs.c (dr_misalignment): Compute and + return the group members misalignment. + (vect_compute_data_ref_alignment): Use SET_DR_TARGET_ALIGNMENT. + (vect_analyze_data_refs_alignment): Compute alignment only + for the first element of a DR group. + (vect_slp_analyze_node_alignment): Likewise. + +2021-09-15 Hongyu Wang <hongyu.wang@intel.com> + + * config/i386/avx512fp16intrin.h: Adjust all builtin calls. + * config/i386/avx512fp16vlintrin.h: Likewise. + * config/i386/i386-builtin.def: Adjust builtin name and + enumeration to match AVX512F style. + +2021-09-15 Richard Biener <rguenther@suse.de> + + PR tree-optimization/102318 + * tree-vect-loop.c (vect_transform_cycle_phi): Revert + previous change and do the mode conversion separately from + the sign conversion. + +2021-09-15 Hongtao Liu <hongtao.liu@intel.com> + Peter Cordes <peter@cordes.ca> + + PR target/91103 + * config/i386/sse.md (extract_suf): Add V8SF/V8SI/V4DF/V4DI. + (*vec_extract<mode><ssescalarmodelower>_valign): Output + vextract{i,f}{32x4,64x2} instruction when byte_offset % 16 == + 0. + +2021-09-15 Richard Biener <rguenther@suse.de> + + * config.gcc: Remove vax-*-openbsd* configuration. + +2021-09-15 Richard Biener <rguenther@suse.de> + + * config.gcc: Remove m68k-openbsd. + +2021-09-15 Max Filippov <jcmvbkbc@gmail.com> + + PR target/102336 + * config/xtensa/t-xtensa (TM_H): Add include/xtensa-config.h. + +2021-09-14 Peter Bergner <bergner@linux.ibm.com> + + * config/rs6000/mma.md (unspec): Delete UNSPEC_MMA_XXSETACCZ. + (unspecv): Add UNSPECV_MMA_XXSETACCZ. + (*mma_xxsetaccz): Delete. + (mma_xxsetaccz): Change to define_insn. Remove operand 1. + Use UNSPECV_MMA_XXSETACCZ. Update comment. + * config/rs6000/rs6000.c (rs6000_rtx_costs): Use UNSPECV_MMA_XXSETACCZ. + +2021-09-14 Iain Sandoe <iain@sandoe.co.uk> + + * Makefile.in: Remove variables related to applying no-PIE + to the exes on $build. + * configure: Regenerate. + * configure.ac: Remove configuration related to applying + no-PIE to the exes on $build. + +2021-09-14 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc.md (doloop_end): Add missing mode. + (loop_end): Likewise. + +2021-09-14 Jakub Jelinek <jakub@redhat.com> + + * gimplify.c (goa_stabilize_expr): Add depth argument, propagate + it to recursive calls, for depth above 7 just gimplify or return. + Perform a test even for MODIFY_EXPR, ADDR_EXPR, COMPOUND_EXPR with + __builtin_clear_padding and TARGET_EXPR. + (gimplify_omp_atomic): Adjust goa_stabilize_expr callers. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h (_mm_fpclass_sh_mask): + New intrinsic. + (_mm_mask_fpclass_sh_mask): Likewise. + (_mm512_mask_fpclass_ph_mask): Likewise. + (_mm512_fpclass_ph_mask): Likewise. + (_mm_getexp_sh): Likewise. + (_mm_mask_getexp_sh): Likewise. + (_mm_maskz_getexp_sh): Likewise. 
+ (_mm512_getexp_ph): Likewise. + (_mm512_mask_getexp_ph): Likewise. + (_mm512_maskz_getexp_ph): Likewise. + (_mm_getexp_round_sh): Likewise. + (_mm_mask_getexp_round_sh): Likewise. + (_mm_maskz_getexp_round_sh): Likewise. + (_mm512_getexp_round_ph): Likewise. + (_mm512_mask_getexp_round_ph): Likewise. + (_mm512_maskz_getexp_round_ph): Likewise. + (_mm_getmant_sh): Likewise. + (_mm_mask_getmant_sh): Likewise. + (_mm_maskz_getmant_sh): Likewise. + (_mm512_getmant_ph): Likewise. + (_mm512_mask_getmant_ph): Likewise. + (_mm512_maskz_getmant_ph): Likewise. + (_mm_getmant_round_sh): Likewise. + (_mm_mask_getmant_round_sh): Likewise. + (_mm_maskz_getmant_round_sh): Likewise. + (_mm512_getmant_round_ph): Likewise. + (_mm512_mask_getmant_round_ph): Likewise. + (_mm512_maskz_getmant_round_ph): Likewise. + * config/i386/avx512fp16vlintrin.h (_mm_mask_fpclass_ph_mask): + New intrinsic. + (_mm_fpclass_ph_mask): Likewise. + (_mm256_mask_fpclass_ph_mask): Likewise. + (_mm256_fpclass_ph_mask): Likewise. + (_mm256_getexp_ph): Likewise. + (_mm256_mask_getexp_ph): Likewise. + (_mm256_maskz_getexp_ph): Likewise. + (_mm_getexp_ph): Likewise. + (_mm_mask_getexp_ph): Likewise. + (_mm_maskz_getexp_ph): Likewise. + (_mm256_getmant_ph): Likewise. + (_mm256_mask_getmant_ph): Likewise. + (_mm256_maskz_getmant_ph): Likewise. + (_mm_getmant_ph): Likewise. + (_mm_mask_getmant_ph): Likewise. + (_mm_maskz_getmant_ph): Likewise. + * config/i386/i386-builtin-types.def: Add corresponding builtin types. + * config/i386/i386-builtin.def: Add corresponding new builtins. + * config/i386/i386-expand.c + (ix86_expand_args_builtin): Handle new builtin types. + (ix86_expand_round_builtin): Ditto. + * config/i386/sse.md (vecmemsuffix): Add HF vector modes. + (<avx512>_getexp<mode><mask_name><round_saeonly_name>): Adjust + to support HF vector modes. + (avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name): + Ditto. + (avx512dq_fpclass<mode><mask_scalar_merge_name>): Ditto. + (avx512dq_vmfpclass<mode><mask_scalar_merge_name>): Ditto. + (<avx512>_getmant<mode><mask_name><round_saeonly_name>): Ditto. + (avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>): + Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h (_mm512_reduce_ph): + New intrinsic. + (_mm512_mask_reduce_ph): Likewise. + (_mm512_maskz_reduce_ph): Likewise. + (_mm512_reduce_round_ph): Likewise. + (_mm512_mask_reduce_round_ph): Likewise. + (_mm512_maskz_reduce_round_ph): Likewise. + (_mm_reduce_sh): Likewise. + (_mm_mask_reduce_sh): Likewise. + (_mm_maskz_reduce_sh): Likewise. + (_mm_reduce_round_sh): Likewise. + (_mm_mask_reduce_round_sh): Likewise. + (_mm_maskz_reduce_round_sh): Likewise. + (_mm512_roundscale_ph): Likewise. + (_mm512_mask_roundscale_ph): Likewise. + (_mm512_maskz_roundscale_ph): Likewise. + (_mm512_roundscale_round_ph): Likewise. + (_mm512_mask_roundscale_round_ph): Likewise. + (_mm512_maskz_roundscale_round_ph): Likewise. + (_mm_roundscale_sh): Likewise. + (_mm_mask_roundscale_sh): Likewise. + (_mm_maskz_roundscale_sh): Likewise. + (_mm_roundscale_round_sh): Likewise. + (_mm_mask_roundscale_round_sh): Likewise. + (_mm_maskz_roundscale_round_sh): Likewise. + * config/i386/avx512fp16vlintrin.h: (_mm_reduce_ph): + New intrinsic. + (_mm_mask_reduce_ph): Likewise. + (_mm_maskz_reduce_ph): Likewise. + (_mm256_reduce_ph): Likewise. + (_mm256_mask_reduce_ph): Likewise. + (_mm256_maskz_reduce_ph): Likewise. + (_mm_roundscale_ph): Likewise. + (_mm_mask_roundscale_ph): Likewise. 
+ (_mm_maskz_roundscale_ph): Likewise. + (_mm256_roundscale_ph): Likewise. + (_mm256_mask_roundscale_ph): Likewise. + (_mm256_maskz_roundscale_ph): Likewise. + * config/i386/i386-builtin-types.def: Add corresponding builtin types. + * config/i386/i386-builtin.def: Add corresponding new builtins. + * config/i386/i386-expand.c + (ix86_expand_args_builtin): Handle new builtin types. + (ix86_expand_round_builtin): Ditto. + * config/i386/sse.md (<mask_codefor>reducep<mode><mask_name>): + Renamed to ... + (<mask_codefor>reducep<mode><mask_name><round_saeonly_name>): + ... this, and adjust for round operands. + (reduces<mode><mask_scalar_name>): Likewise, with ... + (reduces<mode><mask_scalar_name><round_saeonly_scalar_name): + ... this. + (<avx512>_rndscale<mode><mask_name><round_saeonly_name>): + Adjust for HF vector modes. + (avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>): + Ditto. + (*avx512f_rndscale<mode><round_saeonly_name>): Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h: (_mm512_rcp_ph): + New intrinsic. + (_mm512_mask_rcp_ph): Likewise. + (_mm512_maskz_rcp_ph): Likewise. + (_mm_rcp_sh): Likewise. + (_mm_mask_rcp_sh): Likewise. + (_mm_maskz_rcp_sh): Likewise. + (_mm512_scalef_ph): Likewise. + (_mm512_mask_scalef_ph): Likewise. + (_mm512_maskz_scalef_ph): Likewise. + (_mm512_scalef_round_ph): Likewise. + (_mm512_mask_scalef_round_ph): Likewise. + (_mm512_maskz_scalef_round_ph): Likewise. + (_mm_scalef_sh): Likewise. + (_mm_mask_scalef_sh): Likewise. + (_mm_maskz_scalef_sh): Likewise. + (_mm_scalef_round_sh): Likewise. + (_mm_mask_scalef_round_sh): Likewise. + (_mm_maskz_scalef_round_sh): Likewise. + * config/i386/avx512fp16vlintrin.h (_mm_rcp_ph): + New intrinsic. + (_mm256_rcp_ph): Likewise. + (_mm_mask_rcp_ph): Likewise. + (_mm256_mask_rcp_ph): Likewise. + (_mm_maskz_rcp_ph): Likewise. + (_mm256_maskz_rcp_ph): Likewise. + (_mm_scalef_ph): Likewise. + (_mm256_scalef_ph): Likewise. + (_mm_mask_scalef_ph): Likewise. + (_mm256_mask_scalef_ph): Likewise. + (_mm_maskz_scalef_ph): Likewise. + (_mm256_maskz_scalef_ph): Likewise. + * config/i386/i386-builtin.def: Add new builtins. + * config/i386/sse.md (VFH_AVX512VL): New. + (avx512fp16_rcp<mode>2<mask_name>): Ditto. + (avx512fp16_vmrcpv8hf2<mask_scalar_name>): Ditto. + (avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>): + Adjust to support HF vector modes. + (<avx512>_scalef<mode><mask_name><round_name>): Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * config/i386/avx512fp16intrin.h: (_mm512_sqrt_ph): + New intrinsic. + (_mm512_mask_sqrt_ph): Likewise. + (_mm512_maskz_sqrt_ph): Likewise. + (_mm512_sqrt_round_ph): Likewise. + (_mm512_mask_sqrt_round_ph): Likewise. + (_mm512_maskz_sqrt_round_ph): Likewise. + (_mm512_rsqrt_ph): Likewise. + (_mm512_mask_rsqrt_ph): Likewise. + (_mm512_maskz_rsqrt_ph): Likewise. + (_mm_rsqrt_sh): Likewise. + (_mm_mask_rsqrt_sh): Likewise. + (_mm_maskz_rsqrt_sh): Likewise. + (_mm_sqrt_sh): Likewise. + (_mm_mask_sqrt_sh): Likewise. + (_mm_maskz_sqrt_sh): Likewise. + (_mm_sqrt_round_sh): Likewise. + (_mm_mask_sqrt_round_sh): Likewise. + (_mm_maskz_sqrt_round_sh): Likewise. + * config/i386/avx512fp16vlintrin.h (_mm_sqrt_ph): New intrinsic. + (_mm256_sqrt_ph): Likewise. + (_mm_mask_sqrt_ph): Likewise. + (_mm256_mask_sqrt_ph): Likewise. + (_mm_maskz_sqrt_ph): Likewise. + (_mm256_maskz_sqrt_ph): Likewise. + (_mm_rsqrt_ph): Likewise. + (_mm256_rsqrt_ph): Likewise. + (_mm_mask_rsqrt_ph): Likewise. + (_mm256_mask_rsqrt_ph): Likewise. 
+ (_mm_maskz_rsqrt_ph): Likewise. + (_mm256_maskz_rsqrt_ph): Likewise. + * config/i386/i386-builtin-types.def: Add corresponding builtin types. + * config/i386/i386-builtin.def: Add corresponding new builtins. + * config/i386/i386-expand.c + (ix86_expand_args_builtin): Handle new builtins. + (ix86_expand_round_builtin): Ditto. + * config/i386/sse.md (VF_AVX512FP16VL): New. + (sqrt<mode>2): Adjust for HF vector modes. + (<sse>_sqrt<mode>2<mask_name><round_name>): Likewise. + (<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>): + Likewise. + (<sse>_rsqrt<mode>2<mask_name>): New. + (avx512fp16_vmrsqrtv8hf2<mask_scalar_name>): Likewise. + +2021-09-13 Thomas Schwinge <thomas@codesourcery.com> + + PR bootstrap/101574 + * diagnostic-spec.c (warning_suppressed_at, copy_warning): Handle + 'RESERVED_LOCATION_P' locations. + * warning-control.cc (get_nowarn_spec, suppress_warning) + (copy_warning): Likewise. + +2021-09-13 Thomas Schwinge <thomas@codesourcery.com> + + * diagnostic-spec.h (typedef xint_hash_t): Use 'location_t' instead of... + (typedef key_type_t): ... this. Remove. + (nowarn_map): Document. + * diagnostic-spec.c (nowarn_map): Likewise. + * warning-control.cc (convert_to_key): Evolve functions into... + (get_location): ... these. Adjust all users. + +2021-09-13 Thomas Schwinge <thomas@codesourcery.com> + + * warning-control.cc (copy_warning): Remove 'nowarn_map' setup. + +2021-09-13 Jason Merrill <jason@redhat.com> + + * params.opt: Add destructive-interference-size and + constructive-interference-size. + * doc/invoke.texi: Document them. + * config/aarch64/aarch64.c (aarch64_override_options_internal): + Set them. + * config/arm/arm.c (arm_option_override): Set them. + * config/i386/i386-options.c (ix86_option_override_internal): + Set them. + +2021-09-13 Martin Liska <mliska@suse.cz> + H.J. Lu <hjl.tools@gmail.com> + + PR target/101696 + * common/config/i386/cpuinfo.h (cpu_indicator_init): Add support + for x86-64 micro levels for __builtin_cpu_supports. + * common/config/i386/i386-cpuinfo.h (enum feature_priority): + Add priorities for the micro-arch levels. + (enum processor_features): Add new features. + * common/config/i386/i386-isas.h: Add micro-arch features. + * config/i386/i386-builtins.c (get_builtin_code_for_version): + Support the micro-arch levels by callsing + __builtin_cpu_supports. + * doc/extend.texi: Document that the levels are support by + __builtin_cpu_supports. + +2021-09-13 Andrew Pinski <apinski@marvell.com> + + PR target/95969 + * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check): + New function. + (aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK. + (aarch64_general_gimple_fold_builtin): Likewise. + +2021-09-13 Andrew Pinski <apinski@marvell.com> + + * config.gcc: Add m32r-*-linux* and m32rle-*-linux* + to the Unsupported targets list. + Remove support for m32r-*-linux* and m32rle-*-linux*. + * config/m32r/linux.h: Removed. + * config/m32r/t-linux: Removed. + +2021-09-13 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + PR target/102252 + * config/aarch64/aarch64.c (aarch64_classify_address): Don't allow + register index for SVE predicate modes. + +2021-09-13 Aldy Hernandez <aldyh@redhat.com> + + * tree-ssa-threadbackward.c + (back_threader_profitability::profitable_path_p): Remove FSM + references. + (back_threader_registry::register_path): Same. + * tree-ssa-threadedge.c + (jump_threader::simplify_control_stmt_condition): Same. 
+ * tree-ssa-threadupdate.c (jt_path_registry::jt_path_registry): + Add backedge_threads argument. + (fwd_jt_path_registry::fwd_jt_path_registry): Pass + backedge_threads argument. + (back_jt_path_registry::back_jt_path_registry): Same. + (dump_jump_thread_path): Adjust for FSM removal. + (back_jt_path_registry::rewire_first_differing_edge): Same. + (back_jt_path_registry::adjust_paths_after_duplication): Same. + (back_jt_path_registry::update_cfg): Same. + (jt_path_registry::register_jump_thread): Same. + * tree-ssa-threadupdate.h (enum jump_thread_edge_type): Remove + EDGE_FSM_THREAD. + (class back_jt_path_registry): Add backedge_threads to + constructor. + +2021-09-13 Martin Liska <mliska@suse.cz> + + PR c++/101331 + * asan.h (sanitize_coverage_p): Handle when fn == NULL. + +2021-09-13 H.J. Lu <hjl.tools@gmail.com> + + PR target/101935 + * config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): New. + (TARGET_AVX256_STORE_BY_PIECES): Likewise. + (MOVE_MAX): Check TARGET_AVX256_MOVE_BY_PIECES and + TARGET_AVX256_STORE_BY_PIECES instead of + TARGET_AVX256_SPLIT_UNALIGNED_LOAD and + TARGET_AVX256_SPLIT_UNALIGNED_STORE. + (STORE_MAX_PIECES): Check TARGET_AVX256_STORE_BY_PIECES instead + of TARGET_AVX256_SPLIT_UNALIGNED_STORE. + * config/i386/x86-tune.def (X86_TUNE_AVX256_MOVE_BY_PIECES): New. + (X86_TUNE_AVX256_STORE_BY_PIECES): Likewise. + +2021-09-13 liuhongt <hongtao.liu@intel.com> + + PR bootstrap/102302 + * expmed.c (extract_bit_field_using_extv): Use + gen_lowpart_if_possible instead of gen_lowpart to avoid ICE. + +2021-09-13 Aldy Hernandez <aldyh@redhat.com> + + * Makefile.in (OBJS): Add value-pointer-equiv.o. + * gimple-ssa-evrp.c (class ssa_equiv_stack): Move to + value-pointer-equiv.*. + (ssa_equiv_stack::ssa_equiv_stack): Same. + (ssa_equiv_stack::enter): Same. + (ssa_equiv_stack::leave): Same. + (ssa_equiv_stack::push_replacement): Same. + (ssa_equiv_stack::get_replacement): Same. + (is_pointer_ssa): Same. + (class pointer_equiv_analyzer): Same. + (pointer_equiv_analyzer::pointer_equiv_analyzer): Same. + (pointer_equiv_analyzer::~pointer_equiv_analyzer): Same. + (pointer_equiv_analyzer::set_global_equiv): Same. + (pointer_equiv_analyzer::set_cond_equiv): Same. + (pointer_equiv_analyzer::get_equiv): Same. + (pointer_equiv_analyzer::enter): Same. + (pointer_equiv_analyzer::leave): Same. + (pointer_equiv_analyzer::get_equiv_expr): Same. + (pta_valueize): Same. + (pointer_equiv_analyzer::visit_stmt): Same. + (pointer_equiv_analyzer::visit_edge): Same. + (hybrid_folder::value_of_expr): Same. + (hybrid_folder::value_on_edge): Same. + * value-pointer-equiv.cc: New file. + * value-pointer-equiv.h: New file. + +2021-09-13 Richard Earnshaw <rearnsha@arm.com> + + PR target/102125 + * gimple-fold.c (gimple_fold_builtin_memory_op): Allow folding + memcpy if the size is not more than MOVE_MAX * MOVE_RATIO. + +2021-09-13 Richard Earnshaw <rearnsha@arm.com> + + PR target/102125 + * config/arm/arm.md (movmisaligndi): New define_expand. + * config/arm/vec-common.md (movmisalign<mode>): Iterate over VDQ mode. + +2021-09-13 Richard Earnshaw <rearnsha@arm.com> + + PR target/102125 + * emit-rtl.c (gen_highpart): Use adjust_address to handle + MEM rather than calling simplify_gen_subreg. + +2021-09-13 Jan-Benedict Glaw <jbglaw@Å‚ug-owl.de> + + * config/alpha/vms.h (INIT_CUMULATIVE_ARGS): Wrap multi-statment + define into a block. + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config/darwin.h (DARWIN_PREFER_DWARF): Do not define. 
+ * config/i386/darwin.h (PREFERRED_DEBUGGING_TYPE): Do not + change based on DARWIN_PREFER_DWARF not being defined. + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config/i386/lynx.h: Remove undef of PREFERRED_DEBUGGING_TYPE + to inherit from elfos.h + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config.gcc: Add cr16-*-* to the list of obsoleted targets. + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config/avr/elf.h (PREFERRED_DEBUGGING_TYPE): Remove + override, pick up DWARF2_DEBUG define from elfos.h + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config/rx/rx.h (PREFERRED_DEBUGGING_TYPE): Always define to + DWARF2_DEBUG. + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config/alpha/vms.h (PREFERRED_DEBUGGING_TYPE): Define to + DWARF2_DEBUG. + +2021-09-13 Richard Biener <rguenther@suse.de> + + * config/i386/cygming.h: Always default to DWARF2 debugging. + Do not define DBX_DEBUGGING_INFO, that's done via dbxcoff.h + already. + * doc/install.texi: Document binutils 2.16 as minimum + requirement for mingw. + +2021-09-13 Kewen Lin <linkw@linux.ibm.com> + + * config/rs6000/rs6000.c (struct rs6000_cost_data): New members + nstmts, nloads and extra_ctor_cost. + (rs6000_density_test): Add load density related heuristics. Do + extra costing on vector construction statements if need. + (rs6000_init_cost): Init new members. + (rs6000_update_target_cost_per_stmt): New function. + (rs6000_add_stmt_cost): Factor vect_nonmem hunk out to function + rs6000_update_target_cost_per_stmt and call it. + +2021-09-13 Kewen Lin <linkw@linux.ibm.com> + + * config/rs6000/rs6000.c (struct rs6000_cost_data): Remove typedef. + (rs6000_init_cost): Adjust. + +2021-09-13 liuhongt <hongtao.liu@intel.com> + + * config/i386/i386.md: (UNSPEC_COPYSIGN): Remove. + (UNSPEC_XORSIGN): Ditto. + 2021-09-12 Roger Sayle <roger@nextmovesoftware.com> * expr.c (convert_move): Preserve SUBREG_PROMOTED_VAR_P when diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 14ef576..62910e5 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210913 +20210917 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f387712..b8229ad 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -799,13 +799,8 @@ DIR = ../gcc # Native compiler for the build machine and its switches. CC_FOR_BUILD = @CC_FOR_BUILD@ CXX_FOR_BUILD = @CXX_FOR_BUILD@ -NO_PIE_CFLAGS_FOR_BUILD = @NO_PIE_CFLAGS_FOR_BUILD@ -NO_PIE_FLAG_FOR_BUILD = @NO_PIE_FLAG_FOR_BUILD@ BUILD_CFLAGS= @BUILD_CFLAGS@ $(GENERATOR_CFLAGS) -DGENERATOR_FILE BUILD_CXXFLAGS = @BUILD_CXXFLAGS@ $(GENERATOR_CFLAGS) -DGENERATOR_FILE -BUILD_NO_PIE_CFLAGS = @BUILD_NO_PIE_CFLAGS@ -BUILD_CFLAGS += $(BUILD_NO_PIE_CFLAGS) -BUILD_CXXFLAGS += $(BUILD_NO_PIE_CFLAGS) # Native compiler that we use. This may be C++ some day. COMPILER_FOR_BUILD = $(CXX_FOR_BUILD) @@ -817,8 +812,6 @@ BUILD_LINKERFLAGS = $(BUILD_CXXFLAGS) # Native linker and preprocessor flags. For x-fragment overrides. BUILD_LDFLAGS=@BUILD_LDFLAGS@ -BUILD_NO_PIE_FLAG = @BUILD_NO_PIE_FLAG@ -BUILD_LDFLAGS += $(BUILD_NO_PIE_FLAG) BUILD_CPPFLAGS= -I. -I$(@D) -I$(srcdir) -I$(srcdir)/$(@D) \ -I$(srcdir)/../include @INCINTL@ $(CPPINC) $(CPPFLAGS) diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index ff5fc4e..70aaabf 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,77 @@ +2021-09-15 Alexandre Oliva <oliva@adacore.com> + + * gcc-interface/utils.c: Include opts.h. + (handle_zero_call_used_regs_attribute): New. + (gnat_internal_attribute_table): Add zero_call_used_regs. 
+ +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + PR ada/101970 + * exp_attr.adb (Expand_N_Attribute_Reference) <Attribute_Enum_Rep>: + Use an unchecked conversion instead of a regular conversion in the + enumeration case and remove Conversion_OK flag in the integer case. + <Attribute_Pos>: Remove superfluous test. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/decl.c (validate_size): Do not issue an error if the + old size has overflowed. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/decl.c (gnat_to_gnu_entity): For vector types, make + the representative array the debug type. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/decl.c (gnat_to_gnu_subprog_type): Turn variable + into constant. Capitalize GCC in warning message. + (intrin_arglists_compatible_p): Change parameter to pointer-to-const + Adjust warning messages. Turn warning into error for vector types. + (intrin_return_compatible_p): Likewise. + (intrin_profiles_compatible_p): Change parameter to pointer-to-const + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * libgnat/s-atopri.ads (bool): Delete. + (Atomic_Test_And_Set): Replace bool with Boolean. + (Atomic_Always_Lock_Free): Likewise. + * libgnat/s-aoinar.adb (Is_Lock_Free): Adjust. + * libgnat/s-aomoar.adb (Is_Lock_Free): Likewise. + * libgnat/s-aotase.adb (Atomic_Test_And_Set): Likewise. + * libgnat/s-atopex.adb (Atomic_Compare_And_Exchange): Likewise. + * gcc-interface/decl.c: Include gimple-expr.h. + (intrin_types_incompatible_p): Delete. + (intrin_arglists_compatible_p): Call types_compatible_p. + (intrin_return_compatible_p): Likewise. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/utils.c (update_pointer_to): Set TYPE_CANONICAL on + pointer and reference types. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + PR ada/101385 + * doc/gnat_ugn/building_executable_programs_with_gnat.rst + (-Wall): Minor fixes. + (-w): Likewise. + (-Werror): Document that it also sets -gnatwe by default. + * gcc-interface/lang-specs.h (ada): Expand -gnatwe if -Werror is + passed and move expansion of -gnatw switches to before -gnatez. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/utils.c (can_materialize_object_renaming_p): Do not + call UI_Is_In_Int_Range on the result of Normalized_First_Bit. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/decl.c (gnat_to_gnu_entity) <is_type>: Declare new + constant. Adjust error message issued by validate_size in the case + of by-reference types. + (validate_size): Always use the error strings passed by the caller. + 2021-09-08 liuhongt <hongtao.liu@intel.com> * gcc-interface/misc.c (gnat_post_options): Issue an error for diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst index 07c38df..5a69967 100644 --- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst +++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst @@ -4157,16 +4157,16 @@ of the pragma in the :title:`GNAT_Reference_manual`). This switch enables most warnings from the GCC back end. The code generator detects a number of warning situations that are missed by the GNAT front end, and this switch can be used to activate them. - The use of this switch also sets the default front end warning mode to - :switch:`-gnatwa`, that is, most front end warnings activated as well. 
+ The use of this switch also sets the default front-end warning mode to + :switch:`-gnatwa`, that is, most front-end warnings are activated as well. .. index:: -w (gcc) :switch:`-w` Conversely, this switch suppresses warnings from the GCC back end. - The use of this switch also sets the default front end warning mode to - :switch:`-gnatws`, that is, front end warnings suppressed as well. + The use of this switch also sets the default front-end warning mode to + :switch:`-gnatws`, that is, front-end warnings are suppressed as well. .. index:: -Werror (gcc) @@ -4175,6 +4175,9 @@ of the pragma in the :title:`GNAT_Reference_manual`). This switch causes warnings from the GCC back end to be treated as errors. The warning string still appears, but the warning messages are counted as errors, and prevent the generation of an object file. + The use of this switch also sets the default front-end warning mode to + :switch:`-gnatwe`, that is, front-end warning messages and style check + messages are treated as errors as well. A string of warning parameters can be used in the same parameter. For example:: diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb index f074521..fc6b0ef 100644 --- a/gcc/ada/exp_attr.adb +++ b/gcc/ada/exp_attr.adb @@ -3252,14 +3252,15 @@ package body Exp_Attr is -- If not constant-folded, Enum_Type'Enum_Rep (X) or X'Enum_Rep -- expands to - -- target-type (X) + -- target-type!(X) - -- This is simply a direct conversion from the enumeration type to - -- the target integer type, which is treated by the back end as a - -- normal integer conversion, treating the enumeration type as an - -- integer, which is exactly what we want. We set Conversion_OK to - -- make sure that the analyzer does not complain about what otherwise - -- might be an illegal conversion. + -- This is an unchecked conversion from the enumeration type to the + -- target integer type, which is treated by the back end as a normal + -- integer conversion, treating the enumeration type as an integer, + -- which is exactly what we want. Unlike for the Pos attribute, we + -- cannot use a regular conversion since the associated check would + -- involve comparing the converted bounds, i.e. would involve the use + -- of 'Pos instead 'Enum_Rep for these bounds. -- However the target type is universal integer in most cases, which -- is a very large type, so in the case of an enumeration type, we @@ -3267,11 +3268,13 @@ package body Exp_Attr is -- the size information. if Is_Enumeration_Type (Ptyp) then - Rewrite (N, OK_Convert_To (Get_Integer_Type (Ptyp), Expr)); + Rewrite (N, Unchecked_Convert_To (Get_Integer_Type (Ptyp), Expr)); Convert_To_And_Rewrite (Typ, N); + -- Deal with integer types (replace by conversion) + else - Rewrite (N, OK_Convert_To (Typ, Expr)); + Rewrite (N, Convert_To (Typ, Expr)); end if; Analyze_And_Resolve (N, Typ); @@ -5420,7 +5423,7 @@ package body Exp_Attr is -- Deal with integer types (replace by conversion) - elsif Is_Integer_Type (Etyp) then + else Rewrite (N, Convert_To (Typ, Expr)); end if; diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index 5cedb74..0120b21 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -28,6 +28,7 @@ #include "coretypes.h" #include "target.h" #include "tree.h" +#include "gimple-expr.h" #include "stringpool.h" #include "diagnostic-core.h" #include "alias.h" @@ -261,7 +262,7 @@ typedef struct { tree btin_fntype; /* The GCC builtin function type node. 
*/ } intrin_binding_t; -static bool intrin_profiles_compatible_p (intrin_binding_t *); +static bool intrin_profiles_compatible_p (const intrin_binding_t *); /* Given GNAT_ENTITY, a GNAT defining identifier node, which denotes some Ada entity, return the equivalent GCC tree for that entity (a ..._DECL node) @@ -4279,6 +4280,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) handling alignment and possible padding. */ if (is_type && (!gnu_decl || this_made_decl)) { + const bool is_by_ref = Is_By_Reference_Type (gnat_entity); + gcc_assert (!TYPE_IS_DUMMY_P (gnu_type)); /* Process the attributes, if not already done. Note that the type is @@ -4293,15 +4296,18 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) non-constant). */ if (!gnu_size && kind != E_String_Literal_Subtype) { + const char *size_s = "size for %s too small{, minimum allowed is ^}"; + const char *type_s = is_by_ref ? "by-reference type &" : "&"; + if (Known_Esize (gnat_entity)) gnu_size = validate_size (Esize (gnat_entity), gnu_type, gnat_entity, - VAR_DECL, false, false, NULL, NULL); + VAR_DECL, false, false, size_s, type_s); else gnu_size = validate_size (RM_Size (gnat_entity), gnu_type, gnat_entity, TYPE_DECL, false, Has_Size_Clause (gnat_entity), - NULL, NULL); + size_s, type_s); } /* If a size was specified, see if we can make a new type of that size @@ -4614,7 +4620,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) TYPE_ALIGN_OK (gnu_type) = 1; /* Record whether the type is passed by reference. */ - if (Is_By_Reference_Type (gnat_entity) && !VOID_TYPE_P (gnu_type)) + if (is_by_ref && !VOID_TYPE_P (gnu_type)) TYPE_BY_REFERENCE_P (gnu_type) = 1; /* Record whether an alignment clause was specified. */ @@ -4734,6 +4740,14 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) else gnu_decl = create_type_decl (gnu_entity_name, gnu_type, artificial_p, debug_info_p, gnat_entity); + + /* For vector types, make the representative array the debug type. */ + if (VECTOR_TYPE_P (gnu_type)) + { + tree rep = TYPE_REPRESENTATIVE_ARRAY (gnu_type); + TYPE_NAME (rep) = DECL_NAME (gnu_decl); + SET_TYPE_DEBUG_TYPE (gnu_type, rep); + } } /* Otherwise, for a type reusing an existing DECL, back-annotate values. */ @@ -6291,14 +6305,13 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition, the checker is expected to post diagnostics in this case. */ if (gnu_builtin_decl) { - intrin_binding_t inb + const intrin_binding_t inb = { gnat_subprog, gnu_type, TREE_TYPE (gnu_builtin_decl) }; if (!intrin_profiles_compatible_p (&inb)) post_error ("??profile of& doesn''t match the builtin it binds!", gnat_subprog); - return gnu_builtin_decl; } @@ -6309,7 +6322,7 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition, on demand without risking false positives with common default sets of options. */ if (warn_shadow) - post_error ("??gcc intrinsic not found for&!", gnat_subprog); + post_error ("'G'C'C intrinsic not found for&!??", gnat_subprog); } } @@ -9178,13 +9191,12 @@ validate_size (Uint uint_size, tree gnu_type, Entity_Id gnat_object, /* Issue an error either if the default size of the object isn't a constant or if the new size is smaller than it. 
*/ if (TREE_CODE (old_size) != INTEGER_CST - || TREE_OVERFLOW (old_size) - || tree_int_cst_lt (size, old_size)) + || (!TREE_OVERFLOW (old_size) && tree_int_cst_lt (size, old_size))) { char buf[128]; const char *s; - if (kind == FIELD_DECL) + if (s1 && s2) { snprintf (buf, sizeof (buf), s1, s2); s = buf; @@ -9193,6 +9205,7 @@ validate_size (Uint uint_size, tree gnu_type, Entity_Id gnat_object, s = "component size for& too small{, minimum allowed is ^}"; else s = "size for& too small{, minimum allowed is ^}"; + post_error_ne_tree (s, gnat_error_node, gnat_object, old_size); return NULL_TREE; @@ -9486,51 +9499,11 @@ check_ok_for_atomic_type (tree type, Entity_Id gnat_entity, bool component_p) gnat_error_point, gnat_entity); } - -/* Helper for the intrin compatibility checks family. Evaluate whether - two types are definitely incompatible. */ - -static bool -intrin_types_incompatible_p (tree t1, tree t2) -{ - enum tree_code code; - - if (TYPE_MAIN_VARIANT (t1) == TYPE_MAIN_VARIANT (t2)) - return false; - - if (TYPE_MODE (t1) != TYPE_MODE (t2)) - return true; - - if (TREE_CODE (t1) != TREE_CODE (t2)) - return true; - - code = TREE_CODE (t1); - - switch (code) - { - case INTEGER_TYPE: - case REAL_TYPE: - return TYPE_PRECISION (t1) != TYPE_PRECISION (t2); - - case POINTER_TYPE: - case REFERENCE_TYPE: - /* Assume designated types are ok. We'd need to account for char * and - void * variants to do better, which could rapidly get messy and isn't - clearly worth the effort. */ - return false; - - default: - break; - } - - return false; -} - /* Helper for intrin_profiles_compatible_p, to perform compatibility checks on the Ada/builtin argument lists for the INB binding. */ static bool -intrin_arglists_compatible_p (intrin_binding_t * inb) +intrin_arglists_compatible_p (const intrin_binding_t *inb) { function_args_iterator ada_iter, btin_iter; @@ -9555,27 +9528,32 @@ intrin_arglists_compatible_p (intrin_binding_t * inb) /* If we're done with the Ada args and not with the internal builtin args, or the other way around, complain. */ - if (ada_type == void_type_node - && btin_type != void_type_node) + if (ada_type == void_type_node && btin_type != void_type_node) { - post_error ("??Ada arguments list too short!", inb->gnat_entity); + post_error ("??Ada parameter list too short!", inb->gnat_entity); return false; } - if (btin_type == void_type_node - && ada_type != void_type_node) + if (btin_type == void_type_node && ada_type != void_type_node) { - post_error_ne_num ("??Ada arguments list too long ('> ^)!", + post_error_ne_num ("??Ada parameter list too long ('> ^)!", inb->gnat_entity, inb->gnat_entity, argpos); return false; } /* Otherwise, check that types match for the current argument. */ - argpos ++; - if (intrin_types_incompatible_p (ada_type, btin_type)) + argpos++; + if (!types_compatible_p (ada_type, btin_type)) { - post_error_ne_num ("??intrinsic binding type mismatch on argument ^!", - inb->gnat_entity, inb->gnat_entity, argpos); + /* For vector builtins, issue an error to avoid an ICE. */ + if (VECTOR_TYPE_P (btin_type)) + post_error_ne_num + ("intrinsic binding type mismatch on parameter ^", + inb->gnat_entity, inb->gnat_entity, argpos); + else + post_error_ne_num + ("??intrinsic binding type mismatch on parameter ^!", + inb->gnat_entity, inb->gnat_entity, argpos); return false; } @@ -9591,22 +9569,26 @@ intrin_arglists_compatible_p (intrin_binding_t * inb) on the Ada/builtin return values for the INB binding. 
*/ static bool -intrin_return_compatible_p (intrin_binding_t * inb) +intrin_return_compatible_p (const intrin_binding_t *inb) { tree ada_return_type = TREE_TYPE (inb->ada_fntype); tree btin_return_type = TREE_TYPE (inb->btin_fntype); /* Accept function imported as procedure, common and convenient. */ - if (VOID_TYPE_P (ada_return_type) - && !VOID_TYPE_P (btin_return_type)) + if (VOID_TYPE_P (ada_return_type) && !VOID_TYPE_P (btin_return_type)) return true; /* Check return types compatibility otherwise. Note that this handles void/void as well. */ - if (intrin_types_incompatible_p (btin_return_type, ada_return_type)) + if (!types_compatible_p (btin_return_type, ada_return_type)) { - post_error ("??intrinsic binding type mismatch on return value!", - inb->gnat_entity); + /* For vector builtins, issue an error to avoid an ICE. */ + if (VECTOR_TYPE_P (btin_return_type)) + post_error ("intrinsic binding type mismatch on result", + inb->gnat_entity); + else + post_error ("??intrinsic binding type mismatch on result", + inb->gnat_entity); return false; } @@ -9622,7 +9604,7 @@ intrin_return_compatible_p (intrin_binding_t * inb) especially when binding straight to a compiler internal. */ static bool -intrin_profiles_compatible_p (intrin_binding_t * inb) +intrin_profiles_compatible_p (const intrin_binding_t *inb) { /* Check compatibility on return values and argument lists, each responsible for posting warnings as appropriate. Ensure use of the proper sloc for diff --git a/gcc/ada/gcc-interface/lang-specs.h b/gcc/ada/gcc-interface/lang-specs.h index f5a7496..d26cc8d 100644 --- a/gcc/ada/gcc-interface/lang-specs.h +++ b/gcc/ada/gcc-interface/lang-specs.h @@ -36,7 +36,7 @@ "\ %{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}}\ %{!S:%{!c:%e-c or -S required for Ada}}\ - gnat1 %{I*} %{k8:-gnatk8} %{Wall:-gnatwa} %{w:-gnatws} %{!Q:-quiet}\ + gnat1 %{I*} %{k8:-gnatk8} %{!Q:-quiet}\ %{nostdinc*} %{nostdlib*}\ %{fcompare-debug-second:-gnatd_A} \ %{O*} %{W*} %{w} %{p} %{pg:-p} " ADA_DUMPS_OPTIONS " \ @@ -44,8 +44,9 @@ #if defined(TARGET_VXWORKS_RTP) "%{fRTS=rtp|fRTS=rtp-smp|fRTS=ravenscar-cert-rtp:-mrtp} " #endif - "%{gnatea:-gnatez} %{g*&m*&f*} " - "%1 %{!S:%{o*:%w%*-gnatO}} \ + "%{Wall:-gnatwa} %{Werror:-gnatwe} %{w:-gnatws} \ + %{gnatea:-gnatez} %{g*&m*&f*} \ + %1 %{!S:%{o*:%w%*-gnatO}} \ %i %{S:%W{o*}%{!o*:-o %w%b.s}} \ %{gnatc*|gnats*: -o %j} %{-param*} \ %{!gnatc*:%{!gnats*:%(invoke_as)}}", 0, 0, 0}, diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c index 846d20a..be3f107 100644 --- a/gcc/ada/gcc-interface/utils.c +++ b/gcc/ada/gcc-interface/utils.c @@ -38,6 +38,7 @@ #include "attribs.h" #include "varasm.h" #include "toplev.h" +#include "opts.h" #include "output.h" #include "debug.h" #include "convert.h" @@ -109,6 +110,8 @@ static tree handle_target_attribute (tree *, tree, tree, int, bool *); static tree handle_target_clones_attribute (tree *, tree, tree, int, bool *); static tree handle_vector_size_attribute (tree *, tree, tree, int, bool *); static tree handle_vector_type_attribute (tree *, tree, tree, int, bool *); +static tree handle_zero_call_used_regs_attribute (tree *, tree, tree, int, + bool *); static const struct attribute_spec::exclusions attr_cold_hot_exclusions[] = { @@ -191,6 +194,9 @@ const struct attribute_spec gnat_internal_attribute_table[] = { "may_alias", 0, 0, false, true, false, false, NULL, NULL }, + { "zero_call_used_regs", 1, 1, true, false, false, false, + handle_zero_call_used_regs_attribute, NULL }, + /* ??? 
format and format_arg are heavy and not supported, which actually prevents support for stdio builtins, which we however declare as part of the common builtins.def contents. */ @@ -4329,6 +4335,7 @@ update_pointer_to (tree old_type, tree new_type) TREE_TYPE (t) = new_type; if (TYPE_NULL_BOUNDS (t)) TREE_TYPE (TREE_OPERAND (TYPE_NULL_BOUNDS (t), 0)) = new_type; + TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_POINTER_TO (new_type)); } /* Chain REF and its variants at the end. */ @@ -4345,7 +4352,10 @@ update_pointer_to (tree old_type, tree new_type) /* Now adjust them. */ for (; ref; ref = TYPE_NEXT_REF_TO (ref)) for (t = TYPE_MAIN_VARIANT (ref); t; t = TYPE_NEXT_VARIANT (t)) - TREE_TYPE (t) = new_type; + { + TREE_TYPE (t) = new_type; + TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_REFERENCE_TO (new_type)); + } TYPE_POINTER_TO (old_type) = NULL_TREE; TYPE_REFERENCE_TO (old_type) = NULL_TREE; @@ -5858,8 +5868,7 @@ can_materialize_object_renaming_p (Node_Id expr) const Uint bitpos = Normalized_First_Bit (Entity (Selector_Name (expr))); - if (!UI_Is_In_Int_Range (bitpos) - || (bitpos != UI_No_Uint && bitpos != UI_From_Int (0))) + if (bitpos != UI_No_Uint && bitpos != Uint_0) return false; expr = Prefix (expr); @@ -6984,6 +6993,59 @@ handle_vector_type_attribute (tree *node, tree name, tree ARG_UNUSED (args), return NULL_TREE; } +/* Handle a "zero_call_used_regs" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +handle_zero_call_used_regs_attribute (tree *node, tree name, tree args, + int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + tree decl = *node; + tree id = TREE_VALUE (args); + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE attribute applies only to functions", name); + *no_add_attrs = true; + return NULL_TREE; + } + + /* pragma Machine_Attribute turns string arguments into identifiers. + Reverse it. 
*/ + if (TREE_CODE (id) == IDENTIFIER_NODE) + id = TREE_VALUE (args) = build_string + (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id)); + + if (TREE_CODE (id) != STRING_CST) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE argument not a string", name); + *no_add_attrs = true; + return NULL_TREE; + } + + bool found = false; + for (unsigned int i = 0; zero_call_used_regs_opts[i].name != NULL; ++i) + if (strcmp (TREE_STRING_POINTER (id), + zero_call_used_regs_opts[i].name) == 0) + { + found = true; + break; + } + + if (!found) + { + error_at (DECL_SOURCE_LOCATION (decl), + "unrecognized %qE attribute argument %qs", + name, TREE_STRING_POINTER (id)); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* ----------------------------------------------------------------------- * * BUILTIN FUNCTIONS * * ----------------------------------------------------------------------- */ diff --git a/gcc/ada/libgnat/s-aoinar.adb b/gcc/ada/libgnat/s-aoinar.adb index df12b16..2f430ed 100644 --- a/gcc/ada/libgnat/s-aoinar.adb +++ b/gcc/ada/libgnat/s-aoinar.adb @@ -203,7 +203,7 @@ package body System.Atomic_Operations.Integer_Arithmetic is pragma Unreferenced (Item); use type Interfaces.C.size_t; begin - return Boolean (Atomic_Always_Lock_Free (Atomic_Type'Object_Size / 8)); + return Atomic_Always_Lock_Free (Atomic_Type'Object_Size / 8); end Is_Lock_Free; end System.Atomic_Operations.Integer_Arithmetic; diff --git a/gcc/ada/libgnat/s-aomoar.adb b/gcc/ada/libgnat/s-aomoar.adb index c955623..a6f4b0e 100644 --- a/gcc/ada/libgnat/s-aomoar.adb +++ b/gcc/ada/libgnat/s-aomoar.adb @@ -209,7 +209,7 @@ package body System.Atomic_Operations.Modular_Arithmetic is pragma Unreferenced (Item); use type Interfaces.C.size_t; begin - return Boolean (Atomic_Always_Lock_Free (Atomic_Type'Object_Size / 8)); + return Atomic_Always_Lock_Free (Atomic_Type'Object_Size / 8); end Is_Lock_Free; end System.Atomic_Operations.Modular_Arithmetic; diff --git a/gcc/ada/libgnat/s-aotase.adb b/gcc/ada/libgnat/s-aotase.adb index 5317889..94b28df 100644 --- a/gcc/ada/libgnat/s-aotase.adb +++ b/gcc/ada/libgnat/s-aotase.adb @@ -40,7 +40,7 @@ package body System.Atomic_Operations.Test_And_Set is function Atomic_Test_And_Set (Item : aliased in out Test_And_Set_Flag) return Boolean is begin - return Boolean (Atomic_Test_And_Set (Item'Address)); + return Atomic_Test_And_Set (Item'Address); end Atomic_Test_And_Set; ------------------ diff --git a/gcc/ada/libgnat/s-atopex.adb b/gcc/ada/libgnat/s-atopex.adb index 501254e..b0aa9e5 100644 --- a/gcc/ada/libgnat/s-atopex.adb +++ b/gcc/ada/libgnat/s-atopex.adb @@ -89,36 +89,36 @@ package body System.Atomic_Operations.Exchange is (Ptr : System.Address; Expected : System.Address; Desired : Atomic_Type; - Weak : bool := False; + Weak : Boolean := False; Success_Model : Mem_Model := Seq_Cst; - Failure_Model : Mem_Model := Seq_Cst) return bool; + Failure_Model : Mem_Model := Seq_Cst) return Boolean; pragma Import (Intrinsic, Atomic_Compare_Exchange_1, "__atomic_compare_exchange_1"); function Atomic_Compare_Exchange_2 (Ptr : System.Address; Expected : System.Address; Desired : Atomic_Type; - Weak : bool := False; + Weak : Boolean := False; Success_Model : Mem_Model := Seq_Cst; - Failure_Model : Mem_Model := Seq_Cst) return bool; + Failure_Model : Mem_Model := Seq_Cst) return Boolean; pragma Import (Intrinsic, Atomic_Compare_Exchange_2, "__atomic_compare_exchange_2"); function Atomic_Compare_Exchange_4 (Ptr : System.Address; Expected : System.Address; Desired : Atomic_Type; - Weak : bool := False; + Weak : 
Boolean := False; Success_Model : Mem_Model := Seq_Cst; - Failure_Model : Mem_Model := Seq_Cst) return bool; + Failure_Model : Mem_Model := Seq_Cst) return Boolean; pragma Import (Intrinsic, Atomic_Compare_Exchange_4, "__atomic_compare_exchange_4"); function Atomic_Compare_Exchange_8 (Ptr : System.Address; Expected : System.Address; Desired : Atomic_Type; - Weak : bool := False; + Weak : Boolean := False; Success_Model : Mem_Model := Seq_Cst; - Failure_Model : Mem_Model := Seq_Cst) return bool; + Failure_Model : Mem_Model := Seq_Cst) return Boolean; pragma Import (Intrinsic, Atomic_Compare_Exchange_8, "__atomic_compare_exchange_8"); pragma Warnings (On); @@ -126,21 +126,17 @@ package body System.Atomic_Operations.Exchange is begin case Atomic_Type'Object_Size is when 8 => - return Boolean - (Atomic_Compare_Exchange_1 - (Item'Address, Prior'Address, Desired)); + return + Atomic_Compare_Exchange_1 (Item'Address, Prior'Address, Desired); when 16 => - return Boolean - (Atomic_Compare_Exchange_2 - (Item'Address, Prior'Address, Desired)); + return + Atomic_Compare_Exchange_2 (Item'Address, Prior'Address, Desired); when 32 => - return Boolean - (Atomic_Compare_Exchange_4 - (Item'Address, Prior'Address, Desired)); + return + Atomic_Compare_Exchange_4 (Item'Address, Prior'Address, Desired); when 64 => - return Boolean - (Atomic_Compare_Exchange_8 - (Item'Address, Prior'Address, Desired)); + return + Atomic_Compare_Exchange_8 (Item'Address, Prior'Address, Desired); when others => raise Program_Error; end case; @@ -154,7 +150,7 @@ package body System.Atomic_Operations.Exchange is pragma Unreferenced (Item); use type Interfaces.C.size_t; begin - return Boolean (Atomic_Always_Lock_Free (Atomic_Type'Object_Size / 8)); + return Atomic_Always_Lock_Free (Atomic_Type'Object_Size / 8); end Is_Lock_Free; end System.Atomic_Operations.Exchange; diff --git a/gcc/ada/libgnat/s-atopri.ads b/gcc/ada/libgnat/s-atopri.ads index 2a5ffe5..891b2ed 100644 --- a/gcc/ada/libgnat/s-atopri.ads +++ b/gcc/ada/libgnat/s-atopri.ads @@ -62,9 +62,6 @@ package System.Atomic_Primitives is subtype Mem_Model is Integer range Relaxed .. Last; - type bool is new Boolean; - pragma Convention (C, bool); - ------------------------------------ -- GCC built-in atomic primitives -- ------------------------------------ @@ -137,7 +134,7 @@ package System.Atomic_Primitives is function Atomic_Test_And_Set (Ptr : System.Address; - Model : Mem_Model := Seq_Cst) return bool; + Model : Mem_Model := Seq_Cst) return Boolean; pragma Import (Intrinsic, Atomic_Test_And_Set, "__atomic_test_and_set"); procedure Atomic_Clear @@ -147,7 +144,7 @@ package System.Atomic_Primitives is function Atomic_Always_Lock_Free (Size : Interfaces.C.size_t; - Ptr : System.Address := System.Null_Address) return bool; + Ptr : System.Address := System.Null_Address) return Boolean; pragma Import (Intrinsic, Atomic_Always_Lock_Free, "__atomic_always_lock_free"); diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 03ba64f..5328f85 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,8 @@ +2021-09-16 Maxim Blinov <maxim.blinov@embecosm.com> + + PR bootstrap/102242 + * engine.cc (INCLUDE_UNIQUE_PTR): Define. + 2021-09-08 David Malcolm <dmalcolm@redhat.com> PR analyzer/102225 diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 24f0931..f21f8e5 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. 
*/ #include "config.h" +#define INCLUDE_UNIQUE_PTR #include "system.h" #include "coretypes.h" #include "tree.h" diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 684d7d0..d4be236 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2021-09-13 Jason Merrill <jason@redhat.com> + + * c.opt: Add -Winterference-size. + * c-cppbuiltin.c (cpp_atomic_builtins): Add __GCC_DESTRUCTIVE_SIZE + and __GCC_CONSTRUCTIVE_SIZE. + 2021-09-10 Jakub Jelinek <jakub@redhat.com> * c-common.h (c_finish_omp_atomic): Add r and weak arguments. diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c index 75184a3..476abc1 100644 --- a/gcc/c-family/c-omp.c +++ b/gcc/c-family/c-omp.c @@ -376,7 +376,7 @@ c_finish_omp_atomic (location_t loc, enum tree_code code, return error_mark_node; gcc_assert (TREE_CODE (rhs1) == EQ_EXPR); tree cmptype = TREE_TYPE (TREE_OPERAND (rhs1, 0)); - if (SCALAR_FLOAT_TYPE_P (cmptype)) + if (SCALAR_FLOAT_TYPE_P (cmptype) && !test) { bool clear_padding = false; if (BITS_PER_UNIT == 8 && CHAR_BIT == 8) @@ -443,12 +443,14 @@ c_finish_omp_atomic (location_t loc, enum tree_code code, } } } - if (r) + if (r && test) + rtmp = rhs1; + else if (r) { - tree var = create_tmp_var (boolean_type_node); + tree var = create_tmp_var_raw (boolean_type_node); DECL_CONTEXT (var) = current_function_decl; rtmp = build4 (TARGET_EXPR, boolean_type_node, var, - NULL, NULL, NULL); + boolean_false_node, NULL, NULL); save = in_late_binary_op; in_late_binary_op = true; x = build_modify_expr (loc, var, NULL_TREE, NOP_EXPR, @@ -529,14 +531,11 @@ c_finish_omp_atomic (location_t loc, enum tree_code code, } } if (blhs) + x = build3_loc (loc, BIT_FIELD_REF, TREE_TYPE (blhs), x, + bitsize_int (bitsize), bitsize_int (bitpos)); + if (r && !test) { - x = build3_loc (loc, BIT_FIELD_REF, TREE_TYPE (blhs), x, - bitsize_int (bitsize), bitsize_int (bitpos)); - type = TREE_TYPE (blhs); - } - if (r) - { - vtmp = create_tmp_var (TREE_TYPE (x)); + vtmp = create_tmp_var_raw (TREE_TYPE (x)); DECL_CONTEXT (vtmp) = current_function_decl; } else @@ -545,10 +544,11 @@ c_finish_omp_atomic (location_t loc, enum tree_code code, loc, x, NULL_TREE); if (x == error_mark_node) return error_mark_node; - if (r) + type = TREE_TYPE (x); + if (r && !test) { - vtmp = build4 (TARGET_EXPR, boolean_type_node, vtmp, - NULL, NULL, NULL); + vtmp = build4 (TARGET_EXPR, TREE_TYPE (vtmp), vtmp, + build_zero_cst (TREE_TYPE (vtmp)), NULL, NULL); gcc_assert (TREE_CODE (x) == MODIFY_EXPR && TREE_OPERAND (x, 0) == TARGET_EXPR_SLOT (vtmp)); TREE_OPERAND (x, 0) = vtmp; diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index d82c042..eba9bbf 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -18500,7 +18500,7 @@ restart: c_parser_consume_token (parser); goto restart; } - if (structured_block) + if (structured_block && !compare) { opcode = NOP_EXPR; expr = default_function_array_read_conversion (eloc, expr); diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 8f3af00..de07865 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2236,7 +2236,7 @@ cgraph_node::dump (FILE *f) } fprintf (f, "\n"); - if (count.ipa ().initialized_p ()) + if (!body_removed && count.ipa ().initialized_p ()) { bool ok = true; bool min = false; @@ -2245,7 +2245,7 @@ cgraph_node::dump (FILE *f) FOR_EACH_ALIAS (this, ref) if (dyn_cast <cgraph_node *> (ref->referring)->count.initialized_p ()) sum += dyn_cast <cgraph_node *> (ref->referring)->count.ipa (); - + if (inlined_to || (symtab->state < EXPANSION && ultimate_alias_target () == this && only_called_directly_p 
())) diff --git a/gcc/combine.c b/gcc/combine.c index 290a366..892c834 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -3063,6 +3063,16 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, return 0; } + /* We cannot safely duplicate volatile references in any case. */ + + if ((added_sets_2 && volatile_refs_p (PATTERN (i2))) + || (added_sets_1 && volatile_refs_p (PATTERN (i1))) + || (added_sets_0 && volatile_refs_p (PATTERN (i0)))) + { + undo_all (); + return 0; + } + /* Count how many auto_inc expressions there were in the original insns; we need to have the same number in the resulting patterns. */ diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 00c65ba..2c9e1cc 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1935,7 +1935,7 @@ const pta processor_alias_table[] = M_CPU_TYPE (INTEL_GOLDMONT), P_PROC_SSE4_2}, {"goldmont-plus", PROCESSOR_GOLDMONT_PLUS, CPU_GLM, PTA_GOLDMONT_PLUS, M_CPU_TYPE (INTEL_GOLDMONT_PLUS), P_PROC_SSE4_2}, - {"tremont", PROCESSOR_TREMONT, CPU_GLM, PTA_TREMONT, + {"tremont", PROCESSOR_TREMONT, CPU_HASWELL, PTA_TREMONT, M_CPU_TYPE (INTEL_TREMONT), P_PROC_SSE4_2}, {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL, M_CPU_TYPE (INTEL_KNL), P_PROC_AVX512F}, diff --git a/gcc/config.gcc b/gcc/config.gcc index d9d6ec3..c3a8b27 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -279,6 +279,7 @@ case ${target} in | i[34567]86-*-go32* \ | m32r-*-linux* \ | m32rle-*-linux* \ + | m68k*-*-openbsd* \ | m68k-*-uclinuxoldabi* \ | mips64orion*-*-rtems* \ | pdp11-*-bsd \ @@ -2342,17 +2343,6 @@ m68k*-*-netbsdelf*) extra_options="${extra_options} netbsd.opt netbsd-elf.opt" tm_defines="${tm_defines} MOTOROLA=1 CHAR_FAST8=1 SHORT_FAST16=1" ;; -m68k*-*-openbsd*) - default_m68k_cpu=68020 - default_cf_cpu=5475 - # needed to unconfuse gdb - tm_defines="${tm_defines} OBSD_OLD_GAS" - tm_file="${tm_file} openbsd.h openbsd-stdint.h openbsd-libpthread.h m68k/openbsd.h" - extra_options="${extra_options} openbsd.opt" - tmake_file="t-openbsd m68k/t-openbsd" - # we need collect2 until our bug is fixed... - use_collect2=yes - ;; m68k-*-uclinux*) # Motorola m68k/ColdFire running uClinux # with uClibc, using the new GNU/Linux-style # ABI. 
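(Illustrative sketch, not part of the patch: the c-omp.c and c-parser.c hunks above refine how the C front end expands the OpenMP 5.1 "atomic compare" construct; a typical use, assuming -fopenmp and invented names, is an atomic conditional update such as a maximum.)

void atomic_max (int *loc, int val)
{
  // Updates *loc only when the comparison holds, as a single atomic operation.
  #pragma omp atomic compare
  if (*loc < val) { *loc = val; }
}
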
@@ -3521,11 +3511,6 @@ vax-*-netbsdelf*) extra_options="${extra_options} netbsd.opt netbsd-elf.opt vax/elf.opt" tm_defines="${tm_defines} CHAR_FAST8=1 SHORT_FAST16=1" ;; -vax-*-openbsd*) - tm_file="vax/vax.h vax/openbsd1.h openbsd.h openbsd-stdint.h openbsd-libpthread.h vax/openbsd.h" - extra_options="${extra_options} openbsd.opt" - use_collect2=yes - ;; visium-*-elf*) tm_file="dbxelf.h elfos.h ${tm_file} visium/elf.h newlib-stdint.h" tmake_file="visium/t-visium visium/t-crtstuff" diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 90ba85e..4919d27 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -4966,8 +4966,8 @@ core_3, archs4x, archs4xd, archs4xd_slow" (const_int 1)) (label_ref (match_operand 1 "" "")) (pc))) - (set (match_dup 0) (plus (match_dup 0) (const_int -1))) - (unspec [(const_int 0)] UNSPEC_ARC_LP) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (unspec:SI [(const_int 0)] UNSPEC_ARC_LP) (clobber (match_dup 2))])] "" { @@ -4996,8 +4996,8 @@ core_3, archs4x, archs4xd, archs4xd_slow" (const_int 1)) (label_ref (match_operand 1 "" "")) (pc))) - (set (match_dup 0) (plus (match_dup 0) (const_int -1))) - (unspec [(const_int 0)] UNSPEC_ARC_LP) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (unspec:SI [(const_int 0)] UNSPEC_ARC_LP) (clobber (match_scratch:SI 2 "=X,&r"))] "" "@ diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index ed8ad84..a5041ed 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -192,6 +192,159 @@ _mm512_setzero_ph (void) return _mm512_set1_ph (0.0f); } +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_ph (void) +{ + __m128h __Y = __Y; + return __Y; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_undefined_ph (void) +{ + __m256h __Y = __Y; + return __Y; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_undefined_ph (void) +{ + __m512h __Y = __Y; + return __Y; +} + +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_h (__m128h __A) +{ + return __A[0]; +} + +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtsh_h (__m256h __A) +{ + return __A[0]; +} + +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsh_h (__m512h __A) +{ + return __A[0]; +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_ps (__m512h __a) +{ + return (__m512) __a; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_pd (__m512h __a) +{ + return (__m512d) __a; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_si512 (__m512h __a) +{ + return (__m512i) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph512_ph128 (__m512h __A) +{ + union + { + __m128h a[4]; + __m512h v; + } u = { .v = __A }; + return u.a[0]; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph512_ph256 (__m512h __A) +{ + union + { + __m256h a[2]; + __m512h v; + } u = { .v = __A }; + return u.a[0]; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_castph128_ph512 (__m128h __A) +{ + union + { + __m128h a[4]; + __m512h v; + } u; + u.a[0] = __A; + return u.v; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph256_ph512 (__m256h __A) +{ + union + { + __m256h a[2]; + __m512h v; + } u; + u.a[0] = __A; + return u.v; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_zextph128_ph512 (__m128h __A) +{ + return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (), + (__m128) __A, 0); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_zextph256_ph512 (__m256h __A) +{ + return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (), + (__m256d) __A, 0); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castps_ph (__m512 __a) +{ + return (__m512h) __a; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castpd_ph (__m512d __a) +{ + return (__m512h) __a; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castsi512_ph (__m512i __a) +{ + return (__m512h) __a; +} + /* Create a vector with element 0 as F and the rest zero. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -229,15 +382,15 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) { - return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B); + return __builtin_ia32_addph512_mask (__C, __D, __A, __B); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C) { - return __builtin_ia32_vaddph_v32hf_mask (__B, __C, - _mm512_setzero_ph (), __A); + return __builtin_ia32_addph512_mask (__B, __C, + _mm512_setzero_ph (), __A); } extern __inline __m512h @@ -251,15 +404,15 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) { - return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B); + return __builtin_ia32_subph512_mask (__C, __D, __A, __B); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C) { - return __builtin_ia32_vsubph_v32hf_mask (__B, __C, - _mm512_setzero_ph (), __A); + return __builtin_ia32_subph512_mask (__B, __C, + _mm512_setzero_ph (), __A); } extern __inline __m512h @@ -273,15 +426,15 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) { - return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B); + return __builtin_ia32_mulph512_mask (__C, __D, __A, __B); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C) { - return __builtin_ia32_vmulph_v32hf_mask (__B, __C, - _mm512_setzero_ph (), __A); + return __builtin_ia32_mulph512_mask (__B, __C, + _mm512_setzero_ph (), __A); } extern __inline __m512h @@ -295,15 +448,15 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) { - return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B); + return __builtin_ia32_divph512_mask (__C, __D, __A, __B); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C) { - return __builtin_ia32_vdivph_v32hf_mask (__B, __C, - _mm512_setzero_ph (), __A); + return __builtin_ia32_divph512_mask (__B, __C, + _mm512_setzero_ph (), __A); } #ifdef __OPTIMIZE__ @@ -311,9 +464,9 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C) { - return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1, __C); + return __builtin_ia32_addph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); } extern __inline __m512h @@ -321,7 +474,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D, const int __E) { - return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E); } extern __inline __m512h @@ -329,18 +482,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C, - _mm512_setzero_ph (), - __A, __D); + return __builtin_ia32_addph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C) { - return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1, __C); + return __builtin_ia32_subph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); } extern __inline __m512h @@ -348,7 +501,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D, const int __E) { - return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E); } extern __inline __m512h @@ -356,18 +509,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C, - _mm512_setzero_ph (), - __A, __D); + return __builtin_ia32_subph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C) { - return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1, __C); + return __builtin_ia32_mulph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); } extern __inline __m512h @@ -375,7 +528,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D, const int __E) { - return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E); } extern __inline __m512h @@ -383,18 
+536,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C, - _mm512_setzero_ph (), - __A, __D); + return __builtin_ia32_mulph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C) { - return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1, __C); + return __builtin_ia32_divph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); } extern __inline __m512h @@ -402,7 +555,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D, const int __E) { - return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E); } extern __inline __m512h @@ -410,67 +563,67 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C, - _mm512_setzero_ph (), - __A, __D); + return __builtin_ia32_divph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); } #else #define _mm512_add_round_ph(A, B, C) \ - ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B), \ - _mm512_setzero_ph (),\ - (__mmask32)-1, (C))) + ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) -#define _mm512_mask_add_round_ph(A, B, C, D, E) \ - ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E))) +#define _mm512_mask_add_round_ph(A, B, C, D, E) \ + ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E))) #define _mm512_maskz_add_round_ph(A, B, C, D) \ - ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C), \ - _mm512_setzero_ph (),\ - (A), (D))) + ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) #define _mm512_sub_round_ph(A, B, C) \ - ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B), \ - _mm512_setzero_ph (),\ - (__mmask32)-1, (C))) + ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) -#define _mm512_mask_sub_round_ph(A, B, C, D, E) \ - ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E))) +#define _mm512_mask_sub_round_ph(A, B, C, D, E) \ + ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E))) #define _mm512_maskz_sub_round_ph(A, B, C, D) \ - ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C), \ - _mm512_setzero_ph (),\ - (A), (D))) + ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) #define _mm512_mul_round_ph(A, B, C) \ - ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B), \ - _mm512_setzero_ph (),\ - (__mmask32)-1, (C))) + ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) -#define _mm512_mask_mul_round_ph(A, B, C, D, E) \ - ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E))) +#define _mm512_mask_mul_round_ph(A, B, C, D, E) \ + ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E))) #define _mm512_maskz_mul_round_ph(A, B, C, D) \ - 
((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C), \ - _mm512_setzero_ph (),\ - (A), (D))) + ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) #define _mm512_div_round_ph(A, B, C) \ - ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B), \ - _mm512_setzero_ph (),\ - (__mmask32)-1, (C))) + ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) -#define _mm512_mask_div_round_ph(A, B, C, D, E) \ - ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E))) +#define _mm512_mask_div_round_ph(A, B, C, D, E) \ + ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E))) #define _mm512_maskz_div_round_ph(A, B, C, D) \ - ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C), \ - _mm512_setzero_ph (),\ - (A), (D))) + ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) #endif /* __OPTIMIZE__ */ /* Intrinsics of v[add,sub,mul,div]sh. */ extern __inline __m128h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_sh (__m128h __A, __m128h __B) { __A[0] += __B[0]; @@ -481,15 +634,15 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vaddsh_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_addsh_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vaddsh_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m128h @@ -504,15 +657,15 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vsubsh_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_subsh_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vsubsh_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m128h @@ -527,14 +680,14 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vmulsh_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_mulsh_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vmulsh_v8hf_mask (__B, __C, _mm_setzero_ph (), __A); + return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A); } extern __inline __m128h @@ -549,15 +702,15 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vdivsh_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_divsh_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vdivsh_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (), + __A); } #ifdef __OPTIMIZE__ @@ -565,9 +718,9 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_round_sh (__m128h __A, __m128h __B, const int __C) { - return __builtin_ia32_vaddsh_v8hf_mask_round (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1, __C); + return __builtin_ia32_addsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); } extern __inline __m128h @@ -575,7 +728,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D, const int __E) { - return __builtin_ia32_vaddsh_v8hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E); } extern __inline __m128h @@ -583,18 +736,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { - return __builtin_ia32_vaddsh_v8hf_mask_round (__B, __C, - _mm_setzero_ph (), - __A, __D); + return __builtin_ia32_addsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C) { - return __builtin_ia32_vsubsh_v8hf_mask_round (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1, __C); + return __builtin_ia32_subsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); } extern __inline __m128h @@ -602,7 +755,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D, const int __E) { - return __builtin_ia32_vsubsh_v8hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E); } extern __inline __m128h @@ -610,18 +763,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { - return __builtin_ia32_vsubsh_v8hf_mask_round (__B, __C, - _mm_setzero_ph (), - __A, __D); + return __builtin_ia32_subsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C) { - return __builtin_ia32_vmulsh_v8hf_mask_round (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1, __C); + return __builtin_ia32_mulsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); } extern __inline __m128h @@ -629,7 +782,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D, const int __E) { - return __builtin_ia32_vmulsh_v8hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E); } extern __inline __m128h @@ -637,18 +790,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { - return __builtin_ia32_vmulsh_v8hf_mask_round (__B, __C, - _mm_setzero_ph (), - __A, __D); + return __builtin_ia32_mulsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); } extern __inline __m128h __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm_div_round_sh (__m128h __A, __m128h __B, const int __C) { - return __builtin_ia32_vdivsh_v8hf_mask_round (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1, __C); + return __builtin_ia32_divsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); } extern __inline __m128h @@ -656,7 +809,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D, const int __E) { - return __builtin_ia32_vdivsh_v8hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E); } extern __inline __m128h @@ -664,62 +817,62 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { - return __builtin_ia32_vdivsh_v8hf_mask_round (__B, __C, - _mm_setzero_ph (), - __A, __D); + return __builtin_ia32_divsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); } #else #define _mm_add_round_sh(A, B, C) \ - ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((A), (B), \ - _mm_setzero_ph (), \ - (__mmask8)-1, (C))) + ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) #define _mm_mask_add_round_sh(A, B, C, D, E) \ - ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((C), (D), (A), (B), (E))) + ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E))) -#define _mm_maskz_add_round_sh(A, B, C, D) \ - ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((B), (C), \ - _mm_setzero_ph (), \ - (A), (D))) +#define _mm_maskz_add_round_sh(A, B, C, D) \ + ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \ + _mm_setzero_ph (), \ + (A), (D))) #define _mm_sub_round_sh(A, B, C) \ - ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((A), (B), \ - _mm_setzero_ph (), \ - (__mmask8)-1, (C))) + ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) #define _mm_mask_sub_round_sh(A, B, C, D, E) \ - ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((C), (D), (A), (B), (E))) + ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E))) -#define _mm_maskz_sub_round_sh(A, B, C, D) \ - ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((B), (C), \ - _mm_setzero_ph (), \ - (A), (D))) +#define _mm_maskz_sub_round_sh(A, B, C, D) \ + ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \ + _mm_setzero_ph (), \ + (A), (D))) #define _mm_mul_round_sh(A, B, C) \ - ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((A), (B), \ - _mm_setzero_ph (), \ - (__mmask8)-1, (C))) + ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) #define _mm_mask_mul_round_sh(A, B, C, D, E) \ - ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((C), (D), (A), (B), (E))) + ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E))) -#define _mm_maskz_mul_round_sh(A, B, C, D) \ - ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((B), (C), \ - _mm_setzero_ph (), \ - (A), (D))) +#define _mm_maskz_mul_round_sh(A, B, C, D) \ + ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \ + _mm_setzero_ph (), \ + (A), (D))) #define _mm_div_round_sh(A, B, C) \ - ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((A), (B), \ - _mm_setzero_ph (), \ - (__mmask8)-1, (C))) + ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) #define _mm_mask_div_round_sh(A, B, C, D, E) \ - ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((C), 
(D), (A), (B), (E))) + ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E))) -#define _mm_maskz_div_round_sh(A, B, C, D) \ - ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((B), (C), \ - _mm_setzero_ph (), \ - (A), (D))) +#define _mm_maskz_div_round_sh(A, B, C, D) \ + ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \ + _mm_setzero_ph (), \ + (A), (D))) #endif /* __OPTIMIZE__ */ /* Intrinsic vmaxph vminph. */ @@ -727,48 +880,48 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_max_ph (__m512h __A, __m512h __B) { - return __builtin_ia32_vmaxph_v32hf_mask (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1); + return __builtin_ia32_maxph512_mask (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) { - return __builtin_ia32_vmaxph_v32hf_mask (__C, __D, __A, __B); + return __builtin_ia32_maxph512_mask (__C, __D, __A, __B); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C) { - return __builtin_ia32_vmaxph_v32hf_mask (__B, __C, - _mm512_setzero_ph (), __A); + return __builtin_ia32_maxph512_mask (__B, __C, + _mm512_setzero_ph (), __A); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_min_ph (__m512h __A, __m512h __B) { - return __builtin_ia32_vminph_v32hf_mask (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1); + return __builtin_ia32_minph512_mask (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) { - return __builtin_ia32_vminph_v32hf_mask (__C, __D, __A, __B); + return __builtin_ia32_minph512_mask (__C, __D, __A, __B); } extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C) { - return __builtin_ia32_vminph_v32hf_mask (__B, __C, - _mm512_setzero_ph (), __A); + return __builtin_ia32_minph512_mask (__B, __C, + _mm512_setzero_ph (), __A); } #ifdef __OPTIMIZE__ @@ -776,9 +929,9 @@ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C) { - return __builtin_ia32_vmaxph_v32hf_mask_round (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1, __C); + return __builtin_ia32_maxph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); } extern __inline __m512h @@ -786,7 +939,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D, const int __E) { - return __builtin_ia32_vmaxph_v32hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E); } extern __inline __m512h @@ -794,18 +947,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return __builtin_ia32_vmaxph_v32hf_mask_round (__B, __C, - _mm512_setzero_ph (), - __A, __D); + return __builtin_ia32_maxph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); } extern __inline __m512h __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C) { - return __builtin_ia32_vminph_v32hf_mask_round (__A, __B, - _mm512_setzero_ph (), - (__mmask32) -1, __C); + return __builtin_ia32_minph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); } extern __inline __m512h @@ -813,7 +966,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D, const int __E) { - return __builtin_ia32_vminph_v32hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E); } extern __inline __m512h @@ -821,37 +974,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return __builtin_ia32_vminph_v32hf_mask_round (__B, __C, - _mm512_setzero_ph (), - __A, __D); + return __builtin_ia32_minph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); } #else -#define _mm512_max_round_ph(A, B, C) \ - (__builtin_ia32_vmaxph_v32hf_mask_round ((A), (B), \ - _mm512_setzero_ph (), \ - (__mmask32)-1, (C))) +#define _mm512_max_round_ph(A, B, C) \ + (__builtin_ia32_maxph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) #define _mm512_mask_max_round_ph(A, B, C, D, E) \ - (__builtin_ia32_vmaxph_v32hf_mask_round ((C), (D), (A), (B), (E))) + (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E))) -#define _mm512_maskz_max_round_ph(A, B, C, D) \ - (__builtin_ia32_vmaxph_v32hf_mask_round ((B), (C), \ - _mm512_setzero_ph (), \ - (A), (D))) +#define _mm512_maskz_max_round_ph(A, B, C, D) \ + (__builtin_ia32_maxph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) -#define _mm512_min_round_ph(A, B, C) \ - (__builtin_ia32_vminph_v32hf_mask_round ((A), (B), \ - _mm512_setzero_ph (), \ - (__mmask32)-1, (C))) +#define _mm512_min_round_ph(A, B, C) \ + (__builtin_ia32_minph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) #define _mm512_mask_min_round_ph(A, B, C, D, E) \ - (__builtin_ia32_vminph_v32hf_mask_round ((C), (D), (A), (B), (E))) + (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E))) -#define _mm512_maskz_min_round_ph(A, B, C, D) \ - (__builtin_ia32_vminph_v32hf_mask_round ((B), (C), \ - _mm512_setzero_ph (), \ - (A), (D))) +#define _mm512_maskz_min_round_ph(A, B, C, D) \ + (__builtin_ia32_minph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) #endif /* __OPTIMIZE__ */ /* Intrinsic vmaxsh vminsh. 
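(Illustrative sketch, not part of the patch: the renamed builtins are internal, and user code keeps calling the _mm512_*_ph wrappers above; the mask_/maskz_ variants follow the usual AVX-512 convention, merging masked-off lanes from the first operand or zeroing them. Assumes <immintrin.h> and -mavx512fp16; clamp_ph is an invented name.)

__m512h
clamp_ph (__m512h x, __m512h lo, __m512h hi, __mmask32 k)
{
  __m512h t = _mm512_mask_max_ph (x, k, x, lo);  // lanes with k==0 keep x
  return _mm512_maskz_min_ph (k, t, hi);         // lanes with k==0 become 0.0
}
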
*/ @@ -867,15 +1020,15 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vmaxsh_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_maxsh_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vmaxsh_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m128h @@ -890,15 +1043,15 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vminsh_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_minsh_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vminsh_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (), + __A); } #ifdef __OPTIMIZE__ @@ -906,9 +1059,9 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_round_sh (__m128h __A, __m128h __B, const int __C) { - return __builtin_ia32_vmaxsh_v8hf_mask_round (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1, __C); + return __builtin_ia32_maxsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); } extern __inline __m128h @@ -916,7 +1069,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D, const int __E) { - return __builtin_ia32_vmaxsh_v8hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E); } extern __inline __m128h @@ -924,18 +1077,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { - return __builtin_ia32_vmaxsh_v8hf_mask_round (__B, __C, - _mm_setzero_ph (), - __A, __D); + return __builtin_ia32_maxsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_round_sh (__m128h __A, __m128h __B, const int __C) { - return __builtin_ia32_vminsh_v8hf_mask_round (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1, __C); + return __builtin_ia32_minsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); } extern __inline __m128h @@ -943,7 +1096,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D, const int __E) { - return __builtin_ia32_vminsh_v8hf_mask_round (__C, __D, __A, __B, __E); + return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E); } extern __inline __m128h @@ -951,37 +1104,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { - return __builtin_ia32_vminsh_v8hf_mask_round (__B, __C, - _mm_setzero_ph (), - __A, __D); + return __builtin_ia32_minsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); } #else -#define _mm_max_round_sh(A, B, C) \ - (__builtin_ia32_vmaxsh_v8hf_mask_round ((A), (B), \ - 
_mm_setzero_ph (), \ - (__mmask8)-1, (C))) +#define _mm_max_round_sh(A, B, C) \ + (__builtin_ia32_maxsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) -#define _mm_mask_max_round_sh(A, B, C, D, E) \ - (__builtin_ia32_vmaxsh_v8hf_mask_round ((C), (D), (A), (B), (E))) +#define _mm_mask_max_round_sh(A, B, C, D, E) \ + (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E))) -#define _mm_maskz_max_round_sh(A, B, C, D) \ - (__builtin_ia32_vmaxsh_v8hf_mask_round ((B), (C), \ - _mm_setzero_ph (), \ - (A), (D))) +#define _mm_maskz_max_round_sh(A, B, C, D) \ + (__builtin_ia32_maxsh_mask_round ((B), (C), \ + _mm_setzero_ph (), \ + (A), (D))) -#define _mm_min_round_sh(A, B, C) \ - (__builtin_ia32_vminsh_v8hf_mask_round ((A), (B), \ - _mm_setzero_ph (), \ - (__mmask8)-1, (C))) +#define _mm_min_round_sh(A, B, C) \ + (__builtin_ia32_minsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) -#define _mm_mask_min_round_sh(A, B, C, D, E) \ - (__builtin_ia32_vminsh_v8hf_mask_round ((C), (D), (A), (B), (E))) +#define _mm_mask_min_round_sh(A, B, C, D, E) \ + (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E))) -#define _mm_maskz_min_round_sh(A, B, C, D) \ - (__builtin_ia32_vminsh_v8hf_mask_round ((B), (C), \ - _mm_setzero_ph (), \ - (A), (D))) +#define _mm_maskz_min_round_sh(A, B, C, D) \ + (__builtin_ia32_minsh_mask_round ((B), (C), \ + _mm_setzero_ph (), \ + (A), (D))) #endif /* __OPTIMIZE__ */ @@ -991,8 +1144,8 @@ extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C) { - return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__A, __B, __C, - (__mmask32) -1); + return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C, + (__mmask32) -1); } extern __inline __mmask32 @@ -1000,8 +1153,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C, const int __D) { - return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__B, __C, __D, - __A); + return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D, + __A); } extern __inline __mmask32 @@ -1009,9 +1162,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C, const int __D) { - return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__A, __B, - __C, (__mmask32) -1, - __D); + return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B, + __C, (__mmask32) -1, + __D); } extern __inline __mmask32 @@ -1019,23 +1172,23 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C, const int __D, const int __E) { - return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__B, __C, - __D, __A, - __E); + return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C, + __D, __A, + __E); } #else #define _mm512_cmp_ph_mask(A, B, C) \ - (__builtin_ia32_vcmpph_v32hf_mask ((A), (B), (C), (-1))) + (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1))) #define _mm512_mask_cmp_ph_mask(A, B, C, D) \ - (__builtin_ia32_vcmpph_v32hf_mask ((B), (C), (D), (A))) + (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A))) -#define _mm512_cmp_round_ph_mask(A, B, C, D) \ - (__builtin_ia32_vcmpph_v32hf_mask_round ((A), (B), (C), (-1), (D))) +#define _mm512_cmp_round_ph_mask(A, B, C, D) \ + (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D))) -#define _mm512_mask_cmp_round_ph_mask(A, B, 
C, D, E) \ - (__builtin_ia32_vcmpph_v32hf_mask_round ((B), (C), (D), (A), (E))) +#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \ + (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E))) #endif /* __OPTIMIZE__ */ @@ -1046,9 +1199,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C) { return (__mmask8) - __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, - __C, (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + __builtin_ia32_cmpsh_mask_round (__A, __B, + __C, (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __mmask8 @@ -1057,9 +1210,9 @@ _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C, const int __D) { return (__mmask8) - __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C, - __D, __A, - _MM_FROUND_CUR_DIRECTION); + __builtin_ia32_cmpsh_mask_round (__B, __C, + __D, __A, + _MM_FROUND_CUR_DIRECTION); } extern __inline __mmask8 @@ -1067,9 +1220,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C, const int __D) { - return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, - __C, (__mmask8) -1, - __D); + return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B, + __C, (__mmask8) -1, + __D); } extern __inline __mmask8 @@ -1077,25 +1230,25 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C, const int __D, const int __E) { - return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C, - __D, __A, - __E); + return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C, + __D, __A, + __E); } #else -#define _mm_cmp_sh_mask(A, B, C) \ - (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), \ - (_MM_FROUND_CUR_DIRECTION))) +#define _mm_cmp_sh_mask(A, B, C) \ + (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \ + (_MM_FROUND_CUR_DIRECTION))) -#define _mm_mask_cmp_sh_mask(A, B, C, D) \ - (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A), \ - (_MM_FROUND_CUR_DIRECTION))) +#define _mm_mask_cmp_sh_mask(A, B, C, D) \ + (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \ + (_MM_FROUND_CUR_DIRECTION))) -#define _mm_cmp_round_sh_mask(A, B, C, D) \ - (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), (D))) +#define _mm_cmp_round_sh_mask(A, B, C, D) \ + (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D))) -#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \ - (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A), (E))) +#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \ + (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E))) #endif /* __OPTIMIZE__ */ @@ -1104,137 +1257,3792 @@ extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_comieq_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OS, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_comilt_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OS, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
_mm_comile_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OS, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_comigt_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OS, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_comige_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OS, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_comineq_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_US, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomieq_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OQ, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomilt_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OQ, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomile_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OQ, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomigt_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OQ, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomige_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OQ, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ucomineq_sh (__m128h __A, __m128h __B) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_UQ, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } #ifdef __OPTIMIZE__ extern __inline int 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) - _mm_comi_sh (__m128h __A, __m128h __B, const int __P) +_mm_comi_sh (__m128h __A, __m128h __B, const int __P) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return __builtin_ia32_cmpsh_mask_round (__A, __B, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R) { - return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P, - (__mmask8) -1,__R); + return __builtin_ia32_cmpsh_mask_round (__A, __B, __P, + (__mmask8) -1,__R); } #else -#define _mm_comi_round_sh(A, B, P, R) \ - (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), (R))) -#define _mm_comi_sh(A, B, P) \ - (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), \ - _MM_FROUND_CUR_DIRECTION)) +#define _mm_comi_round_sh(A, B, P, R) \ + (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R))) +#define _mm_comi_sh(A, B, P) \ + (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \ + _MM_FROUND_CUR_DIRECTION)) #endif /* __OPTIMIZE__ */ +/* Intrinsics vsqrtph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_ph (__m512h __A) +{ + return __builtin_ia32_sqrtph512_mask_round (__A, + _mm512_setzero_ph(), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C) +{ + return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B) +{ + return __builtin_ia32_sqrtph512_mask_round (__B, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_round_ph (__m512h __A, const int __B) +{ + return __builtin_ia32_sqrtph512_mask_round (__A, + _mm512_setzero_ph(), + (__mmask32) -1, __B); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C) +{ + return __builtin_ia32_sqrtph512_mask_round (__B, + _mm512_setzero_ph (), + __A, __C); +} + +#else +#define _mm512_sqrt_round_ph(A, B) \ + (__builtin_ia32_sqrtph512_mask_round ((A), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (B))) + +#define _mm512_mask_sqrt_round_ph(A, B, C, D) \ + (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_sqrt_round_ph(A, B, C) \ + (__builtin_ia32_sqrtph512_mask_round ((B), \ + _mm512_setzero_ph (), \ + (A), (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vrsqrtph. 
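(Illustrative sketch, not part of the patch: the _round_ variants above take an explicit rounding/SAE control instead of using the current rounding mode. Assumes <immintrin.h> and -mavx512fp16; sqrt_rn_ph is an invented name.)

__m512h
sqrt_rn_ph (__m512h x)
{
  // Round to nearest and suppress floating-point exceptions for this operation.
  return _mm512_sqrt_round_ph (x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
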
*/ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt_ph (__m512h __A) +{ + return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (), + (__mmask32) -1); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C) +{ + return __builtin_ia32_rsqrtph512_mask (__C, __A, __B); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B) +{ + return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (), + __A); +} + +/* Intrinsics vrsqrtsh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (), + __A); +} + +/* Intrinsics vsqrtsh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_sqrtsh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_sqrtsh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_sqrtsh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B, + __E); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_sqrtsh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, __D); +} + +#else +#define _mm_sqrt_round_sh(A, B, C) \ + (__builtin_ia32_sqrtsh_mask_round ((B), (A), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) + +#define _mm_mask_sqrt_round_sh(A, B, C, D, E) \ + (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E))) + +#define _mm_maskz_sqrt_round_sh(A, B, C, D) \ + (__builtin_ia32_sqrtsh_mask_round ((C), (B), \ + _mm_setzero_ph (), \ + (A), (D))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vrcpph. 
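A short sketch (again not part of the patch) of the vsqrtph/vrsqrtph intrinsics defined above, with illustrative helper names and assuming -mavx512fp16:

#include <immintrin.h>

/* Exact square root of 32 packed _Float16 lanes; lanes not selected by
   KEEP retain their value from SRC.  */
static inline __m512h
masked_sqrt_ph (__m512h src, __mmask32 keep, __m512h x)
{
  return _mm512_mask_sqrt_ph (src, keep, x);
}

/* vrsqrtph is an approximation to 1/sqrt(x) (relative error on the
   order of 2^-11), typically good enough for normalising FP16 data
   without a divide or an exact square root.  */
static inline __m512h
approx_rsqrt_ph (__m512h x)
{
  return _mm512_rsqrt_ph (x);
}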
*/ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp_ph (__m512h __A) +{ + return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (), + (__mmask32) -1); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C) +{ + return __builtin_ia32_rcpph512_mask (__C, __A, __B); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B) +{ + return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (), + __A); +} + +/* Intrinsics vrcpsh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (), + __A); +} + +/* Intrinsics vscalefph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_ph (__m512h __A, __m512h __B) +{ + return __builtin_ia32_scalefph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D) +{ + return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C) +{ + return __builtin_ia32_scalefph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C) +{ + return __builtin_ia32_scalefph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + __m512h __D, const int __E) +{ + return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B, + __E); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C, + const int __D) +{ + return __builtin_ia32_scalefph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} + +#else +#define _mm512_scalef_round_ph(A, B, C) \ + (__builtin_ia32_scalefph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) + +#define _mm512_mask_scalef_round_ph(A, B, C, D, E) \ + (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E))) + +#define _mm512_maskz_scalef_round_ph(A, B, C, D) \ + (__builtin_ia32_scalefph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) + +#endif /* 
__OPTIMIZE__ */ + +/* Intrinsics vscalefsh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_sh (__m128h __A, __m128h __B) +{ + return __builtin_ia32_scalefsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_scalefsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C) +{ + return __builtin_ia32_scalefsh_mask_round (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1, __C); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, const int __E) +{ + return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B, + __E); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + const int __D) +{ + return __builtin_ia32_scalefsh_mask_round (__B, __C, + _mm_setzero_ph (), + __A, __D); +} + +#else +#define _mm_scalef_round_sh(A, B, C) \ + (__builtin_ia32_scalefsh_mask_round ((A), (B), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (C))) + +#define _mm_mask_scalef_round_sh(A, B, C, D, E) \ + (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E))) + +#define _mm_maskz_scalef_round_sh(A, B, C, D) \ + (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \ + (A), (D))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vreduceph. 
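A sketch of how the vscalefph intrinsics above can stand in for a multiply by a power of two; not part of the patch, helper name illustrative, assumes -mavx512fp16.

#include <immintrin.h>

/* vscalefph computes a * 2**floor(b) per lane, so scaling by a power
   of two stays exact even where the equivalent _Float16 constant would
   overflow or lose precision.  EXP2 holds the exponents as floats.  */
static inline __m512h
ldexp_ph (__m512h a, __m512h exp2)
{
  return _mm512_scalef_ph (a, exp2);
}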
*/ +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_ph (__m512h __A, int __B) +{ + return __builtin_ia32_reduceph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D) +{ + return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C) +{ + return __builtin_ia32_reduceph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_round_ph (__m512h __A, int __B, const int __C) +{ + return __builtin_ia32_reduceph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, __C); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C, + int __D, const int __E) +{ + return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B, + __E); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C, + const int __D) +{ + return __builtin_ia32_reduceph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} + +#else +#define _mm512_reduce_ph(A, B) \ + (__builtin_ia32_reduceph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_reduce_ph(A, B, C, D) \ + (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_reduce_ph(A, B, C) \ + (__builtin_ia32_reduceph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_reduce_round_ph(A, B, C) \ + (__builtin_ia32_reduceph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) + +#define _mm512_mask_reduce_round_ph(A, B, C, D, E) \ + (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E))) + +#define _mm512_maskz_reduce_round_ph(A, B, C, D) \ + (__builtin_ia32_reduceph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vreducesh. 
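A hedged sketch of the vreduceph intrinsics above (not part of the patch). The immediate encoding is assumed to match the existing vreduceps/vreducepd convention: the upper nibble gives the number of fraction bits to keep and the low two bits select the rounding mode (3 = truncate).

#include <immintrin.h>

/* Despite the name, vreduceph is not a horizontal reduction: each lane
   becomes x - round(x) at the precision selected by the immediate.
   With no fraction bits kept and truncation, that is the signed
   fractional part of each lane.  */
static inline __m512h
fractional_part_ph (__m512h x)
{
  return _mm512_reduce_ph (x, 0x03);
}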
*/ +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_sh (__m128h __A, __m128h __B, int __C) +{ + return __builtin_ia32_reducesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E) +{ + return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D) +{ + return __builtin_ia32_reducesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D) +{ + return __builtin_ia32_reducesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, __D); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E, const int __F) +{ + return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, + __B, __F); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + int __D, const int __E) +{ + return __builtin_ia32_reducesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), + __A, __E); +} + +#else +#define _mm_reduce_sh(A, B, C) \ + (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \ + _mm_setzero_ph (), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_reduce_sh(A, B, C, D, E) \ + (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_reduce_sh(A, B, C, D) \ + (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \ + _mm_setzero_ph (), \ + (A), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_reduce_round_sh(A, B, C, D) \ + (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (D))) + +#define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \ + (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F))) + +#define _mm_maskz_reduce_round_sh(A, B, C, D, E) \ + (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \ + _mm_setzero_ph (), \ + (A), (E))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vrndscaleph. 
*/ +#ifdef __OPTIMIZE__ +extern __inline __m512h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_ph (__m512h __A, int __B) +{ + return __builtin_ia32_rndscaleph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B, + __m512h __C, int __D) +{ + return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C) +{ + return __builtin_ia32_rndscaleph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C) +{ + return __builtin_ia32_rndscaleph512_mask_round (__A, __B, + _mm512_setzero_ph (), + (__mmask32) -1, + __C); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B, + __m512h __C, int __D, const int __E) +{ + return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, + __B, __E); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C, + const int __D) +{ + return __builtin_ia32_rndscaleph512_mask_round (__B, __C, + _mm512_setzero_ph (), + __A, __D); +} + +#else +#define _mm512_roundscale_ph(A, B) \ + (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_roundscale_ph(A, B, C, D) \ + (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_roundscale_ph(A, B, C) \ + (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), \ + _MM_FROUND_CUR_DIRECTION)) +#define _mm512_roundscale_round_ph(A, B, C) \ + (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, (C))) + +#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \ + (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E))) + +#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \ + (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \ + _mm512_setzero_ph (), \ + (A), (D))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vrndscalesh. 
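A sketch of the vrndscaleph intrinsics above (not part of the patch), assuming the immediate follows the vrndscaleps encoding: the low bits select the rounding mode (the _MM_FROUND_TO_* values) and the upper nibble the number of fraction bits to keep.

#include <immintrin.h>

/* Round every _Float16 lane toward negative infinity (floor); using
   _MM_FROUND_TO_POS_INF instead would give ceil, and a non-zero upper
   nibble would round to a fixed number of fraction bits rather than to
   an integer.  */
static inline __m512h
floor_ph (__m512h x)
{
  return _mm512_roundscale_ph (x, _MM_FROUND_TO_NEG_INF);
}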
*/ +#ifdef __OPTIMIZE__ +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roundscale_sh (__m128h __A, __m128h __B, int __C) +{ + return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E) +{ + return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D) +{ + return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), __A, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D) +{ + return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C, + _mm_setzero_ph (), + (__mmask8) -1, + __D); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C, + __m128h __D, int __E, const int __F) +{ + return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, + __A, __B, __F); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C, + int __D, const int __E) +{ + return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D, + _mm_setzero_ph (), + __A, __E); +} + +#else +#define _mm_roundscale_sh(A, B, C) \ + (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \ + _mm_setzero_ph (), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_roundscale_sh(A, B, C, D, E) \ + (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_roundscale_sh(A, B, C, D) \ + (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \ + _mm_setzero_ph (), \ + (A), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_roundscale_round_sh(A, B, C, D) \ + (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (D))) + +#define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \ + (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F))) + +#define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \ + (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \ + _mm_setzero_ph (), \ + (A), (E))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfpclasssh. 
*/ +#ifdef __OPTIMIZE__ +extern __inline __mmask8 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fpclass_sh_mask (__m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, + (__mmask8) -1); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U); +} + +#else +#define _mm_fpclass_sh_mask(X, C) \ + ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \ + (int) (C), (__mmask8) (-1))) + +#define _mm_mask_fpclass_sh_mask(U, X, C) \ + ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \ + (int) (C), (__mmask8) (U))) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfpclassph. */ +#ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A, + const int __imm) +{ + return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A, + __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fpclass_ph_mask (__m512h __A, const int __imm) +{ + return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A, + __imm, + (__mmask32) -1); +} + +#else +#define _mm512_mask_fpclass_ph_mask(u, x, c) \ + ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ + (int) (c),(__mmask8)(u))) + +#define _mm512_fpclass_ph_mask(x, c) \ + ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ + (int) (c),(__mmask8)-1)) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vgetexpph, vgetexpsh. 
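A sketch of the vfpclassph intrinsics above (not part of the patch). The category bits in the immediate are assumed to follow the existing vfpclassps/pd encoding (0x01 QNaN, 0x80 SNaN, 0x08 positive infinity, 0x10 negative infinity, 0x20 denormal, and so on).

#include <immintrin.h>

/* Mask of lanes that are NaN or infinite, e.g. to re-run only those
   lanes through a slow path.  */
static inline __mmask32
non_finite_lanes_ph (__m512h x)
{
  return _mm512_fpclass_ph_mask (x, 0x01 | 0x80 | 0x08 | 0x10);
}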
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getexp_sh (__m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__v8hf) _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__v8hf) __W, (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B) +{ + return (__m128h) + __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__v8hf) _mm_setzero_ph (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_ph (__m512h __A) +{ + return (__m512h) + __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) _mm512_setzero_ph (), + (__mmask32) -1, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A) +{ + return (__m512h) + __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W, + (__mmask32) __U, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A) +{ + return (__m512h) + __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) _mm512_setzero_ph (), + (__mmask32) __U, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + _mm_setzero_ph (), + (__mmask8) -1, + __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A, + __m128h __B, const int __R) +{ + return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B, + const int __R) +{ + return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_round_ph (__m512h __A, const int __R) +{ + return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) __W, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int 
__R) +{ + return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, __R); +} + +#else +#define _mm_getexp_round_sh(A, B, R) \ + ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \ + (__v8hf)(__m128h)(B), \ + (__v8hf)_mm_setzero_ph(), \ + (__mmask8)-1, R)) + +#define _mm_mask_getexp_round_sh(W, U, A, B, C) \ + (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C) + +#define _mm_maskz_getexp_round_sh(U, A, B, C) \ + (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \ + (__v8hf)_mm_setzero_ph(), \ + U, C) + +#define _mm512_getexp_round_ph(A, R) \ + ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R)) + +#define _mm512_mask_getexp_round_ph(W, U, A, R) \ + ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)(__m512h)(W), (__mmask32)(U), R)) + +#define _mm512_maskz_getexp_round_ph(U, A, R) \ + ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R)) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vgetmantph, vgetmantsh. */ +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getmant_sh (__m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D) +{ + return (__m128h) + __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__D << 2) | __C, _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A, + __m128h __B, _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D) +{ + return (__m128h) + __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__D << 2) | __C, (__v8hf) __W, + __U, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D) +{ + return (__m128h) + __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B, + (__D << 2) | __C, + (__v8hf) _mm_setzero_ph(), + __U, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) __W, __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) + _mm512_setzero_ph (), + __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_getmant_round_sh (__m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D, const int __R) +{ + return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__D << 2) | __C, + _mm_setzero_ph (), + (__mmask8) -1, + __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A, + __m128h __B, _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D, const int __R) +{ + return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__D << 2) | __C, + (__v8hf) __W, + __U, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B, + _MM_MANTISSA_NORM_ENUM __C, + _MM_MANTISSA_SIGN_ENUM __D, const int __R) +{ + return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, + (__v8hf) __B, + (__D << 2) | __C, + (__v8hf) + _mm_setzero_ph(), + __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + _mm512_setzero_ph (), + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) __W, __U, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C, const int __R) +{ + return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A, + (__C << 2) | __B, + (__v32hf) + _mm512_setzero_ph (), + __U, __R); +} + +#else +#define _mm512_getmant_ph(X, B, C) \ + ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ + (int)(((C)<<2) | (B)), \ + (__v32hf)(__m512h) \ + _mm512_setzero_ph(), \ + (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_getmant_ph(W, U, X, B, C) \ + ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ + (int)(((C)<<2) | (B)), \ + (__v32hf)(__m512h)(W), \ + (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + + +#define _mm512_maskz_getmant_ph(U, X, B, C) \ + ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ + (int)(((C)<<2) | (B)), \ + (__v32hf)(__m512h) \ + _mm512_setzero_ph(), \ + (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_getmant_sh(X, Y, C, D) \ + ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), \ + (int)(((D)<<2) | (C)), \ + (__v8hf)(__m128h) \ + _mm_setzero_ph (), \ + (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_getmant_sh(W, U, X, Y, C, D) \ + ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), \ + (int)(((D)<<2) | (C)), \ + (__v8hf)(__m128h)(W), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_getmant_sh(U, X, Y, C, D) \ + ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), \ + (int)(((D)<<2) | 
(C)), \ + (__v8hf)(__m128h) \ + _mm_setzero_ph(), \ + (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_getmant_round_ph(X, B, C, R) \ + ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ + (int)(((C)<<2) | (B)), \ + (__v32hf)(__m512h) \ + _mm512_setzero_ph(), \ + (__mmask32)-1, \ + (R))) + +#define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \ + ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ + (int)(((C)<<2) | (B)), \ + (__v32hf)(__m512h)(W), \ + (__mmask32)(U), \ + (R))) + + +#define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \ + ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \ + (int)(((C)<<2) | (B)), \ + (__v32hf)(__m512h) \ + _mm512_setzero_ph(), \ + (__mmask32)(U), \ + (R))) + +#define _mm_getmant_round_sh(X, Y, C, D, R) \ + ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), \ + (int)(((D)<<2) | (C)), \ + (__v8hf)(__m128h) \ + _mm_setzero_ph (), \ + (__mmask8)-1, \ + (R))) + +#define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) \ + ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), \ + (int)(((D)<<2) | (C)), \ + (__v8hf)(__m128h)(W), \ + (__mmask8)(U), \ + (R))) + +#define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) \ + ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \ + (__v8hf)(__m128h)(Y), \ + (int)(((D)<<2) | (C)), \ + (__v8hf)(__m128h) \ + _mm_setzero_ph(), \ + (__mmask8)(U), \ + (R))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vmovw. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi16_si128 (short __A) +{ + return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi128_si16 (__m128i __A) +{ + return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0); +} + +/* Intrinsics vmovsh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C) +{ + return __builtin_ia32_loadsh_mask (__C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B) +{ + return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C) +{ + __builtin_ia32_storesh_mask (__A, __C, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_sh (__m128h __A, __m128h __B) +{ + __A[0] = __B[0]; + return __A; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A); +} + +/* Intrinsics vcvtph2dq. 
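A sketch tying together the vgetexpph/vgetmantph intrinsics defined above (not part of the patch; the helper name is illustrative). For normal inputs the two results recompose the original value; zeros, infinities and NaNs follow the usual getexp/getmant special-case rules.

#include <immintrin.h>

/* Split X so that, for normal values, x == mant * 2**exp2 with the
   mantissa normalised into [1, 2) and the sign taken from the source.  */
static inline void
decompose_ph (__m512h x, __m512h *mant, __m512h *exp2)
{
  *mant = _mm512_getmant_ph (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
  *exp2 = _mm512_getexp_ph (x);
}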
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_epi32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvtph2dq512_mask_round ((A), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epi32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvtph2dq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2udq. 
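A sketch of the vcvtph2dq intrinsics above (not part of the patch). The _round_ variants embed the rounding mode in the instruction instead of reading MXCSR.

#include <immintrin.h>

/* 16 _Float16 values -> 16 int32, round-to-nearest-even with floating
   point exceptions suppressed.  */
static inline __m512i
half_to_i32_rne (__m256h x)
{
  return _mm512_cvt_roundph_epi32 (x, _MM_FROUND_TO_NEAREST_INT
				      | _MM_FROUND_NO_EXC);
}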
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_epu32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvtph2udq512_mask_round ((A), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epu32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvtph2udq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2dq. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B, + __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2dq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epi32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2dq512_mask_round ((A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16)(-1), (B))) + +#define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2dq512_mask_round ((C), \ + (__v16si)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2dq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2udq. 
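A sketch contrasting the truncating conversions above with the rounding ones (not part of the patch):

#include <immintrin.h>

/* vcvttph2dq always truncates toward zero, matching the semantics of a
   C cast from _Float16 to int, whereas vcvtph2dq honours the current
   or embedded rounding mode.  */
static inline __m512i
half_to_i32_trunc (__m256h x)
{
  return _mm512_cvttph_epi32 (x);
}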
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu32 (__m256h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__C, + (__v16si) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu32 (__m256h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B, + __m256h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__C, + (__v16si) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2udq512_mask_round (__B, + (__v16si) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epu32(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2udq512_mask_round ((A), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2udq512_mask_round ((C), \ + (__v16si)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2udq512_mask_round ((B), \ + (__v16si) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtdq2ph. 
*/ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi32_ph (__m512i __A) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi32_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C, + __A, + __B, + __D); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundepi32_ph(A, B) \ + (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \ + _mm256_setzero_ph (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \ + (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \ + (A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \ + (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \ + _mm256_setzero_ph (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtudq2ph. 
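A sketch of the integer-to-half direction above (not part of the patch):

#include <immintrin.h>

/* 16 int32 values narrow to 16 _Float16 values, i.e. a __m512i source
   produces a __m256h result; magnitudes beyond the FP16 range become
   infinities under the default rounding.  */
static inline __m256h
i32_to_half (__m512i x)
{
  return _mm512_cvtepi32_ph (x);
}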
*/ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu32_ph (__m512i __A) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu32_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C, + __A, + __B, + __D); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B, + _mm256_setzero_ph (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundepu32_ph(A, B) \ + (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \ + _mm256_setzero_ph (), \ + (__mmask16)-1, \ + B)) + +#define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \ + (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, \ + A, \ + B, \ + D)) + +#define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \ + (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, \ + _mm256_setzero_ph (), \ + A, \ + C)) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2qq. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_epi64(A, B) \ + (__builtin_ia32_vcvtph2qq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \ + (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epi64(A, B, C) \ + (__builtin_ia32_vcvtph2qq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2uqq. 
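A sketch of the 64-bit integer conversions above (not part of the patch):

#include <immintrin.h>

/* Only eight 64-bit lanes fit in a 512-bit register, so the source is
   a __m128h holding eight _Float16 values.  */
static inline __m512i
half_to_i64 (__m128h x)
{
  return _mm512_cvtph_epi64 (x);
}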
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_epu64(A, B) \ + (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \ + (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epu64(A, B, C) \ + (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2qq. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvttph2qq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epi64(A, B) \ + (__builtin_ia32_vcvttph2qq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \ + __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D)) + +#define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \ + (__builtin_ia32_vcvttph2qq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2uqq. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu64 (__m128h __A, int __B) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__A, + _mm512_setzero_si512 (), + (__mmask8) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvttph2uqq512_mask_round (__B, + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epu64(A, B) \ + (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \ + _mm512_setzero_si512 (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \ + __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D)) + +#define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \ + (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtqq2ph. 
*/ +extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi64_ph (__m512i __A) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi64_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + __D); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundepi64_ph(A, B) \ + (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \ + _mm_setzero_ph (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \ + (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \ + (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \ + _mm_setzero_ph (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtuqq2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepu64_ph (__m512i __A) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu64_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C, + __A, + __B, + __D); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B, + _mm_setzero_ph (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundepu64_ph(A, B) \ + (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \ + _mm_setzero_ph (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \ + (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \ + (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \ + _mm_setzero_ph (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2w. 
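
   Editor's aside, not part of the patch: a minimal usage sketch for the
   64-bit-integer-to-half conversions above (the _mm512_cvtepi64_ph and
   _mm512_cvtepu64_ph families), assuming -mavx512fp16; the names below are
   hypothetical.

     #include <immintrin.h>

     // Convert eight unsigned 64-bit lanes to _Float16, merging lanes from
     // `src` wherever the corresponding mask bit is zero.
     __m128h
     u64_to_halves (__m128h src, __mmask8 k, __m512i vals)
     {
       return _mm512_mask_cvtepu64_ph (src, k, vals);
     }

     // Signed variant with explicit rounding control: round to nearest,
     // exceptions suppressed.
     __m128h
     i64_to_halves_rn (__m512i vals)
     {
       return _mm512_cvt_roundepi64_ph (vals, _MM_FROUND_TO_NEAREST_INT
                                              | _MM_FROUND_NO_EXC);
     }
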
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epi16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epi16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_epi16(A, B) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \ + (__v32hi)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvt_roundph_epi16(A, B, C) \ + ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2uw. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_epu16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_epu16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvtph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_epu16(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvtph2uw512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, (B))) + +#define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_epu16(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvtph2uw512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2w. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epi16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epi16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B, + __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2w512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epi16(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2w512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2w512_mask_round ((C), \ + (__v32hi)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2w512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvttph2uw. 
*/ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvttph_epu16 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__C, + (__v32hi) __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtt_roundph_epu16 (__m512h __A, int __B) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) -1, + __B); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B, + __m512h __C, int __D) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__C, + (__v32hi) __A, + __B, + __D); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C) +{ + return (__m512i) + __builtin_ia32_vcvttph2uw512_mask_round (__B, + (__v32hi) + _mm512_setzero_si512 (), + __A, + __C); +} + +#else +#define _mm512_cvtt_roundph_epu16(A, B) \ + ((__m512i) \ + __builtin_ia32_vcvttph2uw512_mask_round ((A), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \ + ((__m512i) \ + __builtin_ia32_vcvttph2uw512_mask_round ((C), \ + (__v32hi)(A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \ + ((__m512i) \ + __builtin_ia32_vcvttph2uw512_mask_round ((B), \ + (__v32hi) \ + _mm512_setzero_si512 (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtw2ph. 
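
   Editor's aside, not part of the patch: a minimal usage sketch for the
   half-to-16-bit-integer conversions above (_mm512_cvtph_epi16,
   _mm512_cvtph_epu16 and their truncating forms), assuming -mavx512fp16;
   the names below are hypothetical.

     #include <immintrin.h>

     // Truncate 32 _Float16 lanes toward zero to signed 16-bit integers,
     // keeping lanes of `src` where the mask bit is zero.
     __m512i
     halves_to_i16 (__m512i src, __mmask32 k, __m512h vals)
     {
       return _mm512_mask_cvttph_epi16 (src, k, vals);
     }
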
*/ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtepi16_ph (__m512i __A) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepi16_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + __B); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + __D); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundepi16_ph(A, B) \ + (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \ + (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \ + (A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \ + (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \ + _mm512_setzero_ph (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtuw2ph. 
*/ + extern __inline __m512h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_cvtepu16_ph (__m512i __A) + { + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); + } + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundepu16_ph (__m512i __A, int __B) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A, + _mm512_setzero_ph (), + (__mmask32) -1, + __B); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C, + __A, + __B, + __D); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C) +{ + return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B, + _mm512_setzero_ph (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundepu16_ph(A, B) \ + (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \ + _mm512_setzero_ph (), \ + (__mmask32)-1, \ + (B))) + +#define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \ + (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \ + (A), \ + (B), \ + (D))) + +#define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \ + (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \ + _mm512_setzero_ph (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtsh2si, vcvtsh2us. 
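
   Editor's aside, not part of the patch: a minimal usage sketch for the
   16-bit-integer-to-half conversions above (_mm512_cvtepi16_ph and
   _mm512_cvtepu16_ph), assuming -mavx512fp16; the names below are
   hypothetical.

     #include <immintrin.h>

     // Convert 32 signed 16-bit integers to _Float16 lanes.
     __m512h
     i16_to_halves (__m512i vals)
     {
       return _mm512_cvtepi16_ph (vals);
     }
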
*/ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_i32 (__m128h __A) +{ + return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_u32 (__m128h __A) +{ + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_i32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_u32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R); +} + +#else +#define _mm_cvt_roundsh_i32(A, B) \ + ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B))) +#define _mm_cvt_roundsh_u32(A, B) \ + ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B))) + +#endif /* __OPTIMIZE__ */ + +#ifdef __x86_64__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_i64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_u64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_i64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_u64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R); +} + +#else +#define _mm_cvt_roundsh_i64(A, B) \ + ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B))) +#define _mm_cvt_roundsh_u64(A, B) \ + ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B))) + +#endif /* __OPTIMIZE__ */ +#endif /* __x86_64__ */ + +/* Intrinsics vcvttsh2si, vcvttsh2us. 
*/ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_i32 (__m128h __A) +{ + return (int) + __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_u32 (__m128h __A) +{ + return (int) + __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_i32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_u32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R); +} + +#else +#define _mm_cvtt_roundsh_i32(A, B) \ + ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B))) +#define _mm_cvtt_roundsh_u32(A, B) \ + ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B))) + +#endif /* __OPTIMIZE__ */ + +#ifdef __x86_64__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_i64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsh_u64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_i64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_roundsh_u64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R); +} + +#else +#define _mm_cvtt_roundsh_i64(A, B) \ + ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B))) +#define _mm_cvtt_roundsh_u64(A, B) \ + ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B))) + +#endif /* __OPTIMIZE__ */ +#endif /* __x86_64__ */ + +/* Intrinsics vcvtsi2sh, vcvtusi2sh. 
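
   Editor's aside, not part of the patch: a minimal usage sketch for the
   scalar conversions above (_mm_cvtsh_i32, _mm_cvttsh_i32 and friends),
   assuming -mavx512fp16; the names below are hypothetical.

     #include <immintrin.h>

     // Convert the low _Float16 element: rounded with the current rounding
     // mode, and truncated toward zero, respectively.
     int round_low_half (__m128h v) { return _mm_cvtsh_i32 (v); }
     int trunc_low_half (__m128h v) { return _mm_cvttsh_i32 (v); }
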
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti32_sh (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu32_sh (__m128h __A, unsigned int __B) +{ + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R) +{ + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R) +{ + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R); +} + +#else +#define _mm_cvt_roundi32_sh(A, B, C) \ + (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C))) +#define _mm_cvt_roundu32_sh(A, B, C) \ + (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C))) + +#endif /* __OPTIMIZE__ */ + +#ifdef __x86_64__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti64_sh (__m128h __A, long long __B) +{ + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu64_sh (__m128h __A, unsigned long long __B) +{ + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R) +{ + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R) +{ + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R); +} + +#else +#define _mm_cvt_roundi64_sh(A, B, C) \ + (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C))) +#define _mm_cvt_roundu64_sh(A, B, C) \ + (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C))) + +#endif /* __OPTIMIZE__ */ +#endif /* __x86_64__ */ + +/* Intrinsics vcvtph2pd. 
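
   Editor's aside, not part of the patch: a minimal usage sketch for
   _mm_cvti32_sh and _mm_cvtu32_sh defined above, assuming -mavx512fp16;
   the names below are hypothetical.

     #include <immintrin.h>

     // Convert a 32-bit integer to _Float16 and place it in the low
     // element; the upper seven elements are copied from `upper`.
     __m128h
     i32_into_low_half (__m128h upper, int x)
     {
       return _mm_cvti32_sh (upper, x);
     }
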
*/ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__A, + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__B, + _mm512_setzero_pd (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_pd (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__A, + _mm512_setzero_pd (), + (__mmask8) -1, + __B); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2pd512_mask_round (__B, + _mm512_setzero_pd (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_pd(A, B) \ + (__builtin_ia32_vcvtph2pd512_mask_round ((A), \ + _mm512_setzero_pd (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_pd(A, B, C, D) \ + (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_pd(A, B, C) \ + (__builtin_ia32_vcvtph2pd512_mask_round ((B), \ + _mm512_setzero_pd (), \ + (A), \ + (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2psx. 
*/ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtxph_ps (__m256h __A) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__A, + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__B, + _mm512_setzero_ps (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_roundph_ps (__m256h __A, int __B) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__A, + _mm512_setzero_ps (), + (__mmask16) -1, + __B); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_vcvtph2psx512_mask_round (__B, + _mm512_setzero_ps (), + __A, + __C); +} + +#else +#define _mm512_cvtx_roundph_ps(A, B) \ + (__builtin_ia32_vcvtph2psx512_mask_round ((A), \ + _mm512_setzero_ps (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \ + (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvtx_roundph_ps(A, B, C) \ + (__builtin_ia32_vcvtph2psx512_mask_round ((B), \ + _mm512_setzero_ps (), \ + (A), \ + (C))) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtps2ph. 
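
   Editor's aside, not part of the patch: a minimal usage sketch for the
   widening conversions above (_mm512_cvtph_pd and _mm512_cvtxph_ps),
   assuming -mavx512fp16; the names below are hypothetical.

     #include <immintrin.h>

     // Widen eight _Float16 lanes to double precision.
     __m512d halves_to_doubles (__m128h v) { return _mm512_cvtph_pd (v); }

     // Widen sixteen _Float16 lanes to single precision, zeroing the lanes
     // whose mask bit is clear.
     __m512
     halves_to_floats (__mmask16 k, __m256h v)
     {
       return _mm512_maskz_cvtxph_ps (k, v);
     }
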
*/ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtxps_ph (__m512 __A) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C, + __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_roundps_ph (__m512 __A, int __B) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C, + __A, __B, __D); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C) +{ + return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B, + _mm256_setzero_ph (), + __A, __C); +} + +#else +#define _mm512_cvtx_roundps_ph(A, B) \ + (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \ + _mm256_setzero_ph (),\ + (__mmask16)-1, (B))) + +#define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \ + (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \ + (A), (B), (D))) + +#define _mm512_maskz_cvtx_roundps_ph(A, B, C) \ + (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \ + _mm256_setzero_ph (),\ + (A), (C))) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtpd2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpd_ph (__m512d __A) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C, + __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundpd_ph (__m512d __A, int __B) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C, + __A, __B, __D); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C) +{ + return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B, + _mm_setzero_ph (), + __A, __C); +} + +#else +#define _mm512_cvt_roundpd_ph(A, B) \ + (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (B))) + +#define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \ + (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \ + (A), (B), (D))) + +#define _mm512_maskz_cvt_roundpd_ph(A, B, C) \ + (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \ + _mm_setzero_ph (), \ + (A), (C))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtsh2ss, vcvtsh2sd. 
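
   Editor's aside, not part of the patch: a minimal usage sketch for the
   narrowing conversions above (_mm512_cvtxps_ph and _mm512_cvtpd_ph),
   assuming -mavx512fp16; the names below are hypothetical.

     #include <immintrin.h>

     // Narrow sixteen floats, or eight doubles, to _Float16 using the
     // current rounding mode.
     __m256h floats_to_halves (__m512 v)   { return _mm512_cvtxps_ph (v); }
     __m128h doubles_to_halves (__m512d v) { return _mm512_cvtpd_ph (v); }
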
*/ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_ss (__m128 __A, __m128h __B) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A, + _mm_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C, + __m128h __D) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B, + __m128h __C) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B, + _mm_setzero_ps (), + __A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_sd (__m128d __A, __m128h __B) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A, + _mm_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C, + __m128h __D) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B, + _mm_setzero_pd (), + __A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A, + _mm_setzero_ps (), + (__mmask8) -1, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C, + __m128h __D, const int __R) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B, + __m128h __C, const int __R) +{ + return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B, + _mm_setzero_ps (), + __A, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A, + _mm_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C, + __m128h __D, const int __R) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R) +{ + return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B, + _mm_setzero_pd (), + __A, __R); +} + +#else +#define _mm_cvt_roundsh_ss(A, B, R) \ + (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \ + _mm_setzero_ps (), \ + (__mmask8) -1, (R))) + +#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \ + (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R))) + +#define _mm_maskz_cvt_roundsh_ss(A, 
B, C, R) \ + (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \ + _mm_setzero_ps (), \ + (A), (R))) + +#define _mm_cvt_roundsh_sd(A, B, R) \ + (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \ + _mm_setzero_pd (), \ + (__mmask8) -1, (R))) + +#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \ + (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R))) + +#define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \ + (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \ + _mm_setzero_pd (), \ + (A), (R))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtss2sh, vcvtsd2sh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_sh (__m128h __A, __m128 __B) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_sh (__m128h __A, __m128d __B) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D, + const int __R) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C, + const int __R) +{ + return __builtin_ia32_vcvtss2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A, + _mm_setzero_ph (), + (__mmask8) -1, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D, + const int __R) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R); +} + +extern 
__inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C, + const int __R) +{ + return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B, + _mm_setzero_ph (), + __A, __R); +} + +#else +#define _mm_cvt_roundss_sh(A, B, R) \ + (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \ + _mm_setzero_ph (), \ + (__mmask8) -1, R)) + +#define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \ + (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R))) + +#define _mm_maskz_cvt_roundss_sh(A, B, C, R) \ + (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \ + _mm_setzero_ph (), \ + A, R)) + +#define _mm_cvt_roundsd_sh(A, B, R) \ + (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \ + _mm_setzero_ph (), \ + (__mmask8) -1, R)) + +#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \ + (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R))) + +#define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \ + (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \ + _mm_setzero_ph (), \ + (A), (R))) + +#endif /* __OPTIMIZE__ */ + #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 1787ed5..59906d2 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -34,6 +34,123 @@ #define __DISABLE_AVX512FP16VL__ #endif /* __AVX512FP16VL__ */ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_ps (__m128h __a) +{ + return (__m128) __a; +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_ps (__m256h __a) +{ + return (__m256) __a; +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_pd (__m128h __a) +{ + return (__m128d) __a; +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_pd (__m256h __a) +{ + return (__m256d) __a; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_si128 (__m128h __a) +{ + return (__m128i) __a; +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_si256 (__m256h __a) +{ + return (__m256i) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_ph (__m128 __a) +{ + return (__m128h) __a; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castps_ph (__m256 __a) +{ + return (__m256h) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_ph (__m128d __a) +{ + return (__m128h) __a; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castpd_ph (__m256d __a) +{ + return (__m256h) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_ph (__m128i __a) +{ + return (__m128h) __a; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castsi256_ph (__m256i __a) +{ + return (__m256h) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph256_ph128 (__m256h __A) +{ + union + { + __m128h a[2]; + __m256h v; + } u = { .v = __A }; + return 
u.a[0]; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph128_ph256 (__m128h __A) +{ + union + { + __m128h a[2]; + __m256h v; + } u; + u.a[0] = __A; + return u.v; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_zextph128_ph256 (__m128h __A) +{ + return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (), + (__m128) __A, 0); +} + /* Intrinsics v[add,sub,mul,div]ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -53,30 +170,30 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_addph128_mask (__C, __D, __A, __B); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) { - return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B); + return __builtin_ia32_addph256_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C) { - return __builtin_ia32_vaddph_v16hf_mask (__B, __C, - _mm256_setzero_ph (), __A); + return __builtin_ia32_addph256_mask (__B, __C, + _mm256_setzero_ph (), __A); } extern __inline __m128h @@ -97,30 +214,30 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_subph128_mask (__C, __D, __A, __B); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) { - return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B); + return __builtin_ia32_subph256_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C) { - return __builtin_ia32_vsubph_v16hf_mask (__B, __C, - _mm256_setzero_ph (), __A); + return __builtin_ia32_subph256_mask (__B, __C, + _mm256_setzero_ph (), __A); } extern __inline __m128h @@ -141,30 +258,30 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_mulph128_mask (__C, __D, __A, __B); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_mul_ph (__m256h __A, __mmask16 __B, 
__m256h __C, __m256h __D) { - return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B); + return __builtin_ia32_mulph256_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C) { - return __builtin_ia32_vmulph_v16hf_mask (__B, __C, - _mm256_setzero_ph (), __A); + return __builtin_ia32_mulph256_mask (__B, __C, + _mm256_setzero_ph (), __A); } extern __inline __m128h @@ -185,30 +302,30 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_divph128_mask (__C, __D, __A, __B); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) { - return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B); + return __builtin_ia32_divph256_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C) { - return __builtin_ia32_vdivph_v16hf_mask (__B, __C, - _mm256_setzero_ph (), __A); + return __builtin_ia32_divph256_mask (__B, __C, + _mm256_setzero_ph (), __A); } /* Intrinsics v[max,min]ph. 
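
   Editor's aside, not part of the patch: the hunks above only rename the
   backing builtins (for example __builtin_ia32_vaddph_v8hf_mask becomes
   __builtin_ia32_addph128_mask); the public intrinsics keep their names and
   signatures.  A minimal usage sketch, assuming -mavx512fp16 and
   -mavx512vl; the names below are hypothetical.

     #include <immintrin.h>

     // Masked multiply of sixteen _Float16 lanes: lanes whose mask bit is
     // zero are taken unchanged from `src`.
     __m256h
     masked_mul (__m256h src, __mmask16 k, __m256h a, __m256h b)
     {
       return _mm256_mask_mul_ph (src, k, a, b);
     }
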
*/ @@ -216,96 +333,96 @@ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_ph (__m128h __A, __m128h __B) { - return __builtin_ia32_vmaxph_v8hf_mask (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1); + return __builtin_ia32_maxph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_ph (__m256h __A, __m256h __B) { - return __builtin_ia32_vmaxph_v16hf_mask (__A, __B, - _mm256_setzero_ph (), - (__mmask16) -1); + return __builtin_ia32_maxph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vmaxph_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_maxph128_mask (__C, __D, __A, __B); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) { - return __builtin_ia32_vmaxph_v16hf_mask (__C, __D, __A, __B); + return __builtin_ia32_maxph256_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vmaxph_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C) { - return __builtin_ia32_vmaxph_v16hf_mask (__B, __C, - _mm256_setzero_ph (), __A); + return __builtin_ia32_maxph256_mask (__B, __C, + _mm256_setzero_ph (), __A); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_ph (__m128h __A, __m128h __B) { - return __builtin_ia32_vminph_v8hf_mask (__A, __B, - _mm_setzero_ph (), - (__mmask8) -1); + return __builtin_ia32_minph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_ph (__m256h __A, __m256h __B) { - return __builtin_ia32_vminph_v16hf_mask (__A, __B, - _mm256_setzero_ph (), - (__mmask16) -1); + return __builtin_ia32_minph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) { - return __builtin_ia32_vminph_v8hf_mask (__C, __D, __A, __B); + return __builtin_ia32_minph128_mask (__C, __D, __A, __B); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) { - return __builtin_ia32_vminph_v16hf_mask (__C, __D, __A, __B); + return __builtin_ia32_minph256_mask (__C, __D, __A, __B); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C) { - return __builtin_ia32_vminph_v8hf_mask (__B, __C, _mm_setzero_ph (), - __A); + return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (), + __A); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h 
__C)
{
- return __builtin_ia32_vminph_v16hf_mask (__B, __C,
- _mm256_setzero_ph (), __A);
+ return __builtin_ia32_minph256_mask (__B, __C,
+ _mm256_setzero_ph (), __A);
}

/* vcmpph */
@@ -314,8 +431,8 @@ extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
{
- return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__A, __B, __C,
- (__mmask8) -1);
+ return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
+ (__mmask8) -1);
}

extern __inline __mmask8
@@ -323,15 +440,15 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C, const int __D)
{
- return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__B, __C, __D, __A);
+ return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
{
- return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__A, __B, __C,
- (__mmask16) -1);
+ return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
+ (__mmask16) -1);
}

extern __inline __mmask16
@@ -339,25 +456,1819 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C, const int __D)
{
- return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__B, __C, __D,
- __A);
+ return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
+ __A);
}

#else
-#define _mm_cmp_ph_mask(A, B, C) \
- (__builtin_ia32_vcmpph_v8hf_mask ((A), (B), (C), (-1)))
+#define _mm_cmp_ph_mask(A, B, C) \
+ (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))

-#define _mm_mask_cmp_ph_mask(A, B, C, D) \
- (__builtin_ia32_vcmpph_v8hf_mask ((B), (C), (D), (A)))
+#define _mm_mask_cmp_ph_mask(A, B, C, D) \
+ (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))

-#define _mm256_cmp_ph_mask(A, B, C) \
- (__builtin_ia32_vcmpph_v16hf_mask ((A), (B), (C), (-1)))
+#define _mm256_cmp_ph_mask(A, B, C) \
+ (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))

-#define _mm256_mask_cmp_ph_mask(A, B, C, D) \
- (__builtin_ia32_vcmpph_v16hf_mask ((B), (C), (D), (A)))
+#define _mm256_mask_cmp_ph_mask(A, B, C, D) \
+ (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))

#endif /* __OPTIMIZE__ */

+/* Intrinsics vsqrtph. 
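
   Editor's aside, not part of the patch: a minimal usage sketch for the
   vector compare above, assuming -mavx512fp16 and -mavx512vl; the names
   below are hypothetical.

     #include <immintrin.h>

     // Compare eight _Float16 lanes for "less than" (ordered, signaling)
     // and return the result as a bitmask.
     __mmask8
     less_than (__m128h a, __m128h b)
     {
       return _mm_cmp_ph_mask (a, b, _CMP_LT_OS);
     }
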
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_ph (__m128h __A) +{ + return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_sqrt_ph (__m256h __A) +{ + return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_sqrtph128_mask (__C, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_sqrtph256_mask (__C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (), + __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (), + __A); +} + +/* Intrinsics vrsqrtph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_ph (__m128h __A) +{ + return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_rsqrt_ph (__m256h __A) +{ + return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_rsqrtph128_mask (__C, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_rsqrtph256_mask (__C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (), + __A); +} + +/* Intrinsics vrcpph. 
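
   Editor's aside, not part of the patch: a minimal usage sketch for the
   square-root intrinsics above, assuming -mavx512fp16 and -mavx512vl; the
   names below are hypothetical.

     #include <immintrin.h>

     // Approximate reciprocal square root of eight _Float16 lanes.
     __m128h approx_rsqrt (__m128h v) { return _mm_rsqrt_ph (v); }

     // Correctly rounded square root of sixteen lanes, zeroing the lanes
     // whose mask bit is clear.
     __m256h
     masked_sqrt (__mmask16 k, __m256h v)
     {
       return _mm256_maskz_sqrt_ph (k, v);
     }
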
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_ph (__m128h __A) +{ + return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_rcp_ph (__m256h __A) +{ + return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_rcpph128_mask (__C, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_rcpph256_mask (__C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (), + __A); +} + +/* Intrinsics vscalefph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_ph (__m128h __A, __m128h __B) +{ + return __builtin_ia32_scalefph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_scalef_ph (__m256h __A, __m256h __B) +{ + return __builtin_ia32_scalefph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) +{ + return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C, + __m256h __D) +{ + return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C) +{ + return __builtin_ia32_scalefph128_mask (__B, __C, + _mm_setzero_ph (), __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C) +{ + return __builtin_ia32_scalefph256_mask (__B, __C, + _mm256_setzero_ph (), + __A); +} + +/* Intrinsics vreduceph. 
*/ +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_ph (__m128h __A, int __B) +{ + return __builtin_ia32_reduceph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_reduceph128_mask (__B, __C, + _mm_setzero_ph (), __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_ph (__m256h __A, int __B) +{ + return __builtin_ia32_reduceph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D) +{ + return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_reduceph256_mask (__B, __C, + _mm256_setzero_ph (), + __A); +} + +#else +#define _mm_reduce_ph(A, B) \ + (__builtin_ia32_reduceph128_mask ((A), (B), \ + _mm_setzero_ph (), \ + ((__mmask8)-1))) + +#define _mm_mask_reduce_ph(A, B, C, D) \ + (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B))) + +#define _mm_maskz_reduce_ph(A, B, C) \ + (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A))) + +#define _mm256_reduce_ph(A, B) \ + (__builtin_ia32_reduceph256_mask ((A), (B), \ + _mm256_setzero_ph (), \ + ((__mmask16)-1))) + +#define _mm256_mask_reduce_ph(A, B, C, D) \ + (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B))) + +#define _mm256_maskz_reduce_ph(A, B, C) \ + (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vrndscaleph. 
*/ +#ifdef __OPTIMIZE__ + extern __inline __m128h + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm_roundscale_ph (__m128h __A, int __B) + { + return __builtin_ia32_rndscaleph128_mask (__A, __B, + _mm_setzero_ph (), + (__mmask8) -1); + } + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_rndscaleph128_mask (__B, __C, + _mm_setzero_ph (), __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_roundscale_ph (__m256h __A, int __B) +{ + return __builtin_ia32_rndscaleph256_mask (__A, __B, + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C, + int __D) +{ + return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_rndscaleph256_mask (__B, __C, + _mm256_setzero_ph (), + __A); +} + +#else +#define _mm_roundscale_ph(A, B) \ + (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (), \ + ((__mmask8)-1))) + +#define _mm_mask_roundscale_ph(A, B, C, D) \ + (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B))) + +#define _mm_maskz_roundscale_ph(A, B, C) \ + (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A))) + +#define _mm256_roundscale_ph(A, B) \ + (__builtin_ia32_rndscaleph256_mask ((A), (B), \ + _mm256_setzero_ph(), \ + ((__mmask16)-1))) + +#define _mm256_mask_roundscale_ph(A, B, C, D) \ + (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B))) + +#define _mm256_maskz_roundscale_ph(A, B, C) \ + (__builtin_ia32_rndscaleph256_mask ((B), (C), \ + _mm256_setzero_ph (), (A))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfpclassph. 
*/ +#ifdef __OPTIMIZE__ +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A, + __imm, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fpclass_ph_mask (__m128h __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A, + __imm, + (__mmask8) -1); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm) +{ + return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A, + __imm, __U); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fpclass_ph_mask (__m256h __A, const int __imm) +{ + return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A, + __imm, + (__mmask16) -1); +} + +#else +#define _mm_fpclass_ph_mask(X, C) \ + ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), \ + (int) (C),(__mmask8)-1)) + +#define _mm_mask_fpclass_ph_mask(u, X, C) \ + ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), \ + (int) (C),(__mmask8)(u))) + +#define _mm256_fpclass_ph_mask(X, C) \ + ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \ + (int) (C),(__mmask16)-1)) + +#define _mm256_mask_fpclass_ph_mask(u, X, C) \ + ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \ + (int) (C),(__mmask16)(u))) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vgetexpph, vgetexpsh. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_getexp_ph (__m256h __A) +{ + return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A) +{ + return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, + (__v16hf) __W, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A) +{ + return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getexp_ph (__m128h __A) +{ + return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A) +{ + return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, + (__v8hf) __W, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A) +{ + return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) __U); +} + + +/* Intrinsics vgetmantph, vgetmantsh. 
*/ +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, + (__C << 2) | __B, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, + (__C << 2) | __B, + (__v16hf) __W, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, + (__C << 2) | __B, + (__v16hf) + _mm256_setzero_ph (), + (__mmask16) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, + (__C << 2) | __B, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, + (__C << 2) | __B, + (__v8hf) __W, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, + (__C << 2) | __B, + (__v8hf) + _mm_setzero_ph (), + (__mmask8) __U); +} + +#else +#define _mm256_getmant_ph(X, B, C) \ + ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \ + (int)(((C)<<2) | (B)), \ + (__v16hf)(__m256h)_mm256_setzero_ph (), \ + (__mmask16)-1)) + +#define _mm256_mask_getmant_ph(W, U, X, B, C) \ + ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \ + (int)(((C)<<2) | (B)), \ + (__v16hf)(__m256h)(W), \ + (__mmask16)(U))) + +#define _mm256_maskz_getmant_ph(U, X, B, C) \ + ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \ + (int)(((C)<<2) | (B)), \ + (__v16hf)(__m256h)_mm256_setzero_ph (), \ + (__mmask16)(U))) + +#define _mm_getmant_ph(X, B, C) \ + ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \ + (int)(((C)<<2) | (B)), \ + (__v8hf)(__m128h)_mm_setzero_ph (), \ + (__mmask8)-1)) + +#define _mm_mask_getmant_ph(W, U, X, B, C) \ + ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \ + (int)(((C)<<2) | (B)), \ + (__v8hf)(__m128h)(W), \ + (__mmask8)(U))) + +#define _mm_maskz_getmant_ph(U, X, B, C) \ + ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \ + (int)(((C)<<2) | (B)), \ + (__v8hf)(__m128h)_mm_setzero_ph (), \ + (__mmask8)(U))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2dq. 
*/ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2dq128_mask (__B, + (__v4si) _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2dq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2udq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2udq128_mask (__B, + (__v4si) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2udq256_mask (__B, + (__v8si) _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2dq. 
*/ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2dq128_mask (__A, + (__v4si) _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C, + ( __v4si) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2dq128_mask (__B, + (__v4si) _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__C, + ( __v8si) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2dq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2udq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu32 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__C, + ( __v4si) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2udq128_mask (__B, + (__v4si) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu32 (__m128h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__A, + (__v8si) + _mm256_setzero_si256 (), (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__C, + ( __v8si) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2udq256_mask (__B, + (__v8si) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtdq2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi32_ph (__m128i __A) +{ + return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepi32_ph (__m256i __A) +{ + return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B, + _mm_setzero_ph (), + __A); +} + +/* Intrinsics vcvtudq2ph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu32_ph (__m128i __A) +{ + return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C, + __A, + __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepu32_ph (__m256i __A) +{ + return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B, + _mm_setzero_ph (), + __A); +} + +/* Intrinsics vcvtph2qq. 
*/ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi64 (__m128h __A) +{ + return + __builtin_ia32_vcvtph2qq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2qq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2qq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2uqq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2uqq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2qq. 
*/ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq128_mask (__C, + __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2qq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2qq256_mask (__C, + __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2qq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2uqq. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__A, + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__C, + __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq128_mask (__B, + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu64 (__m128h __A) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__A, + _mm256_setzero_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__C, + __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvttph2uqq256_mask (__B, + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtqq2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi64_ph (__m128i __A) +{ + return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepi64_ph (__m256i __A) +{ + return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B, + _mm_setzero_ph (), + __A); +} + +/* Intrinsics vcvtuqq2ph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu64_ph (__m128i __A) +{ + return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepu64_ph (__m256i __A) +{ + return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C) +{ + return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B) +{ + return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B, + _mm_setzero_ph (), + __A); +} + +/* Intrinsics vcvtph2w. 
*/ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epi16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2w128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epi16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2w256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtph2uw. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_epu16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvtph2uw128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_epu16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvtph2uw256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2w. 
*/ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epi16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__C, + ( __v8hi) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2w128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epi16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__C, + ( __v16hi) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2w256_mask (__B, + (__v16hi) + _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvttph2uw. */ +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttph_epu16 (__m128h __A) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__C, + ( __v8hi) __A, + __B); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B) +{ + return (__m128i) + __builtin_ia32_vcvttph2uw128_mask (__B, + (__v8hi) + _mm_setzero_si128 (), + __A); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvttph_epu16 (__m256h __A) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__C, + ( __v16hi) __A, + __B); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B) +{ + return (__m256i) + __builtin_ia32_vcvttph2uw256_mask (__B, + (__v16hi) _mm256_setzero_si256 (), + __A); +} + +/* Intrinsics vcvtw2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_ph (__m128i __A) +{ + return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C, + __A, + __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepi16_ph (__m256i __A) +{ + return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A, + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C) +{ + return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C, + __A, + __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B) +{ + return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B, + _mm256_setzero_ph (), + __A); +} + +/* Intrinsics vcvtuw2ph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepu16_ph (__m128i __A) +{ + return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C) +{ + return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B) +{ + return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtepu16_ph (__m256i __A) +{ + return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A, + _mm256_setzero_ph (), + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C) +{ + return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B) +{ + return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B, + _mm256_setzero_ph (), + __A); +} + +/* Intrinsics vcvtph2pd. 
*/ +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd128_mask (__A, + _mm_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd256_mask (__A, + _mm256_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd256_mask (__B, + _mm256_setzero_pd (), + __A); +} + +/* Intrinsics vcvtph2ps. */ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtxph_ps (__m128h __A) +{ + return __builtin_ia32_vcvtph2psx128_mask (__A, + _mm_setzero_ps (), + (__mmask8) -1); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtxph_ps (__m128h __A) +{ + return __builtin_ia32_vcvtph2psx256_mask (__A, + _mm256_setzero_ps (), + (__mmask8) -1); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2psx256_mask (__B, + _mm256_setzero_ps (), + __A); +} + +/* Intrinsics vcvtxps2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtxps_ph (__m128 __A) +{ + return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C) +{ + return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B) +{ + return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtxps_ph (__m256 __A) +{ + return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C) +{ + return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B) +{ + return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B, + _mm_setzero_ph (), + __A); +} + +/* Intrinsics vcvtpd2ph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpd_ph (__m128d __A) +{ + return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C) +{ + return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B) +{ + return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtpd_ph (__m256d __A) +{ + return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C) +{ + return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B) +{ + return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B, + _mm_setzero_ph (), + __A); +} + #ifdef __DISABLE_AVX512FP16VL__ #undef __DISABLE_AVX512FP16VL__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index d11c02b..7fd4286 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -134,6 +134,7 @@ DEF_POINTER_TYPE (PCVOID, VOID, CONST) DEF_POINTER_TYPE (PVOID, VOID) DEF_POINTER_TYPE (PDOUBLE, DOUBLE) DEF_POINTER_TYPE (PFLOAT, FLOAT) +DEF_POINTER_TYPE (PCFLOAT16, FLOAT16, CONST) DEF_POINTER_TYPE (PSHORT, SHORT) DEF_POINTER_TYPE (PUSHORT, USHORT) DEF_POINTER_TYPE (PINT, INT) @@ -1304,17 +1305,72 @@ DEF_FUNCTION_TYPE (UINT8, PV2DI, PCV2DI, PCVOID) # FP16 builtins DEF_FUNCTION_TYPE (V8HF, V8HI) 
+DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI) +DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI) +DEF_FUNCTION_TYPE (SI, V32HF, INT, USI) +DEF_FUNCTION_TYPE (INT, V8HF, INT) +DEF_FUNCTION_TYPE (INT64, V8HF, INT) +DEF_FUNCTION_TYPE (UINT, V8HF, INT) +DEF_FUNCTION_TYPE (UINT64, V8HF, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF) +DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8HF, INT, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, INT64, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT) +DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI) +DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI) +DEF_FUNCTION_TYPE (V2DF, V8HF, V2DF, UQI) +DEF_FUNCTION_TYPE (V4DF, V8HF, V4DF, UQI) +DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI) +DEF_FUNCTION_TYPE (V4SF, V8HF, V4SF, UQI) +DEF_FUNCTION_TYPE (V8SI, V8HF, V8SI, UQI) +DEF_FUNCTION_TYPE (V8SF, V8HF, V8SF, UQI) +DEF_FUNCTION_TYPE (V8HI, V8HF, V8HI, UQI) +DEF_FUNCTION_TYPE (V8HF, V4SI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V4SF, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8SI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8SF, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V2DI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V4DI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V2DF, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V4DF, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8HI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI) DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI) DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT) +DEF_FUNCTION_TYPE (V8DI, V8HF, V8DI, UQI, INT) +DEF_FUNCTION_TYPE (V8DF, V8HF, V8DF, UQI, INT) +DEF_FUNCTION_TYPE (V8HF, V8DI, V8HF, UQI, INT) +DEF_FUNCTION_TYPE (V8HF, V8DF, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT) +DEF_FUNCTION_TYPE (V8HF, V2DF, V8HF, V8HF, UQI, INT) +DEF_FUNCTION_TYPE (V8HF, V4SF, V8HF, V8HF, UQI, INT) +DEF_FUNCTION_TYPE (V2DF, V8HF, V2DF, V2DF, UQI, INT) +DEF_FUNCTION_TYPE (V4SF, V8HF, V4SF, V4SF, UQI, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF) +DEF_FUNCTION_TYPE (V16HI, V16HF, V16HI, UHI) +DEF_FUNCTION_TYPE (V16HF, V16HI, V16HF, UHI) +DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, UHI) +DEF_FUNCTION_TYPE (V16SI, V16HF, V16SI, UHI, INT) +DEF_FUNCTION_TYPE (V16SF, V16HF, V16SF, UHI, INT) +DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI) DEF_FUNCTION_TYPE (UHI, V16HF, V16HF, INT, UHI) +DEF_FUNCTION_TYPE (V16HF, V16SI, V16HF, UHI, INT) +DEF_FUNCTION_TYPE (V16HF, V16SF, V16HF, UHI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI) +DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT) +DEF_FUNCTION_TYPE (V32HI, V32HF, V32HI, USI, INT) +DEF_FUNCTION_TYPE (V32HF, V32HI, V32HF, USI, INT) DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI) +DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI) DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT) +DEF_FUNCTION_TYPE (V32HF, V32HF, INT, V32HF, USI, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index c9d80cb..dc56dc2 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -393,6 +393,10 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mas BDESC (OPTION_MASK_ISA_AVX512BW, 0, 
CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +/* AVX512FP16 */ +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_storehf_mask, "__builtin_ia32_storesh_mask", IX86_BUILTIN_STORESH_MASK, UNKNOWN, (int) VOID_FTYPE_PCFLOAT16_V8HF_UQI) + /* RDPKRU and WRPKRU. */ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID) BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED) @@ -2775,33 +2779,102 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__b BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI) /* AVX512FP16. */ -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, 
OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_vaddsh_v8hf_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_vsubsh_v8hf_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_vmulsh_v8hf_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_vdivsh_v8hf_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_vmaxph_v8hf_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_vmaxph_v16hf_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_vmaxph_v32hf_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_vminph_v8hf_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_vminph_v16hf_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_vminph_v32hf_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_vmaxsh_v8hf_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_vminsh_v8hf_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_vcmpph_v8hf_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_vcmpph_v16hf_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_vcmpph_v32hf_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_SUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_SUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_MULPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_MULPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_DIVPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_DIVPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_ADDSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_SUBSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_MULSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_DIVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_MAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_MAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_MINPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_MINPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (0, 
OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_MAXSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_MINSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_CMPPH128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_CMPPH256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_SQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_SQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_RSQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_RSQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_RSQRTSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_RCPPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_RCPPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_RCPSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_SCALEFPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_SCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_reducepv8hf_mask, "__builtin_ia32_reduceph128_mask", IX86_BUILTIN_REDUCEPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_reduceph256_mask", IX86_BUILTIN_REDUCEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_rndscaleph128_mask", IX86_BUILTIN_RNDSCALEPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_RNDSCALEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_vmfpclassv8hf_mask, "__builtin_ia32_fpclasssh_mask", IX86_BUILTIN_FPCLASSSH_MASK, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getexpv16hf_mask, "__builtin_ia32_getexpph256_mask", IX86_BUILTIN_GETEXPPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getmantv16hf_mask, "__builtin_ia32_getmantph256_mask", IX86_BUILTIN_GETMANTPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getmantv8hf_mask, "__builtin_ia32_getmantph128_mask", IX86_BUILTIN_GETMANTPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_movhf_mask, "__builtin_ia32_vmovsh_mask", IX86_BUILTIN_VMOVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v4si_mask, "__builtin_ia32_vcvtph2dq128_mask", IX86_BUILTIN_VCVTPH2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v8si_mask, "__builtin_ia32_vcvtph2dq256_mask", IX86_BUILTIN_VCVTPH2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v4si_mask, "__builtin_ia32_vcvtph2udq128_mask", IX86_BUILTIN_VCVTPH2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v8si_mask, "__builtin_ia32_vcvtph2udq256_mask", IX86_BUILTIN_VCVTPH2UDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv4si2_mask, "__builtin_ia32_vcvttph2dq128_mask", 
IX86_BUILTIN_VCVTTPH2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv8si2_mask, "__builtin_ia32_vcvttph2dq256_mask", IX86_BUILTIN_VCVTTPH2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv4si2_mask, "__builtin_ia32_vcvttph2udq128_mask", IX86_BUILTIN_VCVTTPH2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv8si2_mask, "__builtin_ia32_vcvttph2udq256_mask", IX86_BUILTIN_VCVTTPH2UDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v2di_mask, "__builtin_ia32_vcvtph2qq128_mask", IX86_BUILTIN_VCVTPH2QQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v4di_mask, "__builtin_ia32_vcvtph2qq256_mask", IX86_BUILTIN_VCVTPH2QQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v2di_mask, "__builtin_ia32_vcvtph2uqq128_mask", IX86_BUILTIN_VCVTPH2UQQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v4di_mask, "__builtin_ia32_vcvtph2uqq256_mask", IX86_BUILTIN_VCVTPH2UQQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv2di2_mask, "__builtin_ia32_vcvttph2qq128_mask", IX86_BUILTIN_VCVTTPH2QQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv4di2_mask, "__builtin_ia32_vcvttph2qq256_mask", IX86_BUILTIN_VCVTTPH2QQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv2di2_mask, "__builtin_ia32_vcvttph2uqq128_mask", IX86_BUILTIN_VCVTTPH2UQQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv4di2_mask, "__builtin_ia32_vcvttph2uqq256_mask", IX86_BUILTIN_VCVTTPH2UQQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v8hi_mask, "__builtin_ia32_vcvtph2w128_mask", IX86_BUILTIN_VCVTPH2W128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v16hi_mask, "__builtin_ia32_vcvtph2w256_mask", IX86_BUILTIN_VCVTPH2W256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v8hi_mask, "__builtin_ia32_vcvtph2uw128_mask", IX86_BUILTIN_VCVTPH2UW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v16hi_mask, "__builtin_ia32_vcvtph2uw256_mask", IX86_BUILTIN_VCVTPH2UW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv8hi2_mask, "__builtin_ia32_vcvttph2w128_mask", IX86_BUILTIN_VCVTTPH2W128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC 
(OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv16hi2_mask, "__builtin_ia32_vcvttph2w256_mask", IX86_BUILTIN_VCVTTPH2W256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv8hi2_mask, "__builtin_ia32_vcvttph2uw128_mask", IX86_BUILTIN_VCVTTPH2UW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv16hi2_mask, "__builtin_ia32_vcvttph2uw256_mask", IX86_BUILTIN_VCVTTPH2UW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v8hi_mask, "__builtin_ia32_vcvtw2ph128_mask", IX86_BUILTIN_VCVTW2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v16hi_mask, "__builtin_ia32_vcvtw2ph256_mask", IX86_BUILTIN_VCVTW2PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HI_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v8hi_mask, "__builtin_ia32_vcvtuw2ph128_mask", IX86_BUILTIN_VCVTUW2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v16hi_mask, "__builtin_ia32_vcvtuw2ph256_mask", IX86_BUILTIN_VCVTUW2PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HI_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v4si_mask, "__builtin_ia32_vcvtdq2ph128_mask", IX86_BUILTIN_VCVTDQ2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v8si_mask, "__builtin_ia32_vcvtdq2ph256_mask", IX86_BUILTIN_VCVTDQ2PH256_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v4si_mask, "__builtin_ia32_vcvtudq2ph128_mask", IX86_BUILTIN_VCVTUDQ2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v8si_mask, "__builtin_ia32_vcvtudq2ph256_mask", IX86_BUILTIN_VCVTUDQ2PH256_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v2di_mask, "__builtin_ia32_vcvtqq2ph128_mask", IX86_BUILTIN_VCVTQQ2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v4di_mask, "__builtin_ia32_vcvtqq2ph256_mask", IX86_BUILTIN_VCVTQQ2PH256_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v2di_mask, "__builtin_ia32_vcvtuqq2ph128_mask", IX86_BUILTIN_VCVTUQQ2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v4di_mask, "__builtin_ia32_vcvtuqq2ph256_mask", IX86_BUILTIN_VCVTUQQ2PH256_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv2df2_mask, "__builtin_ia32_vcvtph2pd128_mask", IX86_BUILTIN_VCVTPH2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv4df2_mask, 
"__builtin_ia32_vcvtph2pd256_mask", IX86_BUILTIN_VCVTPH2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V8HF_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv4sf2_mask, "__builtin_ia32_vcvtph2psx128_mask", IX86_BUILTIN_VCVTPH2PSX128_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8sf2_mask, "__builtin_ia32_vcvtph2psx256_mask", IX86_BUILTIN_VCVTPH2PSX256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HF_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v4sf_mask, "__builtin_ia32_vcvtps2phx128_mask", IX86_BUILTIN_VCVTPS2PHX128_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v8sf_mask, "__builtin_ia32_vcvtps2phx256_mask", IX86_BUILTIN_VCVTPS2PHX256_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v2df_mask, "__builtin_ia32_vcvtpd2ph128_mask", IX86_BUILTIN_VCVTPD2PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v4df_mask, "__builtin_ia32_vcvtpd2ph256_mask", IX86_BUILTIN_VCVTPD2PH256_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DF_V8HF_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3003,20 +3076,70 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "_ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT) /* AVX512FP16. 
*/ -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_vaddsh_v8hf_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_vsubsh_v8hf_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_vmulsh_v8hf_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_vdivsh_v8hf_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_vmaxph_v32hf_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_vminph_v32hf_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_vmaxsh_v8hf_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_vminsh_v8hf_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_vcmpph_v32hf_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_vcmpsh_v8hf_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, 
OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_ADDSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_SUBSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_MULSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_DIVSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_MAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_MINSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_CMPSH_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_SQRTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_SCALEFSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_REDUCESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", 
IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_RNDSCALESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncsi2_round, "__builtin_ia32_vcvttsh2si32_round", IX86_BUILTIN_VCVTTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncdi2_round, "__builtin_ia32_vcvttsh2si64_round", IX86_BUILTIN_VCVTTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncsi2_round, "__builtin_ia32_vcvttsh2usi32_round", IX86_BUILTIN_VCVTTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncdi2_round, "__builtin_ia32_vcvttsh2usi64_round", IX86_BUILTIN_VCVTTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", 
IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index e117afb..bfafd15 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -9710,6 +9710,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16HI_FTYPE_V16SI_V16HI_UHI: case V16QI_FTYPE_V16SI_V16QI_UHI: case V16QI_FTYPE_V8DI_V16QI_UQI: + case V32HF_FTYPE_V32HF_V32HF_USI: case V16SF_FTYPE_V16SF_V16SF_UHI: case V16SF_FTYPE_V4SF_V16SF_UHI: case V16SI_FTYPE_SI_V16SI_UHI: @@ -9739,20 +9740,40 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16HI_FTYPE_HI_V16HI_UHI: case V8HI_FTYPE_V8HI_V8HI_UQI: case V8HI_FTYPE_HI_V8HI_UQI: + case V16HF_FTYPE_V16HF_V16HF_UHI: case V8SF_FTYPE_V8HI_V8SF_UQI: case V4SF_FTYPE_V8HI_V4SF_UQI: + case V8SI_FTYPE_V8HF_V8SI_UQI: + case V8SF_FTYPE_V8HF_V8SF_UQI: case V8SI_FTYPE_V8SF_V8SI_UQI: case V4SI_FTYPE_V4SF_V4SI_UQI: + case V4SI_FTYPE_V8HF_V4SI_UQI: + case V4SF_FTYPE_V8HF_V4SF_UQI: + case V4DI_FTYPE_V8HF_V4DI_UQI: case V4DI_FTYPE_V4SF_V4DI_UQI: + case V2DI_FTYPE_V8HF_V2DI_UQI: case V2DI_FTYPE_V4SF_V2DI_UQI: + case V8HF_FTYPE_V8HF_V8HF_UQI: + case V8HF_FTYPE_V8HI_V8HF_UQI: + case V8HF_FTYPE_V8SI_V8HF_UQI: + case V8HF_FTYPE_V8SF_V8HF_UQI: + case V8HF_FTYPE_V4SI_V8HF_UQI: + case V8HF_FTYPE_V4SF_V8HF_UQI: + case V8HF_FTYPE_V4DI_V8HF_UQI: + case V8HF_FTYPE_V4DF_V8HF_UQI: + case V8HF_FTYPE_V2DI_V8HF_UQI: + case V8HF_FTYPE_V2DF_V8HF_UQI: case V4SF_FTYPE_V4DI_V4SF_UQI: case V4SF_FTYPE_V2DI_V4SF_UQI: case V4DF_FTYPE_V4DI_V4DF_UQI: + case 
V4DF_FTYPE_V8HF_V4DF_UQI: + case V2DF_FTYPE_V8HF_V2DF_UQI: case V2DF_FTYPE_V2DI_V2DF_UQI: case V16QI_FTYPE_V8HI_V16QI_UQI: case V16QI_FTYPE_V16HI_V16QI_UHI: case V16QI_FTYPE_V4SI_V16QI_UQI: case V16QI_FTYPE_V8SI_V16QI_UQI: + case V8HI_FTYPE_V8HF_V8HI_UQI: case V8HI_FTYPE_V4SI_V8HI_UQI: case V8HI_FTYPE_V8SI_V8HI_UQI: case V16QI_FTYPE_V2DI_V16QI_UQI: @@ -9810,6 +9831,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8DI_FTYPE_DI_V8DI_UQI: case V16SF_FTYPE_V8SF_V16SF_UHI: case V16SI_FTYPE_V8SI_V16SI_UHI: + case V16HF_FTYPE_V16HI_V16HF_UHI: + case V16HI_FTYPE_V16HF_V16HI_UHI: case V16HI_FTYPE_V16HI_V16HI_UHI: case V8HI_FTYPE_V16QI_V8HI_UQI: case V16HI_FTYPE_V16QI_V16HI_UHI: @@ -9910,6 +9933,9 @@ ix86_expand_args_builtin (const struct builtin_description *d, case HI_FTYPE_V16SF_INT_UHI: case QI_FTYPE_V8SF_INT_UQI: case QI_FTYPE_V4SF_INT_UQI: + case QI_FTYPE_V8HF_INT_UQI: + case HI_FTYPE_V16HF_INT_UHI: + case SI_FTYPE_V32HF_INT_USI: case V4SI_FTYPE_V4SI_V4SI_UHI: case V8SI_FTYPE_V8SI_V8SI_UHI: nargs = 3; @@ -10058,6 +10084,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SF_FTYPE_V16SF_INT_V16SF_UHI: case V16HI_FTYPE_V16SF_INT_V16HI_UHI: case V16SI_FTYPE_V16SI_INT_V16SI_UHI: + case V16HF_FTYPE_V16HF_INT_V16HF_UHI: + case V8HF_FTYPE_V8HF_INT_V8HF_UQI: case V4SI_FTYPE_V16SI_INT_V4SI_UQI: case V4DI_FTYPE_V8DI_INT_V4DI_UQI: case V4DF_FTYPE_V8DF_INT_V4DF_UQI: @@ -10229,8 +10257,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx_vpermilv4df_mask: case CODE_FOR_avx512f_getmantv8df_mask: case CODE_FOR_avx512f_getmantv16sf_mask: + case CODE_FOR_avx512vl_getmantv16hf_mask: case CODE_FOR_avx512vl_getmantv8sf_mask: case CODE_FOR_avx512vl_getmantv4df_mask: + case CODE_FOR_avx512fp16_getmantv8hf_mask: case CODE_FOR_avx512vl_getmantv4sf_mask: case CODE_FOR_avx512vl_getmantv2df_mask: case CODE_FOR_avx512dq_rangepv8df_mask_round: @@ -10645,16 +10675,24 @@ ix86_expand_round_builtin (const struct builtin_description *d, { case UINT64_FTYPE_V2DF_INT: case UINT64_FTYPE_V4SF_INT: + case UINT64_FTYPE_V8HF_INT: case UINT_FTYPE_V2DF_INT: case UINT_FTYPE_V4SF_INT: + case UINT_FTYPE_V8HF_INT: case INT64_FTYPE_V2DF_INT: case INT64_FTYPE_V4SF_INT: + case INT64_FTYPE_V8HF_INT: case INT_FTYPE_V2DF_INT: case INT_FTYPE_V4SF_INT: + case INT_FTYPE_V8HF_INT: nargs = 2; break; case V32HF_FTYPE_V32HF_V32HF_INT: case V8HF_FTYPE_V8HF_V8HF_INT: + case V8HF_FTYPE_V8HF_INT_INT: + case V8HF_FTYPE_V8HF_UINT_INT: + case V8HF_FTYPE_V8HF_INT64_INT: + case V8HF_FTYPE_V8HF_UINT64_INT: case V4SF_FTYPE_V4SF_UINT_INT: case V4SF_FTYPE_V4SF_UINT64_INT: case V2DF_FTYPE_V2DF_UINT64_INT: @@ -10669,18 +10707,29 @@ ix86_expand_round_builtin (const struct builtin_description *d, break; case V8SF_FTYPE_V8DF_V8SF_QI_INT: case V8DF_FTYPE_V8DF_V8DF_QI_INT: + case V32HI_FTYPE_V32HF_V32HI_USI_INT: case V8SI_FTYPE_V8DF_V8SI_QI_INT: + case V8DI_FTYPE_V8HF_V8DI_UQI_INT: case V8DI_FTYPE_V8DF_V8DI_QI_INT: case V8SF_FTYPE_V8DI_V8SF_QI_INT: case V8DF_FTYPE_V8DI_V8DF_QI_INT: + case V8DF_FTYPE_V8HF_V8DF_UQI_INT: + case V16SF_FTYPE_V16HF_V16SF_UHI_INT: + case V32HF_FTYPE_V32HI_V32HF_USI_INT: + case V32HF_FTYPE_V32HF_V32HF_USI_INT: case V16SF_FTYPE_V16SF_V16SF_HI_INT: case V8DI_FTYPE_V8SF_V8DI_QI_INT: case V16SF_FTYPE_V16SI_V16SF_HI_INT: case V16SI_FTYPE_V16SF_V16SI_HI_INT: + case V16SI_FTYPE_V16HF_V16SI_UHI_INT: + case V16HF_FTYPE_V16SI_V16HF_UHI_INT: case V8DF_FTYPE_V8SF_V8DF_QI_INT: case V16SF_FTYPE_V16HI_V16SF_HI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: case 
V4SF_FTYPE_V4SF_V4SF_V4SF_INT: + case V8HF_FTYPE_V8DI_V8HF_UQI_INT: + case V8HF_FTYPE_V8DF_V8HF_UQI_INT: + case V16HF_FTYPE_V16SF_V16HF_UHI_INT: nargs = 4; break; case V4SF_FTYPE_V4SF_V4SF_INT_INT: @@ -10694,8 +10743,10 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: + case V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT: case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: case V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT: + case V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT: @@ -10703,8 +10754,11 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: case V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT: case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT: + case V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT: + case V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT: nargs = 5; break; + case V32HF_FTYPE_V32HF_INT_V32HF_USI_INT: case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: case V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT: @@ -10727,6 +10781,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT: case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT: + case V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT: nargs = 6; nargs_constant = 4; break; @@ -10763,10 +10818,12 @@ ix86_expand_round_builtin (const struct builtin_description *d, { case CODE_FOR_avx512f_getmantv8df_mask_round: case CODE_FOR_avx512f_getmantv16sf_mask_round: + case CODE_FOR_avx512bw_getmantv32hf_mask_round: case CODE_FOR_avx512f_vgetmantv2df_round: case CODE_FOR_avx512f_vgetmantv2df_mask_round: case CODE_FOR_avx512f_vgetmantv4sf_round: case CODE_FOR_avx512f_vgetmantv4sf_mask_round: + case CODE_FOR_avx512f_vgetmantv8hf_mask_round: error ("the immediate argument must be a 4-bit immediate"); return const0_rtx; case CODE_FOR_avx512f_cmpv8df3_mask_round: @@ -11070,6 +11127,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PFLOAT_V16SF_UHI: case VOID_FTYPE_PFLOAT_V8SF_UQI: case VOID_FTYPE_PFLOAT_V4SF_UQI: + case VOID_FTYPE_PCFLOAT16_V8HF_UQI: case VOID_FTYPE_PV32QI_V32HI_USI: case VOID_FTYPE_PV16QI_V16HI_UHI: case VOID_FTYPE_PUDI_V8HI_UQI: @@ -11142,6 +11200,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case V16SF_FTYPE_PCFLOAT_V16SF_UHI: case V8SF_FTYPE_PCFLOAT_V8SF_UQI: case V4SF_FTYPE_PCFLOAT_V4SF_UQI: + case V8HF_FTYPE_PCFLOAT16_V8HF_UQI: nargs = 3; klass = load; memory = 0; @@ -14054,7 +14113,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, tmp1 = gen_reg_rtx (SImode); emit_move_insn (tmp1, gen_lowpart (SImode, val)); - /* Insert the SImode value as low element of a V4SImode vector. */ + /* Insert the SImode value as low element of a V4SImode vector. */ tmp2 = gen_reg_rtx (V4SImode); emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); @@ -14179,6 +14238,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, break; case E_V8HImode: use_vector_set = TARGET_SSE2; + gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0 + ? 
gen_vec_setv8hi_0 : NULL; break; case E_V8QImode: use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1; @@ -14190,8 +14251,12 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, use_vector_set = TARGET_SSE4_1; break; case E_V32QImode: + use_vector_set = TARGET_AVX; + break; case E_V16HImode: use_vector_set = TARGET_AVX; + gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0 + ? gen_vec_setv16hi_0 : NULL; break; case E_V8SImode: use_vector_set = TARGET_AVX; @@ -14239,6 +14304,9 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, use_vector_set = TARGET_AVX512FP16 && one_var == 0; gen_vec_set_0 = gen_vec_setv32hf_0; break; + case E_V32HImode: + use_vector_set = TARGET_AVX512FP16 && one_var == 0; + gen_vec_set_0 = gen_vec_setv32hi_0; default: break; } @@ -14638,7 +14706,7 @@ ix86_expand_vector_init_interleave (machine_mode mode, switch (mode) { case E_V8HFmode: - gen_load_even = gen_vec_setv8hf; + gen_load_even = gen_vec_interleave_lowv8hf; gen_interleave_first_low = gen_vec_interleave_lowv4si; gen_interleave_second_low = gen_vec_interleave_lowv2di; inner_mode = HFmode; @@ -14673,35 +14741,40 @@ ix86_expand_vector_init_interleave (machine_mode mode, op = ops [i + i]; if (inner_mode == HFmode) { - /* Convert HFmode to HImode. */ - op1 = gen_reg_rtx (HImode); - op1 = gen_rtx_SUBREG (HImode, force_reg (HFmode, op), 0); - op = gen_reg_rtx (HImode); - emit_move_insn (op, op1); + rtx even, odd; + /* Use vpuncklwd to pack 2 HFmode. */ + op0 = gen_reg_rtx (V8HFmode); + even = lowpart_subreg (V8HFmode, force_reg (HFmode, op), HFmode); + odd = lowpart_subreg (V8HFmode, + force_reg (HFmode, ops[i + i + 1]), + HFmode); + emit_insn (gen_load_even (op0, even, odd)); } + else + { + /* Extend the odd elment to SImode using a paradoxical SUBREG. */ + op0 = gen_reg_rtx (SImode); + emit_move_insn (op0, gen_lowpart (SImode, op)); - /* Extend the odd elment to SImode using a paradoxical SUBREG. */ - op0 = gen_reg_rtx (SImode); - emit_move_insn (op0, gen_lowpart (SImode, op)); - - /* Insert the SImode value as low element of V4SImode vector. */ - op1 = gen_reg_rtx (V4SImode); - op0 = gen_rtx_VEC_MERGE (V4SImode, - gen_rtx_VEC_DUPLICATE (V4SImode, - op0), - CONST0_RTX (V4SImode), - const1_rtx); - emit_insn (gen_rtx_SET (op1, op0)); + /* Insert the SImode value as low element of V4SImode vector. */ + op1 = gen_reg_rtx (V4SImode); + op0 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, + op0), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (op1, op0)); - /* Cast the V4SImode vector back to a vector in orignal mode. */ - op0 = gen_reg_rtx (mode); - emit_move_insn (op0, gen_lowpart (mode, op1)); + /* Cast the V4SImode vector back to a vector in orignal mode. */ + op0 = gen_reg_rtx (mode); + emit_move_insn (op0, gen_lowpart (mode, op1)); - /* Load even elements into the second position. */ - emit_insn (gen_load_even (op0, - force_reg (inner_mode, - ops [i + i + 1]), - const1_rtx)); + /* Load even elements into the second position. */ + emit_insn (gen_load_even (op0, + force_reg (inner_mode, + ops[i + i + 1]), + const1_rtx)); + } /* Cast vector to FIRST_IMODE vector. 
*/ ops[i] = gen_reg_rtx (first_imode); @@ -15182,6 +15255,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) machine_mode inner_mode = GET_MODE_INNER (mode); machine_mode half_mode; bool use_vec_merge = false; + bool blendm_const = false; rtx tmp; static rtx (*gen_extract[7][2]) (rtx, rtx) = { @@ -15369,7 +15443,14 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) return; case E_V8HFmode: - use_vec_merge = true; + if (TARGET_AVX2) + { + mmode = SImode; + gen_blendm = gen_sse4_1_pblendph; + blendm_const = true; + } + else + use_vec_merge = true; break; case E_V8HImode: @@ -15396,10 +15477,20 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) goto half; case E_V16HFmode: - half_mode = V8HFmode; - j = 6; - n = 8; - goto half; + if (TARGET_AVX2) + { + mmode = SImode; + gen_blendm = gen_avx2_pblendph; + blendm_const = true; + break; + } + else + { + half_mode = V8HFmode; + j = 6; + n = 8; + goto half; + } case E_V16HImode: half_mode = V8HImode; @@ -15560,15 +15651,15 @@ quarter: { tmp = gen_reg_rtx (mode); emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); + rtx merge_mask = gen_int_mode (HOST_WIDE_INT_1U << elt, mmode); /* The avx512*_blendm<mode> expanders have different operand order from VEC_MERGE. In VEC_MERGE, the first input operand is used for elements where the mask is set and second input operand otherwise, in {sse,avx}*_*blend* the first input operand is used for elements where the mask is clear and second input operand otherwise. */ - emit_insn (gen_blendm (target, target, tmp, - force_reg (mmode, - gen_int_mode (HOST_WIDE_INT_1U << elt, - mmode)))); + if (!blendm_const) + merge_mask = force_reg (mmode, merge_mask); + emit_insn (gen_blendm (target, target, tmp, merge_mask)); } else if (use_vec_merge) { diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 5a99ea7..a525a83 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2210,15 +2210,34 @@ remove_partial_avx_dependency (void) != AVX_PARTIAL_XMM_UPDATE_TRUE) continue; - if (!v4sf_const0) - v4sf_const0 = gen_reg_rtx (V4SFmode); - /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and vec_merge with subreg. 
 */
       rtx src = SET_SRC (set);
       rtx dest = SET_DEST (set);
       machine_mode dest_mode = GET_MODE (dest);
+      machine_mode src_mode = GET_MODE (XEXP (src, 0));
+
+      switch (src_mode)
+	{
+	case E_SFmode:
+	case E_DFmode:
+	  if (TARGET_USE_VECTOR_FP_CONVERTS
+	      || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
+	    continue;
+	  break;
+	case E_SImode:
+	case E_DImode:
+	  if (TARGET_USE_VECTOR_CONVERTS
+	      || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
+	    continue;
+	  break;
+	default:
+	  break;
+	}
+
+      if (!v4sf_const0)
+	v4sf_const0 = gen_reg_rtx (V4SFmode);
       rtx zero;
       machine_mode dest_vecmode;
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index fcadfcd..2a2c8b8 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -90,6 +90,8 @@ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */
 VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
 VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
 VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
+VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
+VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
 VECTOR_MODE (INT, TI, 1); /* V1TI */
 VECTOR_MODE (INT, DI, 1); /* V1DI */
 VECTOR_MODE (INT, SI, 1); /* V1SI */
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index c0006b3..e7a3bd4 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -724,7 +724,7 @@ static const struct processor_costs *processor_cost_table[] =
   &slm_cost,
   &slm_cost,
   &slm_cost,
-  &slm_cost,
+  &tremont_cost,
   &slm_cost,
   &slm_cost,
   &skylake_cost,
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index dcae34b..708834a 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -320,7 +320,7 @@ struct ix86_address
   addr_space_t seg;
 };

-extern int ix86_decompose_address (rtx, struct ix86_address *);
+extern bool ix86_decompose_address (rtx, struct ix86_address *);
 extern int memory_address_length (rtx, bool);
 extern void x86_output_aligned_bss (FILE *, tree, const char *,
				     unsigned HOST_WIDE_INT, unsigned);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7b173bc..afc2674 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10101,10 +10101,10 @@ ix86_live_on_entry (bitmap regs)
 }

 /* Extract the parts of an RTL expression that is a valid memory address
-   for an instruction.  Return 0 if the structure of the address is
+   for an instruction.  Return false if the structure of the address is
    grossly off.
*/ -int +bool ix86_decompose_address (rtx addr, struct ix86_address *out) { rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; @@ -10123,17 +10123,17 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) { addr = XEXP (addr, 0); if (CONST_INT_P (addr)) - return 0; + return false; } else if (GET_CODE (addr) == AND && const_32bit_mask (XEXP (addr, 1), DImode)) { addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); if (addr == NULL_RTX) - return 0; + return false; if (CONST_INT_P (addr)) - return 0; + return false; } else if (GET_CODE (addr) == AND) { @@ -10167,7 +10167,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) { addr = SUBREG_REG (addr); if (CONST_INT_P (addr)) - return 0; + return false; } } @@ -10178,7 +10178,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) if (REG_P (SUBREG_REG (addr))) base = addr; else - return 0; + return false; } else if (GET_CODE (addr) == PLUS) { @@ -10189,13 +10189,13 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) do { if (n >= 4) - return 0; + return false; addends[n++] = XEXP (op, 1); op = XEXP (op, 0); } while (GET_CODE (op) == PLUS); if (n >= 4) - return 0; + return false; addends[n] = op; for (i = n; i >= 0; --i) @@ -10205,28 +10205,28 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) { case MULT: if (index) - return 0; + return false; index = XEXP (op, 0); scale_rtx = XEXP (op, 1); break; case ASHIFT: if (index) - return 0; + return false; index = XEXP (op, 0); tmp = XEXP (op, 1); if (!CONST_INT_P (tmp)) - return 0; + return false; scale = INTVAL (tmp); if ((unsigned HOST_WIDE_INT) scale > 3) - return 0; + return false; scale = 1 << scale; break; case ZERO_EXTEND: op = XEXP (op, 0); if (GET_CODE (op) != UNSPEC) - return 0; + return false; /* FALLTHRU */ case UNSPEC: @@ -10235,12 +10235,12 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) && seg == ADDR_SPACE_GENERIC) seg = DEFAULT_TLS_SEG_REG; else - return 0; + return false; break; case SUBREG: if (!REG_P (SUBREG_REG (op))) - return 0; + return false; /* FALLTHRU */ case REG: @@ -10249,7 +10249,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) else if (!index) index = op; else - return 0; + return false; break; case CONST: @@ -10257,12 +10257,12 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) case SYMBOL_REF: case LABEL_REF: if (disp) - return 0; + return false; disp = op; break; default: - return 0; + return false; } } } @@ -10277,10 +10277,10 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) index = XEXP (addr, 0); tmp = XEXP (addr, 1); if (!CONST_INT_P (tmp)) - return 0; + return false; scale = INTVAL (tmp); if ((unsigned HOST_WIDE_INT) scale > 3) - return 0; + return false; scale = 1 << scale; } else @@ -10294,14 +10294,14 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) && REG_P (SUBREG_REG (index))) ; else - return 0; + return false; } /* Extract the integral value of scale. */ if (scale_rtx) { if (!CONST_INT_P (scale_rtx)) - return 0; + return false; scale = INTVAL (scale_rtx); } @@ -10354,7 +10354,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) out->scale = scale; out->seg = seg; - return 1; + return true; } /* Return cost of the memory address x. @@ -16976,6 +16976,7 @@ ix86_sched_init_global (FILE *, int, int) case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: case PROCESSOR_HASWELL: + case PROCESSOR_TREMONT: case PROCESSOR_GENERIC: /* Do not perform multipass scheduling for pre-reload schedule to save compile time. 
 */
@@ -19443,8 +19444,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
       /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
-	 the vec_dupv4hi pattern.  */
-      if (GET_MODE_SIZE (from) < 4)
+	 the vec_dupv4hi pattern.
+	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
+	 register.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+      if (GET_MODE_SIZE (from) < mov_size)
	return false;
     }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e76bb55..ec60b89 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -334,6 +334,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
	ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
+#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY]
+#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \
+	ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY]
 #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
	ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
 #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 13f6f57..c82a9dc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4535,7 +4535,8 @@
	(float_extend:DF
	  (match_operand:SF 1 "nonimmediate_operand")))]
   "!TARGET_AVX
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
+   && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!REG_P (operands[1])
       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
@@ -4708,7 +4709,8 @@
	(float_truncate:SF
	  (match_operand:DF 1 "nonimmediate_operand")))]
   "!TARGET_AVX
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
+   && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!REG_P (operands[1])
       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
@@ -5243,7 +5245,8 @@
 [(set (match_operand:MODEF 0 "sse_reg_operand")
	(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
   "!TARGET_AVX
-   && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
+   && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 516eb45..d7a1328 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -396,6 +396,13 @@ (define_mode_iterator VF1_AVX512ER_128_256
  [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
+(define_mode_iterator VFH_AVX512VL
+  [(V32HF "TARGET_AVX512FP16")
+   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
@@ -405,6 +412,9 @@ (define_mode_iterator VF_AVX512FP16
  [V32HF V16HF V8HF])
+(define_mode_iterator VF_AVX512FP16VL
+  [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
+
 ;; All vector integer modes
 (define_mode_iterator VI
  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
@@ -493,6
+503,11 @@ (define_mode_iterator VI2_AVX512VL [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI]) +(define_mode_iterator VI2H_AVX512VL + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI + (V8SI "TARGET_AVX512VL") V16SI + V8DI ]) + (define_mode_iterator VI1_AVX512VL_F [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")]) @@ -622,6 +637,9 @@ (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) +(define_mode_iterator VF4_128_8_256 + [V4DF V4SF]) + (define_mode_iterator VI1_AVX512VLBW [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL") (V16QI "TARGET_AVX512VL")]) @@ -707,7 +725,8 @@ [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")]) (define_mode_attr vecmemsuffix - [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}") + [(V32HF "{z}") (V16HF "{y}") (V8HF "{x}") + (V16SF "{z}") (V8SF "{y}") (V4SF "{x}") (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")]) (define_mode_attr ssedoublemodelower @@ -727,6 +746,11 @@ [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI") (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")]) +(define_mode_attr sseintconvert + [(V32HI "w") (V16HI "w") (V8HI "w") + (V16SI "dq") (V8SI "dq") (V4SI "dq") + (V8DI "qq") (V4DI "qq") (V2DI "qq")]) + ;; All 128bit vector integer modes (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) @@ -768,6 +792,7 @@ (V32HF "TARGET_AVX512BW")]) ;; Int-float size matches +(define_mode_iterator VI2F [V8HI V16HI V32HI V8HF V16HF V32HF]) (define_mode_iterator VI4F_128 [V4SI V4SF]) (define_mode_iterator VI8F_128 [V2DI V2DF]) (define_mode_iterator VI4F_256 [V8SI V8SF]) @@ -782,6 +807,12 @@ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) (define_mode_iterator VF48_I1248 [V16SI V16SF V8DI V8DF V32HI V64QI]) +(define_mode_iterator VF48H_AVX512VL + [V8DF V16SF (V8SF "TARGET_AVX512VL")]) + +(define_mode_iterator VF48_128 + [V2DF V4SF]) + (define_mode_iterator VI48F [V16SI V16SF V8DI V8DF (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") @@ -806,6 +837,7 @@ (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") V16SF V8DF]) +(define_mode_iterator V8_128 [V8HI V8HF]) (define_mode_iterator V16_256 [V16HI V16HF]) (define_mode_iterator V32_512 [V32HI V32HF]) @@ -918,9 +950,9 @@ ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr sseintvecmode - [(V16SF "V16SI") (V8DF "V8DI") - (V8SF "V8SI") (V4DF "V4DI") - (V4SF "V4SI") (V2DF "V2DI") + [(V32HF "V32HI") (V16SF "V16SI") (V8DF "V8DI") + (V16HF "V16HI") (V8SF "V8SI") (V4DF "V4DI") + (V8HF "V8HI") (V4SF "V4SI") (V2DF "V2DI") (V16SI "V16SI") (V8DI "V8DI") (V8SI "V8SI") (V4DI "V4DI") (V4SI "V4SI") (V2DI "V2DI") @@ -971,6 +1003,13 @@ (V4SF "v2sf") (V32HF "v16hf") (V16HF "v8hf") (V8HF "v4hf")]) +;; Mapping of vector modes to vector hf modes of conversion. +(define_mode_attr ssePHmode + [(V32HI "V32HF") (V16HI "V16HF") (V8HI "V8HF") + (V16SI "V16HF") (V8SI "V8HF") (V4SI "V8HF") + (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF") + (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")]) + ;; Mapping of vector modes to packed single mode of the same size (define_mode_attr ssePSmode [(V16SI "V16SF") (V8DF "V16SF") @@ -1116,7 +1155,8 @@ ;; Mapping of mode to cast intrinsic name (define_mode_attr castmode - [(V8SI "si") (V8SF "ps") (V4DF "pd") + [(V4SF "ps") (V2DF "pd") + (V8SI "si") (V8SF "ps") (V4DF "pd") (V16SI "si") (V16SF "ps") (V8DF "pd")]) ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise. 
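As an aside on the mode attributes added above: ssePHmode records, for each source vector mode, the _Float16 vector mode produced or consumed by the FP16 conversion patterns introduced later in this patch (for example V8DF maps to V8HF, since converting eight doubles yields eight half-precision values held in one 128-bit register). The following is a minimal scalar C sketch of that pd-to-ph conversion, purely illustrative and not part of the patch, assuming a compiler with _Float16 support (the function name is hypothetical):

#include <stddef.h>

/* Scalar equivalent of the packed double -> half conversion
   (vcvtpd2ph) that the new patterns describe; with -mavx512fp16 a
   vectorizer may emit the packed instruction for a loop like this.  */
static void cvt_pd_to_ph (const double *src, _Float16 *dst, size_t n)
{
  for (size_t i = 0; i < n; i++)
    dst[i] = (_Float16) src[i];
}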
@@ -1349,13 +1389,13 @@ [(set (match_dup 0) (match_dup 1))]) (define_insn "avx512f_mov<ssescalarmodelower>_mask" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (vec_merge:VF_128 - (match_operand:VF_128 2 "register_operand" "v") - (match_operand:VF_128 3 "nonimm_or_0_operand" "0C") + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (vec_merge:VFH_128 + (match_operand:VFH_128 2 "register_operand" "v") + (match_operand:VFH_128 3 "nonimm_or_0_operand" "0C") (match_operand:QI 4 "register_operand" "Yk")) - (match_operand:VF_128 1 "register_operand" "v") + (match_operand:VFH_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" @@ -1368,7 +1408,7 @@ (vec_merge:<ssevecmode> (vec_merge:<ssevecmode> (vec_duplicate:<ssevecmode> - (match_operand:MODEF 1 "memory_operand")) + (match_operand:MODEFH 1 "memory_operand")) (match_operand:<ssevecmode> 2 "nonimm_or_0_operand") (match_operand:QI 3 "register_operand")) (match_dup 4) @@ -1381,7 +1421,7 @@ (vec_merge:<ssevecmode> (vec_merge:<ssevecmode> (vec_duplicate:<ssevecmode> - (match_operand:MODEF 1 "memory_operand" "m")) + (match_operand:MODEFH 1 "memory_operand" "m")) (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C") (match_operand:QI 3 "register_operand" "Yk")) (match_operand:<ssevecmode> 4 "const0_operand" "C") @@ -1394,11 +1434,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx512f_store<mode>_mask" - [(set (match_operand:MODEF 0 "memory_operand" "=m") - (if_then_else:MODEF + [(set (match_operand:MODEFH 0 "memory_operand" "=m") + (if_then_else:MODEFH (and:QI (match_operand:QI 2 "register_operand" "Yk") (const_int 1)) - (vec_select:MODEF + (vec_select:MODEFH (match_operand:<ssevecmode> 1 "register_operand" "v") (parallel [(const_int 0)])) (match_dup 0)))] @@ -2338,6 +2378,30 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) +(define_insn "avx512fp16_rcp<mode>2<mask_name>" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v") + (unspec:VF_AVX512FP16VL + [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "vm")] + UNSPEC_RCP))] + "TARGET_AVX512FP16" + "vrcpph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512fp16_vmrcpv8hf2<mask_scalar_name>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_merge:V8HF + (unspec:V8HF [(match_operand:V8HF 1 "nonimmediate_operand" "vm")] + UNSPEC_RCP) + (match_operand:V8HF 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vrcpsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + (define_insn "<mask_codefor>rcp14<mode><mask_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") (unspec:VF_AVX512VL @@ -2381,8 +2445,8 @@ (set_attr "mode" "<MODE>")]) (define_expand "sqrt<mode>2" - [(set (match_operand:VF2 0 "register_operand") - (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))] + [(set (match_operand:VF2H 0 "register_operand") + (sqrt:VF2H (match_operand:VF2H 1 "vector_operand")))] "TARGET_SSE2") (define_expand "sqrt<mode>2" @@ -2402,8 +2466,8 @@ }) (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>" - [(set (match_operand:VF 0 "register_operand" "=x,v") - (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))] + [(set (match_operand:VFH 0 "register_operand" "=x,v") + 
(sqrt:VFH (match_operand:VFH 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))] "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ sqrt<ssemodesuffix>\t{%1, %0|%0, %1} @@ -2416,11 +2480,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=x,v") - (vec_merge:VF_128 - (sqrt:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")) - (match_operand:VF_128 2 "register_operand" "0,v") + [(set (match_operand:VFH_128 0 "register_operand" "=x,v") + (vec_merge:VFH_128 + (sqrt:VFH_128 + (match_operand:VFH_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")) + (match_operand:VFH_128 2 "register_operand" "0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -2473,6 +2537,16 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>")]) +(define_insn "<sse>_rsqrt<mode>2<mask_name>" + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v") + (unspec:VF_AVX512FP16VL + [(match_operand:VF_AVX512FP16VL 1 "vector_operand" "vBm")] UNSPEC_RSQRT))] + "TARGET_AVX512FP16" + "vrsqrtph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + (define_insn "<mask_codefor>rsqrt14<mode><mask_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") (unspec:VF_AVX512VL @@ -2548,6 +2622,19 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) +(define_insn "avx512fp16_vmrsqrtv8hf2<mask_scalar_name>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_merge:V8HF + (unspec:V8HF [(match_operand:V8HF 1 "nonimmediate_operand" "vm")] + UNSPEC_RSQRT) + (match_operand:V8HF 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vrsqrtsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}" + [(set_attr "type" "sse") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + (define_expand "cond_<code><mode>" [(set (match_operand:VF 0 "register_operand") (vec_merge:VF @@ -3200,28 +3287,28 @@ }) (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_REDUCE))] - "TARGET_AVX512DQ" + "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))" "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (unspec:VF_128 - [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (unspec:VFH_128 + [(match_operand:VFH_128 1 "register_operand" "v") + (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_REDUCE) (match_dup 1) (const_int 
1)))] - "TARGET_AVX512DQ" + "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))" "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}" [(set_attr "type" "sse") (set_attr "prefix" "evex") @@ -5655,6 +5742,552 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; +;; Parallel half-precision floating point conversion operations +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_int_iterator UNSPEC_US_FIX_NOTRUNC + [UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_FIX_NOTRUNC]) + +(define_int_attr sseintconvertsignprefix + [(UNSPEC_UNSIGNED_FIX_NOTRUNC "u") + (UNSPEC_FIX_NOTRUNC "")]) + +(define_mode_attr qq2phsuff + [(V32HI "") (V16HI "") (V8HI "") + (V16SI "") (V8SI "{y}") (V4SI "{x}") + (V8DI "{z}") (V4DI "{y}") (V2DI "{x}") + (V16SF "") (V8SF "{y}") (V4SF "{x}") + (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")]) + +(define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><mask_name><round_name>" + [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v") + (unspec:VI248_AVX512VL + [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")] + UNSPEC_US_FIX_NOTRUNC))] + "TARGET_AVX512FP16" + "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><round_name>" + [(set (match_operand:<ssePHmode> 0 "register_operand" "=v") + (any_float:<ssePHmode> + (match_operand:VI2H_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")))] + "TARGET_AVX512FP16" + "vcvt<floatsuffix><sseintconvert>2ph<round_qq2phsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm")) + (match_dup 2)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[2] = CONST0_RTX (V4HFmode);") + +(define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm")) + (match_operand:V4HF 2 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm")) + (vec_select:V4HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_dup 4)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[4] = CONST0_RTX (V4HFmode);") + +(define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + 
(vec_merge:V4HF + (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm")) + (vec_select:V4HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V4HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask_1" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (any_float:V4HF (match_operand:VI4_128_8_256 1 + "vector_operand" "vm")) + (match_operand:V4HF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V4HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<floatsuffix>qq2ph_v2di" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm")) + (match_dup 2)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[2] = CONST0_RTX (V6HFmode);") + +(define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm")) + (match_operand:V6HF 2 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<floatsuffix>qq2ph{x}\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_expand "avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm")) + (vec_select:V2HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_dup 4)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[4] = CONST0_RTX (V6HFmode);") + +(define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm")) + (vec_select:V2HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V6HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<floatsuffix>qq2ph{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (any_float:V2HF (match_operand:V2DI 1 + "vector_operand" "vm")) + (match_operand:V2HF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V6HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<floatsuffix>qq2ph{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" 
"TI")]) + +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(vec_select:HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0)]))] + UNSPEC_US_FIX_NOTRUNC))] + "TARGET_AVX512FP16" + "vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %1<round_op2>}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2" + [(set (match_operand:SWI48 0 "register_operand" "=r,r") + (unspec:SWI48 + [(match_operand:HF 1 "nonimmediate_operand" "v,m")] + UNSPEC_US_FIX_NOTRUNC))] + "TARGET_AVX512FP16" + "vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_mode_attr sseicvtsuffix + [(SI "l") (DI "q")]) +(define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_merge:V8HF + (vec_duplicate:V8HF + (any_float:HF + (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>"))) + (match_operand:V8HF 1 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vcvt<floatsuffix>si2sh{<sseicvtsuffix>}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) + +(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>" + [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v") + (any_fix:VI2H_AVX512VL + (match_operand:<ssePHmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] + "TARGET_AVX512FP16" + "vcvttph2<fixsuffix><sseintconvert>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>" + [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v") + (any_fix:VI4_128_8_256 + (vec_select:V4HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2<fixsuffix><sseintconvert>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512fp16_fix<fixunssuffix>_trunc<mode>2_load<mask_name>" + [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v") + (any_fix:VI4_128_8_256 + (match_operand:V4HF 1 "memory_operand" "m")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2<fixsuffix><sseintconvert>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (any_fix:V2DI + (vec_select:V2HF + (match_operand:V8HF 1 "nonimmediate_operand" "v") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512fp16_fix<fixunssuffix>_truncv2di2_load<mask_name>" + [(set (match_operand:V2DI 0 
"register_operand" "=v") + (any_fix:V2DI + (match_operand:V2HF 1 "memory_operand" "m")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvttph2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (any_fix:SWI48 + (vec_select:HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0)]))))] + "TARGET_AVX512FP16" + "%vcvttsh2<fixsuffix>si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2_mem" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (any_fix:SWI48 + (match_operand:HF 1 "memory_operand" "vm")))] + "TARGET_AVX512FP16" + "%vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_mode_attr ph2pssuffix + [(V16SF "x") (V8SF "x") (V4SF "x") + (V8DF "") (V4DF "") (V2DF "")]) + +(define_insn "avx512fp16_float_extend_ph<mode>2<mask_name><round_saeonly_name>" + [(set (match_operand:VF48H_AVX512VL 0 "register_operand" "=v") + (float_extend:VF48H_AVX512VL + (match_operand:<ssePHmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] + "TARGET_AVX512FP16" + "vcvtph2<castmode><ph2pssuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_float_extend_ph<mode>2<mask_name>" + [(set (match_operand:VF4_128_8_256 0 "register_operand" "=v") + (float_extend:VF4_128_8_256 + (vec_select:V4HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtph2<castmode><ph2pssuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512fp16_float_extend_ph<mode>2_load<mask_name>" + [(set (match_operand:VF4_128_8_256 0 "register_operand" "=v") + (float_extend:VF4_128_8_256 + (match_operand:V4HF 1 "memory_operand" "m")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtph2<castmode><ph2pssuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_float_extend_phv2df2<mask_name>" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (float_extend:V2DF + (vec_select:V2HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtph2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512fp16_float_extend_phv2df2_load<mask_name>" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (float_extend:V2DF + (match_operand:V2HF 1 "memory_operand" "m")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtph2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn 
"avx512fp16_vcvt<castmode>2ph_<mode><mask_name><round_name>" + [(set (match_operand:<ssePHmode> 0 "register_operand" "=v") + (float_truncate:<ssePHmode> + (match_operand:VF48H_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")))] + "TARGET_AVX512FP16" + "vcvt<castmode>2ph<ph2pssuffix><round_qq2phsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<castmode>2ph_<mode>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V4HF + (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (match_dup 2)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[2] = CONST0_RTX (V4HFmode);") + +(define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V4HF + (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (match_operand:V4HF 2 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<castmode>2ph_<mode>_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (float_truncate:V4HF + (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (vec_select:V4HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_dup 4)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[4] = CONST0_RTX (V4HFmode);") + +(define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (float_truncate:V4HF + (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (vec_select:V4HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V4HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask_1" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (float_truncate:V4HF + (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (match_operand:V4HF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V4HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvtpd2ph_v2df" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V2HF + (match_operand:V2DF 1 "vector_operand" "vm")) + (match_dup 2)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[2] = CONST0_RTX (V6HFmode);") + +(define_insn "*avx512fp16_vcvtpd2ph_v2df" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V2HF + (match_operand:V2DF 1 "vector_operand" 
"vm")) + (match_operand:V6HF 2 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtpd2ph{x}\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_expand "avx512fp16_vcvtpd2ph_v2df_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (float_truncate:V2HF + (match_operand:V2DF 1 "vector_operand" "vm")) + (vec_select:V2HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_dup 4)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[4] = CONST0_RTX (V6HFmode);") + +(define_insn "*avx512fp16_vcvtpd2ph_v2df_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand" "vm")) + (vec_select:V2HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V6HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtpd2ph{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512fp16_vcvtpd2ph_v2df_mask_1" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (float_truncate:V2HF + (match_operand:V2DF 1 "vector_operand" "vm")) + (match_operand:V2HF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V6HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtpd2ph{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name><round_saeonly_scalar_name>" + [(set (match_operand:VF48_128 0 "register_operand" "=v") + (vec_merge:VF48_128 + (vec_duplicate:VF48_128 + (float_extend:<ssescalarmode> + (vec_select:HF + (match_operand:V8HF 1 "register_operand" "v") + (parallel [(const_int 0)])))) + (match_operand:VF48_128 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vcvtsh2<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_saeonly_scalar_mask_op3>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name>_mem" + [(set (match_operand:VF48_128 0 "register_operand" "=v") + (vec_merge:VF48_128 + (vec_duplicate:VF48_128 + (float_extend:<ssescalarmode> + (match_operand:HF 1 "memory_operand" "m"))) + (match_operand:VF48_128 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vcvtsh2<ssescalarmodesuffix>\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name><round_scalar_name>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_merge:V8HF + (vec_duplicate:V8HF + (float_truncate:HF + (vec_select:<ssescalarmode> + (match_operand:VF48_128 1 "register_operand" "v") + (parallel [(const_int 0)])))) + (match_operand:V8HF 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vcvt<ssescalarmodesuffix>2sh\t{<round_scalar_mask_op3>%1, %2, 
%0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name>_mem" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_merge:V8HF + (vec_duplicate:V8HF + (float_truncate:HF + (match_operand:MODEF 1 "memory_operand" "m"))) + (match_operand:V8HF 2 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vcvt<ssescalarmodesuffix>2sh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Parallel single-precision floating point conversion operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -8759,11 +9392,11 @@ ;; vmovw clears also the higer bits (define_insn "vec_set<mode>_0" - [(set (match_operand:VF_AVX512FP16 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512FP16 - (vec_duplicate:VF_AVX512FP16 - (match_operand:HF 2 "nonimmediate_operand" "r,m")) - (match_operand:VF_AVX512FP16 1 "const0_operand" "C,C") + [(set (match_operand:VI2F 0 "register_operand" "=v,v") + (vec_merge:VI2F + (vec_duplicate:VI2F + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m")) + (match_operand:VI2F 1 "const0_operand" "C,C") (const_int 1)))] "TARGET_AVX512FP16" "@ @@ -9031,7 +9664,8 @@ [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")]) (define_mode_attr extract_suf - [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")]) + [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2") + (V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")]) (define_mode_iterator AVX512_VEC [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI]) @@ -9891,16 +10525,33 @@ "operands[1] = gen_lowpart (HFmode, operands[1]);") (define_insn "*vec_extracthf" - [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=r,m") + [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v") (vec_select:HF - (match_operand:V8HF 1 "register_operand" "v,v") + (match_operand:V8HF 1 "register_operand" "v,v,0,v") (parallel [(match_operand:SI 2 "const_0_to_7_operand")])))] "TARGET_SSE2" - "@ - vpextrw\t{%2, %1, %k0|%k0, %1, %2} - vpextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog1") +{ + switch (which_alternative) + { + case 0: + return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}"; + case 1: + return "vpextrw\t{%2, %1, %0|%0, %1, %2}"; + + case 2: + operands[2] = GEN_INT (INTVAL (operands[2]) * 2); + return "psrldq\t{%2, %0|%0, %2}"; + case 3: + operands[2] = GEN_INT (INTVAL (operands[2]) * 2); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,*,noavx,avx") + (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1") (set_attr "prefix" "maybe_evex") (set_attr "mode" "TI")]) @@ -10255,11 +10906,11 @@ }) (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (unspec:VF_128 - [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")] + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (unspec:VFH_128 + [(match_operand:VFH_128 1 "register_operand" "v") + (match_operand:VFH_128 2 "<round_scalar_nimm_predicate>" 
"<round_scalar_constraint>")] UNSPEC_SCALEF) (match_dup 1) (const_int 1)))] @@ -10269,10 +10920,10 @@ (set_attr "mode" "<ssescalarmode>")]) (define_insn "<avx512>_scalef<mode><mask_name><round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "v") - (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")] + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") + (match_operand:VFH_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")] UNSPEC_SCALEF))] "TARGET_AVX512F" "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" @@ -10558,8 +11209,8 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL [(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_GETEXP))] "TARGET_AVX512F" "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"; @@ -10567,11 +11218,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (unspec:VF_128 - [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")] + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (unspec:VFH_128 + [(match_operand:VFH_128 1 "register_operand" "v") + (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")] UNSPEC_GETEXP) (match_dup 1) (const_int 1)))] @@ -10603,9 +11254,21 @@ (match_operand:V48_256_512_AVX512VL 1 "register_operand" "v") (parallel [(match_operand 2 "<vec_extract_imm_predicate>")])))] "TARGET_AVX512F - && INTVAL(operands[2]) >= 16 / GET_MODE_SIZE (<ssescalarmode>mode)" - "valign<ternlogsuffix>\t{%2, %1, %1, %<xtg_mode>0|%<xtg_mode>0, %1, %1, %2}"; - [(set_attr "prefix" "evex") + && INTVAL(operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode) >= 16" +{ + int byte_offset = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode); + if (byte_offset % 16 == 0) + { + operands[2] = GEN_INT (byte_offset / 16); + if (byte_offset / 16 == 1) + return "vextract<shuffletype><extract_suf>\t{%2, %t1, %x0|%x0, %t1, %2}"; + else + return "vextract<shuffletype><extract_suf>\t{%2, %1, %x0|%x0, %1, %2}"; + } + else + return "valign<ternlogsuffix>\t{%2, %1, %1, %<xtg_mode>0|%<xtg_mode>0, %1, %1, %2}"; +} + [(set_attr "prefix" "maybe_evex") (set_attr "mode" "<sseintvecinsnmode>")]) (define_expand "avx512f_shufps512_mask" @@ -10737,9 +11400,9 @@ (set_attr "mode" "<ssescalarmode>")]) (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>") + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" 
"<round_saeonly_constraint>") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_ROUND))] "TARGET_AVX512F" @@ -10749,13 +11412,13 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (unspec:VF_128 - [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (unspec:VFH_128 + [(match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_ROUND) - (match_operand:VF_128 1 "register_operand" "v") + (match_operand:VFH_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}" @@ -10764,14 +11427,14 @@ (set_attr "mode" "<MODE>")]) (define_insn "*avx512f_rndscale<mode><round_saeonly_name>" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (vec_duplicate:VF_128 + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (vec_duplicate:VFH_128 (unspec:<ssescalarmode> [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_ROUND)) - (match_operand:VF_128 1 "register_operand" "v") + (match_operand:VFH_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}" @@ -15359,12 +16022,12 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "avx512bw_interleave_highv32hi<mask_name>" - [(set (match_operand:V32HI 0 "register_operand" "=v") - (vec_select:V32HI - (vec_concat:V64HI - (match_operand:V32HI 1 "register_operand" "v") - (match_operand:V32HI 2 "nonimmediate_operand" "vm")) +(define_insn "avx512bw_interleave_high<mode><mask_name>" + [(set (match_operand:V32_512 0 "register_operand" "=v") + (vec_select:V32_512 + (vec_concat:<ssedoublevecmode> + (match_operand:V32_512 1 "register_operand" "v") + (match_operand:V32_512 2 "nonimmediate_operand" "vm")) (parallel [(const_int 4) (const_int 36) (const_int 5) (const_int 37) (const_int 6) (const_int 38) @@ -15387,12 +16050,12 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "avx2_interleave_highv16hi<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=Yw") - (vec_select:V16HI - (vec_concat:V32HI - (match_operand:V16HI 1 "register_operand" "Yw") - (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")) +(define_insn "avx2_interleave_high<mode><mask_name>" + [(set (match_operand:V16_256 0 "register_operand" "=Yw") + (vec_select:V16_256 + (vec_concat:<ssedoublevecmode> + (match_operand:V16_256 1 "register_operand" "Yw") + (match_operand:V16_256 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 4) (const_int 20) (const_int 5) (const_int 21) (const_int 6) (const_int 22) @@ -15407,12 +16070,12 @@ (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) -(define_insn "vec_interleave_highv8hi<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") - (vec_select:V8HI - (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "0,Yw") - (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")) +(define_insn 
"vec_interleave_high<mode><mask_name>" + [(set (match_operand:V8_128 0 "register_operand" "=x,Yw") + (vec_select:V8_128 + (vec_concat:<ssedoublevecmode> + (match_operand:V8_128 1 "register_operand" "0,Yw") + (match_operand:V8_128 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) (const_int 6) (const_int 14) @@ -15427,12 +16090,12 @@ (set_attr "prefix" "orig,maybe_vex") (set_attr "mode" "TI")]) -(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>" - [(set (match_operand:V32HI 0 "register_operand" "=v") - (vec_select:V32HI - (vec_concat:V64HI - (match_operand:V32HI 1 "register_operand" "v") - (match_operand:V32HI 2 "nonimmediate_operand" "vm")) +(define_insn "<mask_codefor>avx512bw_interleave_low<mode><mask_name>" + [(set (match_operand:V32_512 0 "register_operand" "=v") + (vec_select:V32_512 + (vec_concat:<ssedoublevecmode> + (match_operand:V32_512 1 "register_operand" "v") + (match_operand:V32_512 2 "nonimmediate_operand" "vm")) (parallel [(const_int 0) (const_int 32) (const_int 1) (const_int 33) (const_int 2) (const_int 34) @@ -15455,12 +16118,12 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "avx2_interleave_lowv16hi<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=Yw") - (vec_select:V16HI - (vec_concat:V32HI - (match_operand:V16HI 1 "register_operand" "Yw") - (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")) +(define_insn "avx2_interleave_low<mode><mask_name>" + [(set (match_operand:V16_256 0 "register_operand" "=Yw") + (vec_select:V16_256 + (vec_concat:<ssedoublevecmode> + (match_operand:V16_256 1 "register_operand" "Yw") + (match_operand:V16_256 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 0) (const_int 16) (const_int 1) (const_int 17) (const_int 2) (const_int 18) @@ -15475,12 +16138,12 @@ (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) -(define_insn "vec_interleave_lowv8hi<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") - (vec_select:V8HI - (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "0,Yw") - (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")) +(define_insn "vec_interleave_low<mode><mask_name>" + [(set (match_operand:V8_128 0 "register_operand" "=x,Yw") + (vec_select:V8_128 + (vec_concat:<ssedoublevecmode> + (match_operand:V8_128 1 "register_operand" "0,Yw") + (match_operand:V8_128 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 2) (const_int 10) @@ -15655,6 +16318,7 @@ (V4SI "avx512dq") (V2DI "avx512dq")]) ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. +;; For V8HFmode and TARGET_AVX2, broadcastw + pblendw should be better. 
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v") (vec_merge:PINSR_MODE @@ -15664,7 +16328,8 @@ (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE2 && ((unsigned) exact_log2 (INTVAL (operands[3])) - < GET_MODE_NUNITS (<MODE>mode))" + < GET_MODE_NUNITS (<MODE>mode)) + && !(<MODE>mode == V8HFmode && TARGET_AVX2)" { operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); @@ -15672,26 +16337,18 @@ { case 0: if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) - return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; + return "pinsr<sseintmodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; /* FALLTHRU */ case 1: - return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; + return "pinsr<sseintmodesuffix>\t{%3, %2, %0|%0, %2, %3}"; case 2: case 4: if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) - { - if (<MODE>mode == V8HFmode) - return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; - else - return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; - } + return "vpinsr<sseintmodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; /* FALLTHRU */ case 3: case 5: - if (<MODE>mode == V8HFmode) - return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; - else - return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vpinsr<sseintmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } @@ -19179,11 +19836,14 @@ (lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))] "operands[3] = gen_lowpart (<MODE>mode, operands[3]);") -(define_insn "sse4_1_pblendw" - [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x") - (vec_merge:V8HI - (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm") - (match_operand:V8HI 1 "register_operand" "0,0,x") +(define_mode_attr blendsuf + [(V8HI "w") (V8HF "ph")]) + +(define_insn "sse4_1_pblend<blendsuf>" + [(set (match_operand:V8_128 0 "register_operand" "=Yr,*x,x") + (vec_merge:V8_128 + (match_operand:V8_128 2 "vector_operand" "YrBm,*xBm,xm") + (match_operand:V8_128 1 "register_operand" "0,0,x") (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))] "TARGET_SSE4_1" "@ @@ -19210,6 +19870,47 @@ operands[3] = GEN_INT (val << 8 | val); }) +(define_expand "avx2_pblendph" + [(set (match_operand:V16HF 0 "register_operand") + (vec_merge:V16HF + (match_operand:V16HF 2 "register_operand") + (match_operand:V16HF 1 "register_operand") + (match_operand:SI 3 "const_int_operand")))] + "TARGET_AVX2 + && !((INTVAL (operands[3]) & 0xff) && (INTVAL (operands[3]) & 0xff00))" +{ + int mask = INTVAL (operands[3]); + if (mask == 0) + emit_move_insn (operands[0], operands[1]); + else + { + rtx tmp = gen_reg_rtx (V16HImode); + rtx blendw_idx, blendd_idx; + + if (mask & 0xff) + { + blendw_idx = GEN_INT (mask & 0xff); + blendd_idx = GEN_INT (15); + } + else + { + blendw_idx = GEN_INT (mask >> 8 & 0xff); + blendd_idx = GEN_INT (240); + } + operands[1] = lowpart_subreg (V16HImode, operands[1], V16HFmode); + operands[2] = lowpart_subreg (V16HImode, operands[2], V16HFmode); + emit_insn (gen_avx2_pblendw (tmp, operands[1], operands[2], blendw_idx)); + + operands[0] = lowpart_subreg (V8SImode, operands[0], V16HFmode); + tmp = lowpart_subreg (V8SImode, tmp, V16HImode); + operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode); + emit_insn (gen_avx2_pblenddv8si (operands[0], operands[1], + tmp, blendd_idx)); + } + + DONE; +}) + (define_insn "*avx2_pblendw" [(set (match_operand:V16HI 0 "register_operand" "=x") (vec_merge:V16HI @@ -24714,10 +25415,10 @@ (define_insn 
"avx512dq_fpclass<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> - [(match_operand:VF_AVX512VL 1 "vector_operand" "vm") + [(match_operand:VFH_AVX512VL 1 "vector_operand" "vm") (match_operand 2 "const_0_to_255_operand" "n")] UNSPEC_FPCLASS))] - "TARGET_AVX512DQ" + "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)" "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; [(set_attr "type" "sse") (set_attr "length_immediate" "1") @@ -24728,11 +25429,11 @@ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (and:<avx512fmaskmode> (unspec:<avx512fmaskmode> - [(match_operand:VF_128 1 "nonimmediate_operand" "vm") + [(match_operand:VFH_128 1 "nonimmediate_operand" "vm") (match_operand 2 "const_0_to_255_operand" "n")] UNSPEC_FPCLASS) (const_int 1)))] - "TARGET_AVX512DQ" + "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)" "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; [(set_attr "type" "sse") (set_attr "length_immediate" "1") @@ -24740,9 +25441,9 @@ (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>") + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>") (match_operand:SI 2 "const_0_to_15_operand")] UNSPEC_GETMANT))] "TARGET_AVX512F" @@ -24751,11 +25452,11 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>" - [(set (match_operand:VF_128 0 "register_operand" "=v") - (vec_merge:VF_128 - (unspec:VF_128 - [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") + [(set (match_operand:VFH_128 0 "register_operand" "=v") + (vec_merge:VFH_128 + (unspec:VFH_128 + [(match_operand:VFH_128 1 "register_operand" "v") + (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") (match_operand:SI 3 "const_0_to_15_operand")] UNSPEC_GETMANT) (match_dup 1) diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 717561a..157d49f 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -153,6 +153,7 @@ (define_subst_attr "round_mask_op4" "round" "" "<round_mask_operand4>") (define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>") (define_subst_attr "round_constraint" "round" "vm" "v") +(define_subst_attr "round_qq2phsuff" "round" "<qq2phsuff>" "") (define_subst_attr "bcst_round_constraint" "round" "vmBr" "v") (define_subst_attr "round_constraint2" "round" "m" "v") (define_subst_attr "round_constraint3" "round" "rm" "r") diff --git a/gcc/config/i386/vxworks.h b/gcc/config/i386/vxworks.h index ebda7d9..0676cb4 100644 --- a/gcc/config/i386/vxworks.h +++ b/gcc/config/i386/vxworks.h @@ -73,37 +73,37 @@ along with GCC; see the file COPYING3. 
If not see VXWORKS_OS_CPP_BUILTINS (); \ if (TARGET_64BIT) \ VX_CPUDEF (X86_64); \ - else if (TARGET_PENTIUM4) \ + else if (TARGET_CPU_P (PENTIUM4)) \ { \ VX_CPUDEF (PENTIUM4); \ VX_CPUVDEF (PENTIUM4); \ } \ - else if (TARGET_CORE2) \ + else if (TARGET_CPU_P (CORE2)) \ VX_CPUDEF (CORE2); \ - else if (TARGET_NEHALEM) \ + else if (TARGET_CPU_P (NEHALEM)) \ VX_CPUDEF (NEHALEM); \ - else if (TARGET_SANDYBRIDGE) \ + else if (TARGET_CPU_P (SANDYBRIDGE)) \ VX_CPUDEF (SANDYBRIDGE); \ - else if (TARGET_HASWELL) \ + else if (TARGET_CPU_P (HASWELL)) \ VX_CPUDEF (HASWELL); \ - else if (TARGET_SILVERMONT) \ + else if (TARGET_CPU_P (SILVERMONT)) \ VX_CPUDEF (SILVERMONT); \ - else if (TARGET_SKYLAKE || TARGET_SKYLAKE_AVX512) \ + else if (TARGET_CPU_P (SKYLAKE) || TARGET_CPU_P (SKYLAKE_AVX512)) \ VX_CPUDEF (SKYLAKE); \ - else if (TARGET_GOLDMONT) \ + else if (TARGET_CPU_P (GOLDMONT)) \ VX_CPUDEF (GOLDMONT); \ else if (TARGET_VXWORKS7) \ VX_CPUDEF (PENTIUM4); \ - else if (TARGET_386) \ + else if (TARGET_CPU_P (I386)) \ VX_CPUDEF (I80386); \ - else if (TARGET_486) \ + else if (TARGET_CPU_P (I486)) \ VX_CPUDEF (I80486); \ - else if (TARGET_PENTIUM) \ + else if (TARGET_CPU_P (PENTIUM)) \ { \ VX_CPUDEF (PENTIUM); \ VX_CPUVDEF (PENTIUM); \ } \ - else if (TARGET_PENTIUMPRO) \ + else if (TARGET_CPU_P (PENTIUMPRO)) \ { \ VX_CPUDEF (PENTIUM2); \ VX_CPUVDEF (PENTIUMPRO); \ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index ffe810f..93644be 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2734,6 +2734,130 @@ struct processor_costs slm_cost = { "16", /* Func alignment. */ }; +static stringop_algs tremont_memcpy[2] = { + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}, + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}}; +static stringop_algs tremont_memset[2] = { + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}, + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}}; +static const +struct processor_costs tremont_cost = { + { + /* Start of register allocator costs. integer->integer move cost is 2. */ + 6, /* cost for loading QImode using movzbl */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 12}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {6, 6}, /* cost of storing MMX registers + in SImode and DImode */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 10, 15}, /* cost of loading SSE registers + in 32,64,128,256 and 512-bit */ + {6, 6, 6, 10, 15}, /* cost of storing SSE registers + in 32,64,128,256 and 512-bit */ + 6, 6, /* SSE->integer and integer->SSE moves */ + 6, 6, /* mask->integer and integer->mask moves */ + {6, 6, 6}, /* cost of loading mask register + in QImode, HImode, SImode. */ + {6, 6, 6}, /* cost if storing mask register + in QImode, HImode, SImode. */ + 2, /* cost of moving mask register. */ + /* End of register allocator costs. 
*/ + }, + + COSTS_N_INSNS (1), /* cost of an add instruction */ + /* Setting cost to 2 makes our current implementation of synth_mult result in + use of unnecessary temporary registers causing regression on several + SPECfp benchmarks. */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (4)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (22), /* HI */ + COSTS_N_INSNS (30), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 17, /* CLEAR_RATIO */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {6, 6, 6}, /* cost of storing integer registers */ + {6, 6, 6, 10, 15}, /* cost of loading SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 10, 15}, /* cost of storing SSE register + in 32bit, 64bit, 128bit, 256bit and 512bit */ + {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ + {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ + 6, /* cost of moving SSE register to integer. */ + 18, 6, /* Gather load static, per_elt. */ + 18, 6, /* Gather store static, per_elt. */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + /* Benchmarks shows large regressions on K8 sixtrack benchmark when this + value is increased to perhaps more appropriate value of 5. */ + 3, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + COSTS_N_INSNS (17), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (14), /* cost of FSQRT instruction. */ + + COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (4), /* cost of MULSS instruction. */ + COSTS_N_INSNS (5), /* cost of MULSD instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ + COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ + COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ + COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ + COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ + COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ + tremont_memcpy, + tremont_memset, + COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ + COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ + "16:11:8", /* Loop alignment. */ + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. 
*/ +}; + static stringop_algs intel_memcpy[2] = { {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c index 2e5ee4e..56ada99 100644 --- a/gcc/config/i386/x86-tune-sched.c +++ b/gcc/config/i386/x86-tune-sched.c @@ -71,6 +71,7 @@ ix86_issue_rate (void) case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: case PROCESSOR_HASWELL: + case PROCESSOR_TREMONT: case PROCESSOR_GENERIC: return 4; @@ -429,6 +430,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: case PROCESSOR_HASWELL: + case PROCESSOR_TREMONT: case PROCESSOR_GENERIC: /* Stack engine allows to execute push&pop instructions in parall. */ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 2f221b1..58e8ead 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -62,6 +62,21 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", that can be partly masked by careful scheduling of moves. */ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 + | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC) + +/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids + partial write to the destination in scalar SSE conversion from FP + to FP. */ +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, + "sse_partial_reg_fp_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 + | m_BDVER | m_ZNVER | m_GENERIC) + +/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial + write to the destination in scalar SSE conversion from integer to FP. */ +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + "sse_partial_reg_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 | m_BDVER | m_ZNVER | m_GENERIC) /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies @@ -136,7 +151,7 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL - | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ATHLON_K8) + | m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8) /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are considered on critical path. */ @@ -150,14 +165,15 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */ DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", - m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC) + m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT + | m_GENERIC) /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions. Some chips, like 486 and Pentium works faster with separate load and push instructions. */ DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE - | m_GENERIC) + | m_TREMONT | m_GENERIC) /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred over esp subtraction. */ @@ -198,8 +214,7 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", than 4 branch instructions in the 16 byte window. 
*/ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM - | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL | m_ATHLON_K8 - | m_AMDFAM10) + | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_ATHLON_K8 | m_AMDFAM10) /*****************************************************************************/ /* Integer instruction selection tuning */ @@ -240,11 +255,11 @@ DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL - | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL) + | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL) /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */ DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", - m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT + m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_KNL | m_KNM) /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is @@ -263,7 +278,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8", a conditional move. */ DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove", m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_KNL - | m_KNM | m_TREMONT | m_INTEL) + | m_KNM | m_INTEL) /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */ @@ -273,7 +288,7 @@ DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) move/set sequences of bytes with known size. */ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, "prefer_known_rep_movsb_stosb", - m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512) + m_SKYLAKE | m_ALDERLAKE | m_TREMONT | m_CORE_AVX512) /* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of compact prologues and epilogues by issuing a misaligned moves. This @@ -282,7 +297,8 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, FIXME: This may actualy be a win on more targets than listed here. */ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, "misaligned_move_string_pro_epilogues", - m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC) + m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT + | m_GENERIC) /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", @@ -294,7 +310,7 @@ DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", ~(m_PENT | m_LAKEMONT | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL - | m_K6 | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT)) + | m_K6 | m_GOLDMONT | m_GOLDMONT_PLUS)) /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */ DEF_TUNE (X86_TUNE_USE_BT, "use_bt", @@ -305,7 +321,7 @@ DEF_TUNE (X86_TUNE_USE_BT, "use_bt", /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency for bit-manipulation instructions. */ DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", - m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC) + m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_GENERIC) /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based on hardware capabilities. 
Bdver3 hardware has a loop buffer which makes @@ -321,14 +337,14 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */ DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", - m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC) + m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC) /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) - (signed) x >> (W-1)) instead of cmove or SSE max/abs instructions. */ DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs", m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT - | m_GOLDMONT_PLUS | m_TREMONT ) + | m_GOLDMONT_PLUS) /*****************************************************************************/ /* 387 instruction selection tuning */ @@ -386,13 +402,13 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */ DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", - m_AMD_MULTIPLE | m_CORE_ALL | m_GENERIC) + m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_GENERIC) /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to xorps/xorpd and other variants. */ DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER - | m_GENERIC) + | m_TREMONT | m_GENERIC) /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer to SSE registers. If disabled, the moves will be done by storing @@ -419,7 +435,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", fp converts to destination register. */ DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts", m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS - | m_TREMONT | m_INTEL) + | m_INTEL) /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. This form of instructions avoids partial write to the @@ -434,7 +450,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) /* X86_TUNE_SLOW_SHUFB: Indicates tunings with slow pshufb instruction. */ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb", m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT - | m_GOLDMONT_PLUS | m_TREMONT | m_INTEL) + | m_GOLDMONT_PLUS | m_INTEL) /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", diff --git a/gcc/config/mips/netbsd.h b/gcc/config/mips/netbsd.h index 85c2779..1c6a59d 100644 --- a/gcc/config/mips/netbsd.h +++ b/gcc/config/mips/netbsd.h @@ -87,7 +87,7 @@ along with GCC; see the file COPYING3. If not see else if (mips_isa >= MIPS_ISA_MIPS32 \ && mips_isa < MIPS_ISA_MIPS64) \ builtin_define ("__mips=32"); \ - else if (mips_isa >= MIPS_ISA_64) \ + else if (mips_isa >= MIPS_ISA_MIPS64) \ builtin_define ("__mips=64"); \ if (mips_isa_rev > 0) \ builtin_define_with_int_value ("__mips_isa_rev", \ diff --git a/gcc/config/rs6000/lynx.h b/gcc/config/rs6000/lynx.h index 3434c8b..0ddb54f 100644 --- a/gcc/config/rs6000/lynx.h +++ b/gcc/config/rs6000/lynx.h @@ -80,7 +80,6 @@ #undef SIZE_TYPE #undef ASM_OUTPUT_ALIGN -#undef PREFERRED_DEBUGGING_TYPE /* The file rs6000.c defines TARGET_HAVE_TLS unconditionally to the value of HAVE_AS_TLS. 
HAVE_AS_TLS is true as gas support for TLS diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 1f6fc03..1990a21 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -91,7 +91,10 @@ UNSPEC_MMA_XVI8GER4SPP UNSPEC_MMA_XXMFACC UNSPEC_MMA_XXMTACC - UNSPEC_MMA_XXSETACCZ + ]) + +(define_c_enum "unspecv" + [UNSPECV_MMA_XXSETACCZ ]) ;; MMA instructions with 1 accumulator argument @@ -467,30 +470,16 @@ "<acc> %A0" [(set_attr "type" "mma")]) -;; We can't have integer constants in XOmode so we wrap this in an UNSPEC. - -(define_expand "mma_xxsetaccz" - [(set (match_operand:XO 0 "fpr_reg_operand") - (const_int 0))] - "TARGET_MMA" -{ - rtx xo0 = gen_rtx_UNSPEC (XOmode, gen_rtvec (1, const0_rtx), - UNSPEC_MMA_XXSETACCZ); - emit_insn (gen_rtx_SET (operands[0], xo0)); - DONE; -}) +;; We can't have integer constants in XOmode so we wrap this in an +;; UNSPEC_VOLATILE. -(define_insn_and_split "*mma_xxsetaccz" +(define_insn "mma_xxsetaccz" [(set (match_operand:XO 0 "fpr_reg_operand" "=d") - (unspec:XO [(match_operand 1 "const_0_to_1_operand" "O")] - UNSPEC_MMA_XXSETACCZ))] + (unspec_volatile:XO [(const_int 0)] + UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" "xxsetaccz %A0" - "&& reload_completed" - [(set (match_dup 0) (unspec:XO [(match_dup 1)] UNSPEC_MMA_XXSETACCZ))] - "" - [(set_attr "type" "mma") - (set_attr "length" "4")]) + [(set_attr "type" "mma")]) (define_insn "mma_<vv>" [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def index 6a28d51..a8c6b9e 100644 --- a/gcc/config/rs6000/rs6000-builtin-new.def +++ b/gcc/config/rs6000/rs6000-builtin-new.def @@ -208,6 +208,12 @@ double __builtin_mffs (); MFFS rs6000_mffs {} +; Although the mffsl instruction is only available on POWER9 and later +; processors, this builtin automatically falls back to mffs on older +; platforms. Thus it appears here in the [always] stanza. + double __builtin_mffsl (); + MFFSL rs6000_mffsl {} + ; This thing really assumes long double == __ibm128, and I'm told it has ; been used as such within libgcc. Given that __builtin_pack_ibm128 ; exists for the same purpose, this should really not be used at all. @@ -2784,9 +2790,6 @@ signed long long __builtin_darn_raw (); DARN_RAW darn_raw {} - double __builtin_mffsl (); - MFFSL rs6000_mffsl {} - const signed int __builtin_dtstsfi_eq_dd (const int<6>, _Decimal64); TSTSFI_EQ_DD dfptstsfi_eq_dd {} diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index afcb5bb..d08bdfe 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -35,6 +35,9 @@ #include "langhooks.h" #include "c/c-tree.h" +#include "rs6000-builtins.h" + +static tree altivec_resolve_new_overloaded_builtin (location_t, tree, void *); /* Handle the machine specific pragma longcall. Its syntax is @@ -811,6 +814,32 @@ is_float128_p (tree t) && t == long_double_type_node)); } + +/* Return true iff ARGTYPE can be compatibly passed as PARMTYPE. 
*/ +static bool +rs6000_new_builtin_type_compatible (tree parmtype, tree argtype) +{ + if (parmtype == error_mark_node) + return false; + + if (INTEGRAL_TYPE_P (parmtype) && INTEGRAL_TYPE_P (argtype)) + return true; + + if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 + && is_float128_p (parmtype) && is_float128_p (argtype)) + return true; + + if (POINTER_TYPE_P (parmtype) && POINTER_TYPE_P (argtype)) + { + parmtype = TREE_TYPE (parmtype); + argtype = TREE_TYPE (argtype); + if (TYPE_READONLY (argtype)) + parmtype = build_qualified_type (parmtype, TYPE_QUAL_CONST); + } + + return lang_hooks.types_compatible_p (parmtype, argtype); +} + static inline bool rs6000_builtin_type_compatible (tree t, int id) { @@ -927,6 +956,10 @@ tree altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, void *passed_arglist) { + if (new_builtins_are_live) + return altivec_resolve_new_overloaded_builtin (loc, fndecl, + passed_arglist); + vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist); unsigned int nargs = vec_safe_length (arglist); enum rs6000_builtins fcode @@ -1930,3 +1963,1048 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, return error_mark_node; } } + +/* Build a tree for a function call to an Altivec non-overloaded builtin. + The overloaded builtin that matched the types and args is described + by DESC. The N arguments are given in ARGS, respectively. + + Actually the only thing it does is calling fold_convert on ARGS, with + a small exception for vec_{all,any}_{ge,le} predicates. */ + +static tree +altivec_build_new_resolved_builtin (tree *args, int n, tree fntype, + tree ret_type, + rs6000_gen_builtins bif_id, + rs6000_gen_builtins ovld_id) +{ + tree argtypes = TYPE_ARG_TYPES (fntype); + tree arg_type[MAX_OVLD_ARGS]; + tree fndecl = rs6000_builtin_decls_x[bif_id]; + + for (int i = 0; i < n; i++) + { + arg_type[i] = TREE_VALUE (argtypes); + argtypes = TREE_CHAIN (argtypes); + } + + /* The AltiVec overloading implementation is overall gross, but this + is particularly disgusting. The vec_{all,any}_{ge,le} builtins + are completely different for floating-point vs. integer vector + types, because the former has vcmpgefp, but the latter should use + vcmpgtXX. + + In practice, the second and third arguments are swapped, and the + condition (LT vs. EQ, which is recognizable by bit 1 of the first + argument) is reversed. Patch the arguments here before building + the resolved CALL_EXPR. */ + if (n == 3 + && ovld_id == RS6000_OVLD_VEC_CMPGE_P + && bif_id != RS6000_BIF_VCMPGEFP_P + && bif_id != RS6000_BIF_XVCMPGEDP_P) + { + std::swap (args[1], args[2]); + std::swap (arg_type[1], arg_type[2]); + + args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0], + build_int_cst (NULL_TREE, 2)); + } + + for (int j = 0; j < n; j++) + args[j] = fully_fold_convert (arg_type[j], args[j]); + + /* If the number of arguments to an overloaded function increases, + we must expand this switch. 
*/ + gcc_assert (MAX_OVLD_ARGS <= 4); + + tree call; + switch (n) + { + case 0: + call = build_call_expr (fndecl, 0); + break; + case 1: + call = build_call_expr (fndecl, 1, args[0]); + break; + case 2: + call = build_call_expr (fndecl, 2, args[0], args[1]); + break; + case 3: + call = build_call_expr (fndecl, 3, args[0], args[1], args[2]); + break; + case 4: + call = build_call_expr (fndecl, 4, args[0], args[1], args[2], args[3]); + break; + default: + gcc_unreachable (); + } + return fold_convert (ret_type, call); +} + +/* Implementation of the resolve_overloaded_builtin target hook, to + support Altivec's overloaded builtins. FIXME: This code needs + to be brutally factored. */ + +static tree +altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl, + void *passed_arglist) +{ + vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist); + unsigned int nargs = vec_safe_length (arglist); + enum rs6000_gen_builtins fcode + = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); + tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); + tree types[MAX_OVLD_ARGS]; + tree args[MAX_OVLD_ARGS]; + + /* Return immediately if this isn't an overload. */ + if (fcode <= RS6000_OVLD_NONE) + return NULL_TREE; + + unsigned int adj_fcode = fcode - RS6000_OVLD_NONE; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n", + (int) fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); + + /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. */ + if (fcode == RS6000_OVLD_VEC_LVSL && !BYTES_BIG_ENDIAN) + warning (OPT_Wdeprecated, + "%<vec_lvsl%> is deprecated for little endian; use " + "assignment for unaligned loads and stores"); + else if (fcode == RS6000_OVLD_VEC_LVSR && !BYTES_BIG_ENDIAN) + warning (OPT_Wdeprecated, + "%<vec_lvsr%> is deprecated for little endian; use " + "assignment for unaligned loads and stores"); + + if (fcode == RS6000_OVLD_VEC_MUL) + { + /* vec_mul needs to be special cased because there are no instructions + for it for the {un}signed char, {un}signed short, and {un}signed int + types. */ + if (nargs != 2) + { + error ("builtin %qs only accepts 2 arguments", "vec_mul"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + + /* Both arguments must be vectors and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_TImode: + { + /* For scalar types just use a multiply expression. */ + return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0, + fold_convert (TREE_TYPE (arg0), arg1)); + } + case E_SFmode: + { + /* For floats use the xvmulsp instruction directly. */ + tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULSP]; + return build_call_expr (call, 2, arg0, arg1); + } + case E_DFmode: + { + /* For doubles use the xvmuldp instruction directly. */ + tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULDP]; + return build_call_expr (call, 2, arg0, arg1); + } + /* Other types are errors. */ + default: + goto bad; + } + } + + if (fcode == RS6000_OVLD_VEC_CMPNE) + { + /* vec_cmpne needs to be special cased because there are no instructions + for it (prior to power 9). 
*/ + if (nargs != 2) + { + error ("builtin %qs only accepts 2 arguments", "vec_cmpne"); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + + /* Both arguments must be vectors and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) + goto bad; + + /* Power9 instructions provide the most efficient implementation of + ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode + or SFmode or DFmode. */ + if (!TARGET_P9_VECTOR + || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode) + || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode) + || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode) + || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode)) + { + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb), + vec_cmpeq (va, vb)). */ + /* Note: vec_nand also works but opt changes vec_nand's + to vec_nor's anyway. */ + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_TImode: + case E_SFmode: + case E_DFmode: + { + /* call = vec_cmpeq (va, vb) + result = vec_nor (call, call). */ + vec<tree, va_gc> *params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + tree call = altivec_resolve_new_overloaded_builtin + (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_CMPEQ], + params); + /* Use save_expr to ensure that operands used more than once + that may have side effects (like calls) are only evaluated + once. */ + call = save_expr (call); + params = make_tree_vector (); + vec_safe_push (params, call); + vec_safe_push (params, call); + return altivec_resolve_new_overloaded_builtin + (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_NOR], params); + } + /* Other types are errors. */ + default: + goto bad; + } + } + /* else, fall through and process the Power9 alternative below */ + } + + if (fcode == RS6000_OVLD_VEC_ADDE || fcode == RS6000_OVLD_VEC_SUBE) + { + /* vec_adde needs to be special cased because there is no instruction + for the {un}signed int version. */ + if (nargs != 3) + { + const char *name; + name = fcode == RS6000_OVLD_VEC_ADDE ? "vec_adde" : "vec_sube"; + error ("builtin %qs only accepts 3 arguments", name); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + tree arg2 = (*arglist)[2]; + tree arg2_type = TREE_TYPE (arg2); + + /* All 3 arguments must be vectors of (signed or unsigned) (int or + __int128) and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) + || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* For {un}signed ints, + vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), + vec_and (carryv, 1)). + vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb), + vec_and (carryv, 1)). 
*/ + case E_SImode: + { + tree add_sub_builtin; + + vec<tree, va_gc> *params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + + if (fcode == RS6000_OVLD_VEC_ADDE) + add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD]; + else + add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB]; + + tree call + = altivec_resolve_new_overloaded_builtin (loc, + add_sub_builtin, + params); + tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); + tree ones_vector = build_vector_from_val (arg0_type, const1); + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, + arg2, ones_vector); + params = make_tree_vector (); + vec_safe_push (params, call); + vec_safe_push (params, and_expr); + return altivec_resolve_new_overloaded_builtin (loc, + add_sub_builtin, + params); + } + /* For {un}signed __int128s use the vaddeuqm/vsubeuqm instruction + directly. */ + case E_TImode: + break; + + /* Types other than {un}signed int and {un}signed __int128 + are errors. */ + default: + goto bad; + } + } + + if (fcode == RS6000_OVLD_VEC_ADDEC || fcode == RS6000_OVLD_VEC_SUBEC) + { + /* vec_addec and vec_subec needs to be special cased because there is + no instruction for the {un}signed int version. */ + if (nargs != 3) + { + const char *name; + name = fcode == RS6000_OVLD_VEC_ADDEC ? "vec_addec" : "vec_subec"; + error ("builtin %qs only accepts 3 arguments", name); + return error_mark_node; + } + + tree arg0 = (*arglist)[0]; + tree arg0_type = TREE_TYPE (arg0); + tree arg1 = (*arglist)[1]; + tree arg1_type = TREE_TYPE (arg1); + tree arg2 = (*arglist)[2]; + tree arg2_type = TREE_TYPE (arg2); + + /* All 3 arguments must be vectors of (signed or unsigned) (int or + __int128) and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) + || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + goto bad; + + switch (TYPE_MODE (TREE_TYPE (arg0_type))) + { + /* For {un}signed ints, + vec_addec (va, vb, carryv) == + vec_or (vec_addc (va, vb), + vec_addc (vec_add (va, vb), + vec_and (carryv, 0x1))). */ + case E_SImode: + { + /* Use save_expr to ensure that operands used more than once + that may have side effects (like calls) are only evaluated + once. 
*/ + tree as_builtin; + tree as_c_builtin; + + arg0 = save_expr (arg0); + arg1 = save_expr (arg1); + vec<tree, va_gc> *params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + + if (fcode == RS6000_OVLD_VEC_ADDEC) + as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADDC]; + else + as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUBC]; + + tree call1 = altivec_resolve_new_overloaded_builtin (loc, + as_c_builtin, + params); + params = make_tree_vector (); + vec_safe_push (params, arg0); + vec_safe_push (params, arg1); + + if (fcode == RS6000_OVLD_VEC_ADDEC) + as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD]; + else + as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB]; + + tree call2 = altivec_resolve_new_overloaded_builtin (loc, + as_builtin, + params); + tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); + tree ones_vector = build_vector_from_val (arg0_type, const1); + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, + arg2, ones_vector); + params = make_tree_vector (); + vec_safe_push (params, call2); + vec_safe_push (params, and_expr); + call2 = altivec_resolve_new_overloaded_builtin (loc, as_c_builtin, + params); + params = make_tree_vector (); + vec_safe_push (params, call1); + vec_safe_push (params, call2); + tree or_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_OR]; + return altivec_resolve_new_overloaded_builtin (loc, or_builtin, + params); + } + /* For {un}signed __int128s use the vaddecuq/vsubbecuq + instructions. This occurs through normal processing. */ + case E_TImode: + break; + + /* Types other than {un}signed int and {un}signed __int128 + are errors. */ + default: + goto bad; + } + } + + /* For now treat vec_splats and vec_promote as the same. */ + if (fcode == RS6000_OVLD_VEC_SPLATS || fcode == RS6000_OVLD_VEC_PROMOTE) + { + tree type, arg; + int size; + int i; + bool unsigned_p; + vec<constructor_elt, va_gc> *vec; + const char *name; + name = fcode == RS6000_OVLD_VEC_SPLATS ? "vec_splats" : "vec_promote"; + + if (fcode == RS6000_OVLD_VEC_SPLATS && nargs != 1) + { + error ("builtin %qs only accepts 1 argument", name); + return error_mark_node; + } + if (fcode == RS6000_OVLD_VEC_PROMOTE && nargs != 2) + { + error ("builtin %qs only accepts 2 arguments", name); + return error_mark_node; + } + /* Ignore promote's element argument. */ + if (fcode == RS6000_OVLD_VEC_PROMOTE + && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1]))) + goto bad; + + arg = (*arglist)[0]; + type = TREE_TYPE (arg); + if (!SCALAR_FLOAT_TYPE_P (type) + && !INTEGRAL_TYPE_P (type)) + goto bad; + unsigned_p = TYPE_UNSIGNED (type); + switch (TYPE_MODE (type)) + { + case E_TImode: + type = unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node; + size = 1; + break; + case E_DImode: + type = unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node; + size = 2; + break; + case E_SImode: + type = unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node; + size = 4; + break; + case E_HImode: + type = unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node; + size = 8; + break; + case E_QImode: + type = unsigned_p ? 
unsigned_V16QI_type_node : V16QI_type_node; + size = 16; + break; + case E_SFmode: + type = V4SF_type_node; + size = 4; + break; + case E_DFmode: + type = V2DF_type_node; + size = 2; + break; + default: + goto bad; + } + arg = save_expr (fold_convert (TREE_TYPE (type), arg)); + vec_alloc (vec, size); + for (i = 0; i < size; i++) + { + constructor_elt elt = {NULL_TREE, arg}; + vec->quick_push (elt); + } + return build_constructor (type, vec); + } + + /* For now use pointer tricks to do the extraction, unless we are on VSX + extracting a double from a constant offset. */ + if (fcode == RS6000_OVLD_VEC_EXTRACT) + { + tree arg1; + tree arg1_type; + tree arg2; + tree arg1_inner_type; + tree decl, stmt; + tree innerptrtype; + machine_mode mode; + + /* No second argument. */ + if (nargs != 2) + { + error ("builtin %qs only accepts 2 arguments", "vec_extract"); + return error_mark_node; + } + + arg2 = (*arglist)[1]; + arg1 = (*arglist)[0]; + arg1_type = TREE_TYPE (arg1); + + if (TREE_CODE (arg1_type) != VECTOR_TYPE) + goto bad; + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) + goto bad; + + /* See if we can optimize vec_extracts with the current VSX instruction + set. */ + mode = TYPE_MODE (arg1_type); + if (VECTOR_MEM_VSX_P (mode)) + + { + tree call = NULL_TREE; + int nunits = GET_MODE_NUNITS (mode); + + arg2 = fold_for_warn (arg2); + + /* If the second argument is an integer constant, generate + the built-in code if we can. We need 64-bit and direct + move to extract the small integer vectors. */ + if (TREE_CODE (arg2) == INTEGER_CST) + { + wide_int selector = wi::to_wide (arg2); + selector = wi::umod_trunc (selector, nunits); + arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector); + switch (mode) + { + default: + break; + + case E_V1TImode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V1TI]; + break; + + case E_V2DFmode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF]; + break; + + case E_V2DImode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI]; + break; + + case E_V4SFmode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF]; + break; + + case E_V4SImode: + if (TARGET_DIRECT_MOVE_64BIT) + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI]; + break; + + case E_V8HImode: + if (TARGET_DIRECT_MOVE_64BIT) + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI]; + break; + + case E_V16QImode: + if (TARGET_DIRECT_MOVE_64BIT) + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI]; + break; + } + } + + /* If the second argument is variable, we can optimize it if we are + generating 64-bit code on a machine with direct move. */ + else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT) + { + switch (mode) + { + default: + break; + + case E_V2DFmode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF]; + break; + + case E_V2DImode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI]; + break; + + case E_V4SFmode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF]; + break; + + case E_V4SImode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI]; + break; + + case E_V8HImode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI]; + break; + + case E_V16QImode: + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI]; + break; + } + } + + if (call) + { + tree result = build_call_expr (call, 2, arg1, arg2); + /* Coerce the result to vector element type. May be no-op. 
*/ + arg1_inner_type = TREE_TYPE (arg1_type); + result = fold_convert (arg1_inner_type, result); + return result; + } + } + + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */ + arg1_inner_type = TREE_TYPE (arg1_type); + tree subp = build_int_cst (TREE_TYPE (arg2), + TYPE_VECTOR_SUBPARTS (arg1_type) - 1); + arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, subp, 0); + decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type); + DECL_EXTERNAL (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_CONTEXT (decl) = current_function_decl; + TREE_USED (decl) = 1; + TREE_TYPE (decl) = arg1_type; + TREE_READONLY (decl) = TYPE_READONLY (arg1_type); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } + + innerptrtype = build_pointer_type (arg1_inner_type); + + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); + stmt = convert (innerptrtype, stmt); + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); + stmt = build_indirect_ref (loc, stmt, RO_NULL); + + /* PR83660: We mark this as having side effects so that + downstream in fold_build_cleanup_point_expr () it will get a + CLEANUP_POINT_EXPR. If it does not we can run into an ICE + later in gimplify_cleanup_point_expr (). Potentially this + causes missed optimization because there actually is no side + effect. */ + if (c_dialect_cxx ()) + TREE_SIDE_EFFECTS (stmt) = 1; + + return stmt; + } + + /* For now use pointer tricks to do the insertion, unless we are on VSX + inserting a double to a constant offset. */ + if (fcode == RS6000_OVLD_VEC_INSERT) + { + tree arg0; + tree arg1; + tree arg2; + tree arg1_type; + tree decl, stmt; + machine_mode mode; + + /* No second or third arguments. */ + if (nargs != 3) + { + error ("builtin %qs only accepts 3 arguments", "vec_insert"); + return error_mark_node; + } + + arg0 = (*arglist)[0]; + arg1 = (*arglist)[1]; + arg1_type = TREE_TYPE (arg1); + arg2 = fold_for_warn ((*arglist)[2]); + + if (TREE_CODE (arg1_type) != VECTOR_TYPE) + goto bad; + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2))) + goto bad; + + /* If we can use the VSX xxpermdi instruction, use that for insert. */ + mode = TYPE_MODE (arg1_type); + if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST) + { + wide_int selector = wi::to_wide (arg2); + selector = wi::umod_trunc (selector, 2); + tree call = NULL_TREE; + + arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector); + if (mode == V2DFmode) + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DF]; + else if (mode == V2DImode) + call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DI]; + + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types + reversed. */ + if (call) + return build_call_expr (call, 3, arg1, arg0, arg2); + } + else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST) + { + tree call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V1TI]; + wide_int selector = wi::zero(32); + + arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector); + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types + reversed. */ + return build_call_expr (call, 3, arg1, arg0, arg2); + } + + /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0 with + VIEW_CONVERT_EXPR. 
i.e.: + D.3192 = v1; + _1 = n & 3; + VIEW_CONVERT_EXPR<int[4]>(D.3192)[_1] = i; + v1 = D.3192; + D.3194 = v1; */ + if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1) + arg2 = build_int_cst (TREE_TYPE (arg2), 0); + else + arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2, + build_int_cst (TREE_TYPE (arg2), + TYPE_VECTOR_SUBPARTS (arg1_type) + - 1), 0); + decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type); + DECL_EXTERNAL (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_CONTEXT (decl) = current_function_decl; + TREE_USED (decl) = 1; + TREE_TYPE (decl) = arg1_type; + TREE_READONLY (decl) = TYPE_READONLY (arg1_type); + TREE_ADDRESSABLE (decl) = 1; + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } + + if (TARGET_VSX) + { + stmt = build_array_ref (loc, stmt, arg2); + stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt, + convert (TREE_TYPE (stmt), arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + } + else + { + tree arg1_inner_type; + tree innerptrtype; + arg1_inner_type = TREE_TYPE (arg1_type); + innerptrtype = build_pointer_type (arg1_inner_type); + + stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0); + stmt = convert (innerptrtype, stmt); + stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1); + stmt = build_indirect_ref (loc, stmt, RO_NULL); + stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt, + convert (TREE_TYPE (stmt), arg0)); + stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl); + } + return stmt; + } + + unsigned int n; + for (n = 0; + !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; + fnargs = TREE_CHAIN (fnargs), n++) + { + tree decl_type = TREE_VALUE (fnargs); + tree arg = (*arglist)[n]; + tree type; + + if (arg == error_mark_node) + return error_mark_node; + + if (n >= MAX_OVLD_ARGS) + abort (); + + arg = default_conversion (arg); + + /* The C++ front-end converts float * to const void * using + NOP_EXPR<const void *> (NOP_EXPR<void *> (x)). */ + type = TREE_TYPE (arg); + if (POINTER_TYPE_P (type) + && TREE_CODE (arg) == NOP_EXPR + && lang_hooks.types_compatible_p (TREE_TYPE (arg), + const_ptr_type_node) + && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)), + ptr_type_node)) + { + arg = TREE_OPERAND (arg, 0); + type = TREE_TYPE (arg); + } + + /* Remove the const from the pointers to simplify the overload + matching further down. */ + if (POINTER_TYPE_P (decl_type) + && POINTER_TYPE_P (type) + && TYPE_QUALS (TREE_TYPE (type)) != 0) + { + if (TYPE_READONLY (TREE_TYPE (type)) + && !TYPE_READONLY (TREE_TYPE (decl_type))) + warning (0, "passing argument %d of %qE discards const qualifier " + "from pointer target type", n + 1, fndecl); + type = build_qualified_type (TREE_TYPE (type), 0); + type = build_pointer_type (type); + arg = fold_convert (type, arg); + } + + /* For RS6000_OVLD_VEC_LXVL, convert any const * to its non constant + equivalent to simplify the overload matching below. 
*/ + if (fcode == RS6000_OVLD_VEC_LXVL) + { + if (POINTER_TYPE_P (type) + && TYPE_READONLY (TREE_TYPE (type))) + { + type = build_qualified_type (TREE_TYPE (type), 0); + type = build_pointer_type (type); + arg = fold_convert (type, arg); + } + } + + args[n] = arg; + types[n] = type; + } + + /* If the number of arguments did not match the prototype, return NULL + and the generic code will issue the appropriate error message. */ + if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs) + return NULL; + + if (fcode == RS6000_OVLD_VEC_STEP) + { + if (TREE_CODE (types[0]) != VECTOR_TYPE) + goto bad; + + return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0])); + } + + { + bool unsupported_builtin = false; + enum rs6000_gen_builtins overloaded_code; + bool supported = false; + ovlddata *instance = rs6000_overload_info[adj_fcode].first_instance; + gcc_assert (instance != NULL); + + /* Need to special case __builtin_cmpb because the overloaded forms + of this function take (unsigned int, unsigned int) or (unsigned + long long int, unsigned long long int). Since C conventions + allow the respective argument types to be implicitly coerced into + each other, the default handling does not provide adequate + discrimination between the desired forms of the function. */ + if (fcode == RS6000_OVLD_SCAL_CMPB) + { + machine_mode arg1_mode = TYPE_MODE (types[0]); + machine_mode arg2_mode = TYPE_MODE (types[1]); + + if (nargs != 2) + { + error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb"); + return error_mark_node; + } + + /* If any supplied arguments are wider than 32 bits, resolve to + 64-bit variant of built-in function. */ + if (GET_MODE_PRECISION (arg1_mode) > 32 + || GET_MODE_PRECISION (arg2_mode) > 32) + /* Assure all argument and result types are compatible with + the built-in function represented by RS6000_BIF_CMPB. */ + overloaded_code = RS6000_BIF_CMPB; + else + /* Assure all argument and result types are compatible with + the built-in function represented by RS6000_BIF_CMPB_32. */ + overloaded_code = RS6000_BIF_CMPB_32; + + while (instance && instance->bifid != overloaded_code) + instance = instance->next; + + gcc_assert (instance != NULL); + tree fntype = rs6000_builtin_info_x[instance->bifid].fntype; + tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype)); + tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype))); + + if (rs6000_new_builtin_type_compatible (types[0], parmtype0) + && rs6000_new_builtin_type_compatible (types[1], parmtype1)) + { + if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node + && rs6000_new_builtin_is_supported (instance->bifid)) + { + tree ret_type = TREE_TYPE (instance->fntype); + return altivec_build_new_resolved_builtin (args, n, fntype, + ret_type, + instance->bifid, + fcode); + } + else + unsupported_builtin = true; + } + } + else if (fcode == RS6000_OVLD_VEC_VSIE) + { + machine_mode arg1_mode = TYPE_MODE (types[0]); + + if (nargs != 2) + { + error ("builtin %qs only accepts 2 arguments", + "scalar_insert_exp"); + return error_mark_node; + } + + /* If supplied first argument is wider than 64 bits, resolve to + 128-bit variant of built-in function. */ + if (GET_MODE_PRECISION (arg1_mode) > 64) + { + /* If first argument is of float variety, choose variant + that expects __ieee128 argument. Otherwise, expect + __int128 argument. 
*/ + if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT) + overloaded_code = RS6000_BIF_VSIEQPF; + else + overloaded_code = RS6000_BIF_VSIEQP; + } + else + { + /* If first argument is of float variety, choose variant + that expects double argument. Otherwise, expect + long long int argument. */ + if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT) + overloaded_code = RS6000_BIF_VSIEDPF; + else + overloaded_code = RS6000_BIF_VSIEDP; + } + + while (instance && instance->bifid != overloaded_code) + instance = instance->next; + + gcc_assert (instance != NULL); + tree fntype = rs6000_builtin_info_x[instance->bifid].fntype; + tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype)); + tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype))); + + if (rs6000_new_builtin_type_compatible (types[0], parmtype0) + && rs6000_new_builtin_type_compatible (types[1], parmtype1)) + { + if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node + && rs6000_new_builtin_is_supported (instance->bifid)) + { + tree ret_type = TREE_TYPE (instance->fntype); + return altivec_build_new_resolved_builtin (args, n, fntype, + ret_type, + instance->bifid, + fcode); + } + else + unsupported_builtin = true; + } + } + else + { + /* Functions with no arguments can have only one overloaded + instance. */ + gcc_assert (n > 0 || !instance->next); + + for (; instance != NULL; instance = instance->next) + { + bool mismatch = false; + tree nextparm = TYPE_ARG_TYPES (instance->fntype); + + for (unsigned int arg_i = 0; + arg_i < nargs && nextparm != NULL; + arg_i++) + { + tree parmtype = TREE_VALUE (nextparm); + if (!rs6000_new_builtin_type_compatible (types[arg_i], + parmtype)) + { + mismatch = true; + break; + } + nextparm = TREE_CHAIN (nextparm); + } + + if (mismatch) + continue; + + supported = rs6000_new_builtin_is_supported (instance->bifid); + if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node + && supported) + { + tree fntype = rs6000_builtin_info_x[instance->bifid].fntype; + tree ret_type = TREE_TYPE (instance->fntype); + return altivec_build_new_resolved_builtin (args, n, fntype, + ret_type, + instance->bifid, + fcode); + } + else + { + unsupported_builtin = true; + break; + } + } + } + + if (unsupported_builtin) + { + const char *name = rs6000_overload_info[adj_fcode].ovld_name; + if (!supported) + { + const char *internal_name + = rs6000_builtin_info_x[instance->bifid].bifname; + /* An error message making reference to the name of the + non-overloaded function has already been issued. Add + clarification of the previous message. */ + rich_location richloc (line_table, input_location); + inform (&richloc, "builtin %qs requires builtin %qs", + name, internal_name); + } + else + error ("%qs is not supported in this compiler configuration", name); + + return error_mark_node; + } + } + bad: + { + const char *name = rs6000_overload_info[adj_fcode].ovld_name; + error ("invalid parameter combination for AltiVec intrinsic %qs", name); + return error_mark_node; + } +} diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index e8625d1..a55cb7c 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -12971,6 +12971,59 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) return false; } +/* Check whether a builtin function is supported in this target + configuration. 
*/ +bool +rs6000_new_builtin_is_supported (enum rs6000_gen_builtins fncode) +{ + switch (rs6000_builtin_info_x[(size_t) fncode].enable) + { + case ENB_ALWAYS: + return true; + case ENB_P5: + return TARGET_POPCNTB; + case ENB_P6: + return TARGET_CMPB; + case ENB_P7: + return TARGET_POPCNTD; + case ENB_P7_64: + return TARGET_POPCNTD && TARGET_POWERPC64; + case ENB_P8: + return TARGET_DIRECT_MOVE; + case ENB_P8V: + return TARGET_P8_VECTOR; + case ENB_P9: + return TARGET_MODULO; + case ENB_P9_64: + return TARGET_MODULO && TARGET_POWERPC64; + case ENB_P9V: + return TARGET_P9_VECTOR; + case ENB_P10: + return TARGET_POWER10; + case ENB_P10_64: + return TARGET_POWER10 && TARGET_POWERPC64; + case ENB_ALTIVEC: + return TARGET_ALTIVEC; + case ENB_VSX: + return TARGET_VSX; + case ENB_CELL: + return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL; + case ENB_IEEE128_HW: + return TARGET_FLOAT128_HW; + case ENB_DFP: + return TARGET_DFP; + case ENB_CRYPTO: + return TARGET_CRYPTO; + case ENB_HTM: + return TARGET_HTM; + case ENB_MMA: + return TARGET_MMA; + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index f3d6156..f65932e 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -2314,7 +2314,7 @@ write_decls (void) fprintf (header_file, "extern void rs6000_init_generated_builtins ();\n\n"); fprintf (header_file, - "extern bool rs6000_new_builtin_is_supported_p " + "extern bool rs6000_new_builtin_is_supported " "(rs6000_gen_builtins);\n"); fprintf (header_file, "extern tree rs6000_builtin_decl (unsigned, " diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2570937..ad81dfb 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -21728,7 +21728,8 @@ rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first) if (decl && DECL_P (decl) && VAR_OR_FUNCTION_DECL_P (decl) - && symtab_node::get (decl)->alias == 0 + && (symtab_node::get (decl) == NULL + || symtab_node::get (decl)->alias == 0) && symname[strlen (symname) - 1] != ']') { const char *smclass = NULL; @@ -22174,7 +22175,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, break; case UNSPEC: - if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ) + if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ) { *total = 0; return true; diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 3753de1..c1cb9ab 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -561,10 +561,6 @@ mpower9-minmax Target Undocumented Mask(P9_MINMAX) Var(rs6000_isa_flags) Use the new min/max instructions defined in ISA 3.0. -mtoc-fusion -Target Undocumented Mask(TOC_FUSION) Var(rs6000_isa_flags) -Fuse medium/large code model toc references with the memory instruction. - mmodulo Target Undocumented Mask(MODULO) Var(rs6000_isa_flags) Generate the integer modulo instructions. 
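The rs6000-c.c hunk earlier in this patch documents, in its comments, how the new overload resolver open-codes vec_cmpne and vec_adde when no single instruction exists for the requested element type (pre-POWER9 for vec_cmpne, and the {un}signed int case for vec_adde). A minimal sketch of those two equivalences written as ordinary intrinsic code is shown below; it assumes a PowerPC compiler invoked with -maltivec, and the function names are illustrative only, not part of the patch.

#include <altivec.h>

/* Pre-POWER9 fallback described in the hunk:
   vec_cmpne (a, b) == vec_nor (vec_cmpeq (a, b), vec_cmpeq (a, b)).  */
vector bool int
cmpne_fallback (vector signed int a, vector signed int b)
{
  vector bool int eq = vec_cmpeq (a, b);
  return vec_nor (eq, eq);
}

/* {un}signed int fallback described in the hunk:
   vec_adde (a, b, c) == vec_add (vec_add (a, b), vec_and (c, 1)).  */
vector signed int
adde_fallback (vector signed int a, vector signed int b, vector signed int c)
{
  vector signed int ones = {1, 1, 1, 1};
  return vec_add (vec_add (a, b), vec_and (c, ones));
}

On POWER9 and later the resolver instead falls through to the native alternative for the small element types, as the hunk's comments note.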
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 92766d8..d48a4b1 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -44,15 +44,11 @@ rs6000-logue.o: $(srcdir)/config/rs6000/rs6000-logue.c $(COMPILE) $< $(POSTCOMPILE) -rs6000-gen-builtins.o: $(srcdir)/config/rs6000/rs6000-gen-builtins.c - $(COMPILE) $< - $(POSTCOMPILE) - -rbtree.o: $(srcdir)/config/rs6000/rbtree.c - $(COMPILE) $< - $(POSTCOMPILE) +build/rs6000-gen-builtins.o: $(srcdir)/config/rs6000/rs6000-gen-builtins.c +build/rbtree.o: $(srcdir)/config/rs6000/rbtree.c -rs6000-gen-builtins: rs6000-gen-builtins.o rbtree.o +build/rs6000-gen-builtins$(build_exeext): build/rs6000-gen-builtins.o \ + build/rbtree.o $(BUILD_LIBDEPS) $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ \ $(filter-out $(BUILD_LIBDEPS), $^) $(BUILD_LIBS) @@ -62,10 +58,11 @@ rs6000-gen-builtins: rs6000-gen-builtins.o rbtree.o # <recipe> # For now, the header files depend on rs6000-builtins.c, which avoids # races because the .c file is closed last in rs6000-gen-builtins.c. -rs6000-builtins.c: rs6000-gen-builtins \ +rs6000-builtins.c: build/rs6000-gen-builtins$(build_exeext) \ $(srcdir)/config/rs6000/rs6000-builtin-new.def \ $(srcdir)/config/rs6000/rs6000-overload.def - ./rs6000-gen-builtins $(srcdir)/config/rs6000/rs6000-builtin-new.def \ + $(RUN_GEN) ./build/rs6000-gen-builtins$(build_exeext) \ + $(srcdir)/config/rs6000/rs6000-builtin-new.def \ $(srcdir)/config/rs6000/rs6000-overload.def rs6000-builtins.h \ rs6000-builtins.c rs6000-vecdefines.h diff --git a/gcc/config/sparc/leon5.md b/gcc/config/sparc/leon5.md new file mode 100644 index 0000000..6a065b1 --- /dev/null +++ b/gcc/config/sparc/leon5.md @@ -0,0 +1,103 @@ +;; Scheduling description for LEON5. +;; Copyright (C) 2021 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; The LEON5 can often dual issue instructions from the same 64-bit aligned +;; double word if there are no data dependencies. +;; +;; Avoid scheduling load/store, FPU, and multiply instructions back to +;; back, regardless of data dependencies. +;; +;; Push comparisons away from the associated branch instruction. +;; +;; Avoid scheduling ALU instructions with data dependencies back to back. +;; +;; Schedule three instructions between load and dependent instruction. 
+ +(define_automaton "leon5") + +(define_cpu_unit "leon5_memory" "leon5") +(define_cpu_unit "leon5_mul" "leon5") +(define_cpu_unit "grfpu_d" "grfpu") +(define_cpu_unit "grfpu_s" "grfpu") + +(define_insn_reservation "leon5_load" 4 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "load,sload")) + "leon5_memory * 2, nothing * 2") + +(define_insn_reservation "leon5_fpload" 2 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fpload")) + "leon5_memory * 2 + grfpu_alu * 2") + +(define_insn_reservation "leon5_store" 2 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "store")) + "leon5_memory * 2") + +(define_insn_reservation "leon5_fpstore" 2 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fpstore")) + "leon5_memory * 2 + grfpu_alu * 2") + +(define_insn_reservation "leon5_ialu" 2 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "ialu, shift, ialuX")) + "nothing * 2") + +(define_insn_reservation "leon5_compare" 5 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "compare")) + "nothing * 5") + +(define_insn_reservation "leon5_imul" 4 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "imul")) + "leon5_mul * 2, nothing * 2") + +(define_insn_reservation "leon5_idiv" 35 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "imul")) + "nothing * 35") + +(define_insn_reservation "leon5_fp_alu" 5 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fp,fpcmp,fpmul,fpmove")) + "grfpu_alu * 2, nothing*3") + +(define_insn_reservation "leon5_fp_divs" 17 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fpdivs")) + "grfpu_alu * 2 + grfpu_d*16, nothing") + +(define_insn_reservation "leon5_fp_divd" 18 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fpdivd")) + "grfpu_alu * 2 + grfpu_d*17, nothing") + +(define_insn_reservation "leon5_fp_sqrts" 25 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fpsqrts")) + "grfpu_alu * 2 + grfpu_s*24, nothing") + +(define_insn_reservation "leon5_fp_sqrtd" 26 + (and (eq_attr "cpu" "leon5") + (eq_attr "type" "fpsqrtd")) + "grfpu_alu * 2 + grfpu_s*25, nothing") diff --git a/gcc/config/sparc/sparc-opts.h b/gcc/config/sparc/sparc-opts.h index 1af556e..9299cf6 100644 --- a/gcc/config/sparc/sparc-opts.h +++ b/gcc/config/sparc/sparc-opts.h @@ -31,6 +31,7 @@ enum sparc_processor_type { PROCESSOR_HYPERSPARC, PROCESSOR_LEON, PROCESSOR_LEON3, + PROCESSOR_LEON5, PROCESSOR_LEON3V7, PROCESSOR_SPARCLITE, PROCESSOR_F930, diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 06f41d7..6bc6f0a 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -270,6 +270,31 @@ struct processor_costs leon3_costs = { }; static const +struct processor_costs leon5_costs = { + COSTS_N_INSNS (1), /* int load */ + COSTS_N_INSNS (1), /* int signed load */ + COSTS_N_INSNS (1), /* int zeroed load */ + COSTS_N_INSNS (1), /* float load */ + COSTS_N_INSNS (1), /* fmov, fneg, fabs */ + COSTS_N_INSNS (1), /* fadd, fsub */ + COSTS_N_INSNS (1), /* fcmp */ + COSTS_N_INSNS (1), /* fmov, fmovr */ + COSTS_N_INSNS (1), /* fmul */ + COSTS_N_INSNS (17), /* fdivs */ + COSTS_N_INSNS (18), /* fdivd */ + COSTS_N_INSNS (25), /* fsqrts */ + COSTS_N_INSNS (26), /* fsqrtd */ + COSTS_N_INSNS (4), /* imul */ + COSTS_N_INSNS (4), /* imulX */ + 0, /* imul bit factor */ + COSTS_N_INSNS (35), /* idiv */ + COSTS_N_INSNS (35), /* idivX */ + COSTS_N_INSNS (1), /* movcc/movr */ + 0, /* shift penalty */ + 3 /* branch cost */ +}; + +static const struct processor_costs sparclet_costs = { COSTS_N_INSNS (3), /* int load */ COSTS_N_INSNS (3), /* int signed load */ @@ -575,6 +600,7 @@ static int function_arg_slotno (const 
CUMULATIVE_ARGS *, machine_mode, static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int); static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int); +static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int); static void sparc_emit_set_const32 (rtx, rtx); static void sparc_emit_set_const64 (rtx, rtx); @@ -1045,6 +1071,43 @@ atomic_insn_for_leon3_p (rtx_insn *insn) } } +/* True if INSN is a store instruction. */ + +static bool +store_insn_p (rtx_insn *insn) +{ + if (GET_CODE (PATTERN (insn)) != SET) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_STORE: + case TYPE_FPSTORE: + return true; + default: + return false; + } +} + +/* True if INSN is a load instruction. */ + +static bool +load_insn_p (rtx_insn *insn) +{ + if (GET_CODE (PATTERN (insn)) != SET) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + case TYPE_SLOAD: + case TYPE_FPLOAD: + return true; + default: + return false; + } +} + /* We use a machine specific pass to enable workarounds for errata. We need to have the (essentially) final form of the insn stream in order @@ -1057,10 +1120,29 @@ atomic_insn_for_leon3_p (rtx_insn *insn) && GET_CODE (PATTERN (INSN)) != USE \ && GET_CODE (PATTERN (INSN)) != CLOBBER) +rtx_insn * +next_active_non_empty_insn (rtx_insn *insn) +{ + insn = next_active_insn (insn); + + while (insn + && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + || GET_CODE (PATTERN (insn)) == ASM_INPUT + || (USEFUL_INSN_P (insn) + && (asm_noperands (PATTERN (insn)) >= 0) + && !strcmp (decode_asm_operands (PATTERN (insn), + NULL, NULL, NULL, + NULL, NULL), "")))) + insn = next_active_insn (insn); + + return insn; +} + static unsigned int sparc_do_work_around_errata (void) { rtx_insn *insn, *next; + bool find_first_useful = true; /* Force all instructions to be split into their final form. */ split_all_insns_noflow (); @@ -1085,6 +1167,16 @@ sparc_do_work_around_errata (void) else jump = NULL; + /* Do not begin function with atomic instruction. */ + if (sparc_fix_ut700 + && find_first_useful + && USEFUL_INSN_P (insn)) + { + find_first_useful = false; + if (atomic_insn_for_leon3_p (insn)) + emit_insn_before (gen_nop (), insn); + } + /* Place a NOP at the branch target of an integer branch if it is a floating-point operation or a floating-point branch. */ if (sparc_fix_gr712rc @@ -1105,9 +1197,7 @@ sparc_do_work_around_errata (void) instruction at branch target. */ if (sparc_fix_ut700 && NONJUMP_INSN_P (insn) - && (set = single_set (insn)) != NULL_RTX - && mem_ref (SET_SRC (set)) - && REG_P (SET_DEST (set))) + && load_insn_p (insn)) { if (jump && jump_to_label_p (jump)) { @@ -1116,7 +1206,7 @@ sparc_do_work_around_errata (void) emit_insn_before (gen_nop (), target); } - next = next_active_insn (insn); + next = next_active_non_empty_insn (insn); if (!next) break; @@ -1212,30 +1302,19 @@ sparc_do_work_around_errata (void) if (sparc_fix_b2bst && NONJUMP_INSN_P (insn) && (set = single_set (insn)) != NULL_RTX - && MEM_P (SET_DEST (set))) + && store_insn_p (insn)) { /* Sequence B begins with a double-word store. */ bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8; rtx_insn *after; int i; - next = next_active_insn (insn); + next = next_active_non_empty_insn (insn); if (!next) break; for (after = next, i = 0; i < 2; i++) { - /* Skip empty assembly statements. 
*/ - if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE) - || (USEFUL_INSN_P (after) - && (asm_noperands (PATTERN (after))>=0) - && !strcmp (decode_asm_operands (PATTERN (after), - NULL, NULL, NULL, - NULL, NULL), ""))) - after = next_active_insn (after); - if (!after) - break; - /* If the insn is a branch, then it cannot be problematic. */ if (!NONJUMP_INSN_P (after) || GET_CODE (PATTERN (after)) == SEQUENCE) @@ -1245,8 +1324,7 @@ sparc_do_work_around_errata (void) if (seq_b) { /* Add NOP if followed by a store. */ - if ((set = single_set (after)) != NULL_RTX - && MEM_P (SET_DEST (set))) + if (store_insn_p (after)) insert_nop = true; /* Otherwise it is ok. */ @@ -1261,15 +1339,14 @@ sparc_do_work_around_errata (void) && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set)))) break; - after = next_active_insn (after); + after = next_active_non_empty_insn (after); if (!after) break; } /* Add NOP if third instruction is a store. */ if (i == 1 - && (set = single_set (after)) != NULL_RTX - && MEM_P (SET_DEST (set))) + && store_insn_p (after)) insert_nop = true; } } @@ -1596,6 +1673,10 @@ dump_target_flag_bits (const int flags) fprintf (stderr, "CBCOND "); if (flags & MASK_DEPRECATED_V8_INSNS) fprintf (stderr, "DEPRECATED_V8_INSNS "); + if (flags & MASK_LEON) + fprintf (stderr, "LEON "); + if (flags & MASK_LEON3) + fprintf (stderr, "LEON3 "); if (flags & MASK_SPARCLET) fprintf (stderr, "SPARCLET "); if (flags & MASK_SPARCLITE) @@ -1632,6 +1713,7 @@ sparc_option_override (void) { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC }, { TARGET_CPU_leon, PROCESSOR_LEON }, { TARGET_CPU_leon3, PROCESSOR_LEON3 }, + { TARGET_CPU_leon5, PROCESSOR_LEON5 }, { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 }, { TARGET_CPU_sparclite, PROCESSOR_F930 }, { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X }, @@ -1663,6 +1745,7 @@ sparc_option_override (void) { "hypersparc", MASK_ISA, MASK_V8 }, { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON }, { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 }, + { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 }, { "leon3v7", MASK_ISA, MASK_LEON3 }, { "sparclite", MASK_ISA, MASK_SPARCLITE }, /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ @@ -1973,6 +2056,9 @@ sparc_option_override (void) case PROCESSOR_LEON3V7: sparc_costs = &leon3_costs; break; + case PROCESSOR_LEON5: + sparc_costs = &leon5_costs; + break; case PROCESSOR_SPARCLET: case PROCESSOR_TSC701: sparc_costs = &sparclet_costs; @@ -10120,11 +10206,64 @@ hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn, } static int +leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn, + int cost) +{ + enum attr_type insn_type, dep_type; + rtx pat = PATTERN (insn); + rtx dep_pat = PATTERN (dep_insn); + + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + insn_type = get_attr_type (insn); + dep_type = get_attr_type (dep_insn); + + switch (dtype) + { + case REG_DEP_TRUE: + /* Data dependency; DEP_INSN writes a register that INSN reads some + cycles later. */ + + switch (insn_type) + { + case TYPE_STORE: + /* Try to schedule three instructions between the store and + the ALU instruction that generated the data. */ + if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT) + { + if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) + break; + + if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) + return 4; + } + break; + default: + break; + } + break; + case REG_DEP_ANTI: + /* Penalize anti-dependencies for FPU instructions. 
*/ + if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD) + return 4; + break; + default: + break; + } + + return cost; +} + +static int sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost, unsigned int) { switch (sparc_cpu) { + case PROCESSOR_LEON5: + cost = leon5_adjust_cost (insn, dep_type, dep, cost); + break; case PROCESSOR_SUPERSPARC: cost = supersparc_adjust_cost (insn, dep_type, dep, cost); break; diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 4da5a06..edafa99 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -120,21 +120,22 @@ along with GCC; see the file COPYING3. If not see #define TARGET_CPU_leon 4 #define TARGET_CPU_leon3 5 #define TARGET_CPU_leon3v7 6 -#define TARGET_CPU_sparclite 7 -#define TARGET_CPU_f930 7 /* alias */ -#define TARGET_CPU_f934 7 /* alias */ -#define TARGET_CPU_sparclite86x 8 -#define TARGET_CPU_sparclet 9 -#define TARGET_CPU_tsc701 9 /* alias */ -#define TARGET_CPU_v9 10 /* generic v9 implementation */ -#define TARGET_CPU_sparcv9 10 /* alias */ -#define TARGET_CPU_sparc64 10 /* alias */ -#define TARGET_CPU_ultrasparc 11 -#define TARGET_CPU_ultrasparc3 12 -#define TARGET_CPU_niagara 13 -#define TARGET_CPU_niagara2 14 -#define TARGET_CPU_niagara3 15 -#define TARGET_CPU_niagara4 16 +#define TARGET_CPU_leon5 7 +#define TARGET_CPU_sparclite 8 +#define TARGET_CPU_f930 8 /* alias */ +#define TARGET_CPU_f934 8 /* alias */ +#define TARGET_CPU_sparclite86x 9 +#define TARGET_CPU_sparclet 10 +#define TARGET_CPU_tsc701 10 /* alias */ +#define TARGET_CPU_v9 11 /* generic v9 implementation */ +#define TARGET_CPU_sparcv9 11 /* alias */ +#define TARGET_CPU_sparc64 11 /* alias */ +#define TARGET_CPU_ultrasparc 12 +#define TARGET_CPU_ultrasparc3 13 +#define TARGET_CPU_niagara 14 +#define TARGET_CPU_niagara2 15 +#define TARGET_CPU_niagara3 16 +#define TARGET_CPU_niagara4 17 #define TARGET_CPU_niagara7 19 #define TARGET_CPU_m8 20 @@ -229,7 +230,8 @@ along with GCC; see the file COPYING3. If not see #endif #if TARGET_CPU_DEFAULT == TARGET_CPU_leon \ - || TARGET_CPU_DEFAULT == TARGET_CPU_leon3 + || TARGET_CPU_DEFAULT == TARGET_CPU_leon3 \ + || TARGET_CPU_DEFAULT == TARGET_CPU_leon5 #define CPP_CPU32_DEFAULT_SPEC "-D__leon__ -D__sparc_v8__" #define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG #endif @@ -285,6 +287,7 @@ along with GCC; see the file COPYING3. If not see %{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \ %{mcpu=leon:-D__leon__ -D__sparc_v8__} \ %{mcpu=leon3:-D__leon__ -D__sparc_v8__} \ +%{mcpu=leon5:-D__leon__ -D__sparc_v8__} \ %{mcpu=leon3v7:-D__leon__} \ %{mcpu=v9:-D__sparc_v9__} \ %{mcpu=ultrasparc:-D__sparc_v9__} \ @@ -337,6 +340,7 @@ along with GCC; see the file COPYING3. 
If not see %{mcpu=hypersparc:-Av8} \ %{mcpu=leon:" AS_LEON_FLAG "} \ %{mcpu=leon3:" AS_LEON_FLAG "} \ +%{mcpu=leon5:" AS_LEON_FLAG "} \ %{mcpu=leon3v7:" AS_LEONV7_FLAG "} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 24b76e0..294c918 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -233,6 +233,7 @@ hypersparc, leon, leon3, + leon5, leon3v7, sparclite, f930, @@ -638,6 +639,7 @@ (include "supersparc.md") (include "hypersparc.md") (include "leon.md") +(include "leon5.md") (include "sparclet.md") (include "ultra1_2.md") (include "ultra3.md") @@ -8353,9 +8355,15 @@ visl") (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) (set (match_scratch:SI 2 "=&r") (const_int 0))] "TARGET_ARCH32" - "ld\t%1, %2\;st\t%2, %0\;mov\t0, %2" +{ + if (sparc_fix_b2bst) + return "ld\t%1, %2\;st\t%2, %0\;mov\t0, %2\;nop"; + else + return "ld\t%1, %2\;st\t%2, %0\;mov\t0, %2"; +} [(set_attr "type" "multi") - (set_attr "length" "3")]) + (set (attr "length") (if_then_else (eq_attr "fix_b2bst" "true") + (const_int 4) (const_int 3)))]) (define_insn "stack_protect_set64" [(set (match_operand:DI 0 "memory_operand" "=m") diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt index fb79267..658a187 100644 --- a/gcc/config/sparc/sparc.opt +++ b/gcc/config/sparc/sparc.opt @@ -176,6 +176,9 @@ EnumValue Enum(sparc_processor) String(leon3v7) Value(PROCESSOR_LEON3V7) EnumValue +Enum(sparc_processor) String(leon5) Value(PROCESSOR_LEON5) + +EnumValue Enum(sparc_processor) String(sparclite) Value(PROCESSOR_SPARCLITE) EnumValue diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa index 973815c..d06e492 100644 --- a/gcc/config/xtensa/t-xtensa +++ b/gcc/config/xtensa/t-xtensa @@ -16,4 +16,5 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. +TM_H += $(srcdir)/../include/xtensa-config.h $(out_object_file): gt-xtensa.h diff --git a/gcc/configure b/gcc/configure index 2729327..36dba4e 100755 --- a/gcc/configure +++ b/gcc/configure @@ -753,10 +753,6 @@ FGREP SED LIBTOOL collect2 -NO_PIE_FLAG_FOR_BUILD -NO_PIE_CFLAGS_FOR_BUILD -BUILD_NO_PIE_FLAG -BUILD_NO_PIE_CFLAGS STMP_FIXINC BUILD_LDFLAGS BUILD_CXXFLAGS @@ -13324,24 +13320,14 @@ BUILD_CXXFLAGS='$(ALL_CXXFLAGS)' BUILD_LDFLAGS='$(LDFLAGS)' STMP_FIXINC=stmp-fixinc -BUILD_NO_PIE_CFLAGS='$(NO_PIE_CFLAGS)' -BUILD_NO_PIE_FLAG='$(NO_PIE_FLAG)' - # And these apply if build != host, or we are generating coverage data if test x$build != x$host || test "x$coverage_flags" != x then BUILD_CFLAGS='$(INTERNAL_CFLAGS) $(T_CFLAGS) $(CFLAGS-$@) $(CFLAGS_FOR_BUILD)' BUILD_CXXFLAGS='$(INTERNAL_CFLAGS) $(T_CFLAGS) $(CFLAGS-$@) $(CXXFLAGS_FOR_BUILD)' BUILD_LDFLAGS='$(LDFLAGS_FOR_BUILD)' - - NO_PIE_CFLAGS_FOR_BUILD=${NO_PIE_CFLAGS_FOR_BUILD-${NO_PIE_CFLAGS}} - NO_PIE_FLAG_FOR_BUILD=${NO_PIE_FLAG_FOR_BUILD-${NO_PIE_FLAG}} - BUILD_NO_PIE_CFLAGS='$(NO_PIE_CFLAGS_FOR_BUILD)' - BUILD_NO_PIE_FLAG='$(NO_PIE_FLAG_FOR_BUILD)' fi - - # Expand extra_headers to include complete path. # This substitutes for lots of t-* files. 
extra_headers_list= @@ -19468,7 +19454,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19471 "configure" +#line 19457 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -19574,7 +19560,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19577 "configure" +#line 19563 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/gcc/configure.ac b/gcc/configure.ac index 259c933..fadd34d 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -2466,23 +2466,13 @@ BUILD_CXXFLAGS='$(ALL_CXXFLAGS)' AC_SUBST(BUILD_CXXFLAGS) BUILD_LDFLAGS='$(LDFLAGS)' AC_SUBST(BUILD_LDFLAGS) STMP_FIXINC=stmp-fixinc AC_SUBST(STMP_FIXINC) -BUILD_NO_PIE_CFLAGS='$(NO_PIE_CFLAGS)' AC_SUBST(BUILD_NO_PIE_CFLAGS) -BUILD_NO_PIE_FLAG='$(NO_PIE_FLAG)' AC_SUBST(BUILD_NO_PIE_FLAG) - # And these apply if build != host, or we are generating coverage data if test x$build != x$host || test "x$coverage_flags" != x then BUILD_CFLAGS='$(INTERNAL_CFLAGS) $(T_CFLAGS) $(CFLAGS-$@) $(CFLAGS_FOR_BUILD)' BUILD_CXXFLAGS='$(INTERNAL_CFLAGS) $(T_CFLAGS) $(CFLAGS-$@) $(CXXFLAGS_FOR_BUILD)' BUILD_LDFLAGS='$(LDFLAGS_FOR_BUILD)' - - NO_PIE_CFLAGS_FOR_BUILD=${NO_PIE_CFLAGS_FOR_BUILD-${NO_PIE_CFLAGS}} - NO_PIE_FLAG_FOR_BUILD=${NO_PIE_FLAG_FOR_BUILD-${NO_PIE_FLAG}} - BUILD_NO_PIE_CFLAGS='$(NO_PIE_CFLAGS_FOR_BUILD)' - BUILD_NO_PIE_FLAG='$(NO_PIE_FLAG_FOR_BUILD)' fi -AC_SUBST(NO_PIE_CFLAGS_FOR_BUILD) -AC_SUBST(NO_PIE_FLAG_FOR_BUILD) # Expand extra_headers to include complete path. # This substitutes for lots of t-* files. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 8d1ec5d..4061a85 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,159 @@ +2021-09-16 Patrick Palka <ppalka@redhat.com> + + PR c++/98486 + * constraint.cc (get_normalized_constraints_from_decl): Always + look up constraints using the most general template. + * decl.c (grokdeclarator): Set constraints on a static data + member template. + * pt.c (determine_specialization): Check constraints on a + variable template. + +2021-09-16 Iain Sandoe <iain@sandoe.co.uk> + + * coroutines.cc (await_statement_walker): Code cleanups. + +2021-09-16 Jason Merrill <jason@redhat.com> + + * constexpr.c (cxx_eval_outermost_constant_expr): Use + protected_set_expr_location. + +2021-09-15 Patrick Palka <ppalka@redhat.com> + + PR c++/101904 + * call.c (build_user_type_conversion_1): Add tf_conv to complain. + (add_candidates): When in a SFINAE context, instead of adding a + candidate to bad_fns just mark it unviable. + +2021-09-15 Jason Merrill <jason@redhat.com> + + * cp-tree.h (parsing_function_declarator): Declare. + * name-lookup.c (set_decl_context_in_fn): Use it. + * parser.c (cp_parser_direct_declarator): Use it. + (parsing_function_declarator): New. + +2021-09-15 Jakub Jelinek <jakub@redhat.com> + + PR c++/88578 + PR c++/102295 + * typeck2.c (split_nonconstant_init_1): Don't throw away empty + initializers of flexible array members if they have non-zero type + size. + +2021-09-15 Patrick Palka <ppalka@redhat.com> + + PR c++/102050 + * decl.c (grok_special_member_properties): Set + TYPE_HAS_COPY_CTOR, TYPE_HAS_DEFAULT_CONSTRUCTOR + and TYPE_HAS_LIST_CTOR independently from each other. + +2021-09-15 Jason Merrill <jason@redhat.com> + + * decl.c (cxx_init_decl_processing): Only warn about odd + interference sizes if they were specified with --param. 
+ +2021-09-15 Jason Merrill <jason@redhat.com> + + PR c++/48396 + * cp-tree.h (enum cp_tree_index): Remove CPTI_TYPE_INFO_PTR_TYPE. + (type_info_ptr_type): Remove. + * rtti.c (init_rtti_processing): Don't predeclare std::type_info. + (typeid_ok_p): Check for null const_type_info_type_node. + (type_info_ptr_type, get_void_tinfo_ptr): New fns. + (get_tinfo_decl_dynamic, get_tinfo_ptr): Use them. + (ptr_initializer, ptm_initializer, get_pseudo_ti_init): Use them. + (get_tinfo_desc): Use const_ptr_type_node. + +2021-09-15 Jason Merrill <jason@redhat.com> + + * parser.c (cp_parser_template_name): Move object type. + (cp_parser_pre_parsed_nested_name_specifier): Likewise. + +2021-09-15 Jason Merrill <jason@redhat.com> + + * parser.c (cp_parser_unqualified_id): Only complain about ~A<T> in + a declarator. + +2021-09-14 Iain Sandoe <iain@sandoe.co.uk> + + * coroutines.cc (struct param_info): Add copy_var. + (build_actor_fn): Use simplified param references. + (register_param_uses): Likewise. + (rewrite_param_uses): Likewise. + (analyze_fn_parms): New function. + (coro_rewrite_function_body): Add proxies for the fn + parameters to the outer bind scope of the rewritten code. + (morph_fn_to_coro): Use simplified version of param ref. + +2021-09-14 Iain Sandoe <iain@sandoe.co.uk> + + * coroutines.cc (coro_resume_fn_id, coro_destroy_fn_id, + coro_promise_id, coro_frame_needs_free_id, coro_resume_index_id, + coro_self_handle_id, coro_actor_continue_id, + coro_frame_i_a_r_c_id): New. + (coro_init_identifiers): Initialize new name identifiers. + (coro_promise_type_found_p): Use pre-built identifiers. + (struct await_xform_data): Remove unused fields. + (transform_await_expr): Delete code that is now unused. + (build_actor_fn): Simplify interface, use pre-built identifiers and + remove transforms that are no longer needed. + (build_destroy_fn): Use revised field names. + (register_local_var_uses): Use pre-built identifiers. + (coro_rewrite_function_body): Simplify interface, use pre-built + identifiers. Generate proxy vars in the outer bind expr scope for the + implementation state that we wish to expose. + (morph_fn_to_coro): Adjust comments for new variable names, use pre- + built identifiers. Remove unused code to generate frame entries for + the implementation state. Adjust call for build_actor_fn. + +2021-09-14 Patrick Palka <ppalka@redhat.com> + + PR c++/102163 + * constexpr.c (cxx_eval_call_expression): After evaluating a + subobject constructor call for an empty union member, produce a + side effect that makes sure the member gets activated. + +2021-09-14 Jakub Jelinek <jakub@redhat.com> + + PR c++/102295 + * decl.c (layout_var_decl): For aggregates ending with a flexible + array member, add the size of the initializer for that member to + DECL_SIZE and DECL_SIZE_UNIT. + +2021-09-14 Jakub Jelinek <jakub@redhat.com> + + PR c++/102305 + * method.c (is_xible_helper): Call complete_type on to. + +2021-09-14 Jason Merrill <jason@redhat.com> + + * decl.c (cxx_init_decl_processing): Don't warn if L1 cache line + size is smaller than maxalign. + +2021-09-13 Jason Merrill <jason@redhat.com> + + * constexpr.c (maybe_warn_about_constant_value): + Complain about std::hardware_destructive_interference_size. + (cxx_eval_constant_expression): Call it. + * decl.c (cxx_init_decl_processing): Check + --param *-interference-size values. + +2021-09-13 Patrick Palka <ppalka@redhat.com> + + PR c++/101764 + * cp-tree.h (PACK_EXPANSION_FORCE_EXTRA_ARGS_P): New accessor + macro. 
+ * pt.c (has_extra_args_mechanism_p): New function. + (find_parameter_pack_data::found_extra_args_tree_p): New data + member. + (find_parameter_packs_r): Set ppd->found_extra_args_tree_p + appropriately. + (make_pack_expansion): Set PACK_EXPANSION_FORCE_EXTRA_ARGS_P if + ppd.found_extra_args_tree_p. + (use_pack_expansion_extra_args_p): Return true if there were + unsubstituted packs and PACK_EXPANSION_FORCE_EXTRA_ARGS_P. + (tsubst_pack_expansion): Pass the pack expansion to + use_pack_expansion_extra_args_p. + 2021-09-10 Jakub Jelinek <jakub@redhat.com> * parser.c (cp_parser_omp_atomic): Allow acq_rel on atomic read/write diff --git a/gcc/cp/call.c b/gcc/cp/call.c index b6011c1..c5601d9 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -4175,6 +4175,9 @@ build_user_type_conversion_1 (tree totype, tree expr, int flags, flags |= LOOKUP_NO_CONVERSION; if (BRACE_ENCLOSED_INITIALIZER_P (expr)) flags |= LOOKUP_NO_NARROWING; + /* Prevent add_candidates from treating a non-strictly viable candidate + as unviable. */ + complain |= tf_conv; /* It's OK to bind a temporary for converting constructor arguments, but not in converting the return value of a conversion operator. */ @@ -6232,8 +6235,18 @@ add_candidates (tree fns, tree first_arg, const vec<tree, va_gc> *args, stopped at the first bad conversion). Add the function to BAD_FNS to fully reconsider later if we don't find any strictly viable candidates. */ - bad_fns = lookup_add (fn, bad_fns); - *candidates = (*candidates)->next; + if (complain & (tf_error | tf_conv)) + { + bad_fns = lookup_add (fn, bad_fns); + *candidates = (*candidates)->next; + } + else + /* But if we're in a SFINAE context, just mark this candidate as + unviable outright and avoid potentially reconsidering it. + This is safe to do because in a SFINAE context, performing a bad + conversion is always an error (even with -fpermissive), so a + non-strictly viable candidate is effectively unviable anyway. */ + cand->viable = 0; } } if (which == non_templates && !seen_perfect) diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 0c2498a..8a5dd06 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -2787,12 +2787,34 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, &jump_target); if (DECL_CONSTRUCTOR_P (fun)) - /* This can be null for a subobject constructor call, in - which case what we care about is the initialization - side-effects rather than the value. We could get at the - value by evaluating *this, but we don't bother; there's - no need to put such a call in the hash table. */ - result = lval ? ctx->object : ctx->ctor; + { + /* This can be null for a subobject constructor call, in + which case what we care about is the initialization + side-effects rather than the value. We could get at the + value by evaluating *this, but we don't bother; there's + no need to put such a call in the hash table. */ + result = lval ? ctx->object : ctx->ctor; + + /* If we've just evaluated a subobject constructor call for an + empty union member, it might not have produced a side effect + that actually activated the union member. So produce such a + side effect now to ensure the union appears initialized. 
*/ + if (!result && new_obj + && TREE_CODE (new_obj) == COMPONENT_REF + && TREE_CODE (TREE_TYPE + (TREE_OPERAND (new_obj, 0))) == UNION_TYPE + && is_really_empty_class (TREE_TYPE (new_obj), + /*ignore_vptr*/false)) + { + tree activate = build2 (MODIFY_EXPR, TREE_TYPE (new_obj), + new_obj, + build_constructor (TREE_TYPE (new_obj), + NULL)); + cxx_eval_constant_expression (ctx, activate, lval, + non_constant_p, overflow_p); + ggc_free (activate); + } + } else if (VOID_TYPE_P (TREE_TYPE (res))) result = void_node; else @@ -7480,8 +7502,7 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, /* Remember the original location if that wouldn't need a wrapper. */ if (location_t loc = EXPR_LOCATION (t)) - if (CAN_HAVE_LOCATION_P (r)) - SET_EXPR_LOCATION (r, loc); + protected_set_expr_location (r, loc); return r; } diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 1aaf1e2..2896efd 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -918,20 +918,22 @@ get_normalized_constraints_from_decl (tree d, bool diag = false) tmpl = most_general_template (tmpl); } + d = tmpl ? tmpl : decl; + /* If we're not diagnosing errors, use cached constraints, if any. */ if (!diag) - if (tree *p = hash_map_safe_get (normalized_map, tmpl)) + if (tree *p = hash_map_safe_get (normalized_map, d)) return *p; tree norm = NULL_TREE; - if (tree ci = get_constraints (decl)) + if (tree ci = get_constraints (d)) { push_access_scope_guard pas (decl); norm = get_normalized_constraints_from_info (ci, tmpl, diag); } if (!diag) - hash_map_safe_put<hm_ggc> (normalized_map, tmpl, norm); + hash_map_safe_put<hm_ggc> (normalized_map, d, norm); return norm; } diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 9ab2be0..fbd5c49 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -215,7 +215,19 @@ static GTY(()) tree coro_await_ready_identifier; static GTY(()) tree coro_await_suspend_identifier; static GTY(()) tree coro_await_resume_identifier; -/* Create the identifiers used by the coroutines library interfaces. */ +/* Accessors for the coroutine frame state used by the implementation. */ + +static GTY(()) tree coro_resume_fn_id; +static GTY(()) tree coro_destroy_fn_id; +static GTY(()) tree coro_promise_id; +static GTY(()) tree coro_frame_needs_free_id; +static GTY(()) tree coro_resume_index_id; +static GTY(()) tree coro_self_handle_id; +static GTY(()) tree coro_actor_continue_id; +static GTY(()) tree coro_frame_i_a_r_c_id; + +/* Create the identifiers used by the coroutines library interfaces and + the implementation frame state. */ static void coro_init_identifiers () @@ -241,6 +253,16 @@ coro_init_identifiers () coro_await_ready_identifier = get_identifier ("await_ready"); coro_await_suspend_identifier = get_identifier ("await_suspend"); coro_await_resume_identifier = get_identifier ("await_resume"); + + /* Coroutine state frame field accessors. */ + coro_resume_fn_id = get_identifier ("_Coro_resume_fn"); + coro_destroy_fn_id = get_identifier ("_Coro_destroy_fn"); + coro_promise_id = get_identifier ("_Coro_promise"); + coro_frame_needs_free_id = get_identifier ("_Coro_frame_needs_free"); + coro_frame_i_a_r_c_id = get_identifier ("_Coro_initial_await_resume_called"); + coro_resume_index_id = get_identifier ("_Coro_resume_index"); + coro_self_handle_id = get_identifier ("_Coro_self_handle"); + coro_actor_continue_id = get_identifier ("_Coro_actor_continue"); } /* Trees we only need to set up once. 
*/ @@ -513,12 +535,12 @@ coro_promise_type_found_p (tree fndecl, location_t loc) /* Build a proxy for a handle to "self" as the param to await_suspend() calls. */ coro_info->self_h_proxy - = build_lang_decl (VAR_DECL, get_identifier ("self_h.proxy"), + = build_lang_decl (VAR_DECL, coro_self_handle_id, coro_info->handle_type); /* Build a proxy for the promise so that we can perform lookups. */ coro_info->promise_proxy - = build_lang_decl (VAR_DECL, get_identifier ("promise.proxy"), + = build_lang_decl (VAR_DECL, coro_promise_id, coro_info->promise_type); /* Note where we first saw a coroutine keyword. */ @@ -1864,10 +1886,6 @@ struct await_xform_data { tree actor_fn; /* Decl for context. */ tree actor_frame; - tree promise_proxy; - tree real_promise; - tree self_h_proxy; - tree real_self_h; }; /* When we built the await expressions, we didn't know the coro frame @@ -1888,7 +1906,6 @@ transform_await_expr (tree await_expr, await_xform_data *xform) /* So, on entry, we have: in : CO_AWAIT_EXPR (a, e_proxy, o, awr_call_vector, mode) We no longer need a [it had diagnostic value, maybe?] - We need to replace the promise proxy in all elements We need to replace the e_proxy in the awr_call. */ tree coro_frame_type = TREE_TYPE (xform->actor_frame); @@ -1914,16 +1931,6 @@ transform_await_expr (tree await_expr, await_xform_data *xform) TREE_OPERAND (await_expr, 1) = as; } - /* Now do the self_handle. */ - data.from = xform->self_h_proxy; - data.to = xform->real_self_h; - cp_walk_tree (&await_expr, replace_proxy, &data, NULL); - - /* Now do the promise. */ - data.from = xform->promise_proxy; - data.to = xform->real_promise; - cp_walk_tree (&await_expr, replace_proxy, &data, NULL); - return await_expr; } @@ -1957,6 +1964,7 @@ transform_await_wrapper (tree *stmt, int *do_subtree, void *d) struct param_info { tree field_id; /* The name of the copy in the coroutine frame. */ + tree copy_var; /* The local var proxy for the frame copy. */ vec<tree *> *body_uses; /* Worklist of uses, void if there are none. */ tree frame_type; /* The type used to represent this parm in the frame. */ tree orig_type; /* The original type of the parm (not as passed). */ @@ -2110,15 +2118,13 @@ coro_get_frame_dtor (tree coro_fp, tree orig, tree frame_size, static void build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, - tree orig, hash_map<tree, param_info> *param_uses, - hash_map<tree, local_var_info> *local_var_uses, - vec<tree, va_gc> *param_dtor_list, tree resume_fn_field, - tree resume_idx_field, unsigned body_count, tree frame_size) + tree orig, hash_map<tree, local_var_info> *local_var_uses, + vec<tree, va_gc> *param_dtor_list, + tree resume_idx_var, unsigned body_count, tree frame_size) { verify_stmt_tree (fnbody); /* Some things we inherit from the original function. 
*/ tree handle_type = get_coroutine_handle_type (orig); - tree self_h_proxy = get_coroutine_self_handle_proxy (orig); tree promise_type = get_coroutine_promise_type (orig); tree promise_proxy = get_coroutine_promise_proxy (orig); @@ -2136,11 +2142,12 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, tree top_block = make_node (BLOCK); BIND_EXPR_BLOCK (actor_bind) = top_block; - tree continuation = coro_build_artificial_var (loc, "_Coro_actor_continue", + tree continuation = coro_build_artificial_var (loc, coro_actor_continue_id, void_coro_handle_type, actor, NULL_TREE); BIND_EXPR_VARS (actor_bind) = continuation; + BLOCK_VARS (top_block) = BIND_EXPR_VARS (actor_bind) ; /* Link in the block associated with the outer scope of the re-written function body. */ @@ -2163,44 +2170,13 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, /* Declare the continuation handle. */ add_decl_expr (continuation); - /* Re-write param references in the body, no code should be generated - here. */ - if (DECL_ARGUMENTS (orig)) - { - tree arg; - for (arg = DECL_ARGUMENTS (orig); arg != NULL; arg = DECL_CHAIN (arg)) - { - bool existed; - param_info &parm = param_uses->get_or_insert (arg, &existed); - if (!parm.body_uses) - continue; /* Wasn't used in the original function body. */ - - tree fld_ref = lookup_member (coro_frame_type, parm.field_id, - /*protect=*/1, /*want_type=*/0, - tf_warning_or_error); - tree fld_idx = build3_loc (loc, COMPONENT_REF, parm.frame_type, - actor_frame, fld_ref, NULL_TREE); - - /* We keep these in the frame as a regular pointer, so convert that - back to the type expected. */ - if (parm.pt_ref) - fld_idx = build1_loc (loc, CONVERT_EXPR, TREE_TYPE (arg), fld_idx); - - int i; - tree *puse; - FOR_EACH_VEC_ELT (*parm.body_uses, i, puse) - *puse = fld_idx; - } - } - /* Re-write local vars, similarly. */ local_vars_transform xform_vars_data = {actor, actor_frame, coro_frame_type, loc, local_var_uses}; cp_walk_tree (&fnbody, transform_local_var_uses, &xform_vars_data, NULL); - tree resume_idx_name = get_identifier ("__resume_at"); - tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0, - tf_warning_or_error); + tree rat_field = lookup_member (coro_frame_type, coro_resume_index_id, + 1, 0, tf_warning_or_error); tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame, rat_field, NULL_TREE); @@ -2302,14 +2278,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, tree r = build_stmt (loc, LABEL_EXPR, actor_begin_label); add_stmt (r); - /* actor's version of the promise. */ - tree ap_m = lookup_member (coro_frame_type, get_identifier ("__p"), 1, 0, - tf_warning_or_error); - tree ap = build_class_member_access_expr (actor_frame, ap_m, NULL_TREE, false, - tf_warning_or_error); - /* actor's coroutine 'self handle'. */ - tree ash_m = lookup_member (coro_frame_type, get_identifier ("__self_h"), 1, + tree ash_m = lookup_member (coro_frame_type, coro_self_handle_id, 1, 0, tf_warning_or_error); tree ash = build_class_member_access_expr (actor_frame, ash_m, NULL_TREE, false, tf_warning_or_error); @@ -2329,37 +2299,13 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, /* Now we know the real promise, and enough about the frame layout to decide where to put things. 
*/ - await_xform_data xform - = {actor, actor_frame, promise_proxy, ap, self_h_proxy, ash}; + await_xform_data xform = {actor, actor_frame}; /* Transform the await expressions in the function body. Only do each await tree once! */ hash_set<tree> pset; cp_walk_tree (&fnbody, transform_await_wrapper, &xform, &pset); - /* Now replace the promise proxy with its real value. */ - proxy_replace p_data; - p_data.from = promise_proxy; - p_data.to = ap; - cp_walk_tree (&fnbody, replace_proxy, &p_data, NULL); - - /* The rewrite of the function adds code to set the __resume field to - nullptr when the coroutine is done and also the index to zero when - calling an unhandled exception. These are represented by two proxies - in the function, so rewrite them to the proper frame access. */ - tree resume_m - = lookup_member (coro_frame_type, get_identifier ("__resume"), - /*protect=*/1, /*want_type=*/0, tf_warning_or_error); - tree res_x = build_class_member_access_expr (actor_frame, resume_m, NULL_TREE, - false, tf_warning_or_error); - p_data.from = resume_fn_field; - p_data.to = res_x; - cp_walk_tree (&fnbody, replace_proxy, &p_data, NULL); - - p_data.from = resume_idx_field; - p_data.to = rat; - cp_walk_tree (&fnbody, replace_proxy, &p_data, NULL); - /* Add in our function body with the co_returns rewritten to final form. */ add_stmt (fnbody); @@ -2368,7 +2314,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, add_stmt (r); /* Destructors for the things we built explicitly. */ - r = build_special_member_call (ap, complete_dtor_identifier, NULL, + r = build_special_member_call (promise_proxy, complete_dtor_identifier, NULL, promise_type, LOOKUP_NORMAL, tf_warning_or_error); add_stmt (r); @@ -2381,7 +2327,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, /* Here deallocate the frame (if we allocated it), which we will have at present. */ tree fnf_m - = lookup_member (coro_frame_type, get_identifier ("__frame_needs_free"), 1, + = lookup_member (coro_frame_type, coro_frame_needs_free_id, 1, 0, tf_warning_or_error); tree fnf2_x = build_class_member_access_expr (actor_frame, fnf_m, NULL_TREE, false, tf_warning_or_error); @@ -2460,18 +2406,10 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, gcc_checking_assert (maybe_cleanup_point_expr_void (r) == r); add_stmt (r); - /* We will need to know which resume point number should be encoded. */ - tree res_idx_m - = lookup_member (coro_frame_type, resume_idx_name, - /*protect=*/1, /*want_type=*/0, tf_warning_or_error); - tree resume_pt_number - = build_class_member_access_expr (actor_frame, res_idx_m, NULL_TREE, false, - tf_warning_or_error); - /* We've now rewritten the tree and added the initial and final co_awaits. Now pass over the tree and expand the co_awaits. 
*/ - coro_aw_data data = {actor, actor_fp, resume_pt_number, NULL_TREE, + coro_aw_data data = {actor, actor_fp, resume_idx_var, NULL_TREE, ash, del_promise_label, ret_label, continue_label, continuation, 2}; cp_walk_tree (&actor_body, await_statement_expander, &data, NULL); @@ -2485,7 +2423,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody, } /* The prototype 'destroy' function : - frame->__resume_at |= 1; + frame->__Coro_resume_index |= 1; actor (frame); */ static void @@ -2504,11 +2442,10 @@ build_destroy_fn (location_t loc, tree coro_frame_type, tree destroy, tree destr_frame = build1 (INDIRECT_REF, coro_frame_type, destr_fp); - tree resume_idx_name = get_identifier ("__resume_at"); - tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0, - tf_warning_or_error); - tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, destr_frame, - rat_field, NULL_TREE); + tree rat_field = lookup_member (coro_frame_type, coro_resume_index_id, + 1, 0, tf_warning_or_error); + tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, + destr_frame, rat_field, NULL_TREE); /* _resume_at |= 1 */ tree dstr_idx = build2 (BIT_IOR_EXPR, short_unsigned_type_node, rat, @@ -3475,16 +3412,11 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) return NULL_TREE; } - /* We have something to be handled as a single statement. */ - bool has_cleanup_wrapper = TREE_CODE (*stmt) == CLEANUP_POINT_EXPR; - hash_set<tree> visited; - awpts->saw_awaits = 0; - hash_set<tree> truth_aoif_to_expand; - awpts->truth_aoif_to_expand = &truth_aoif_to_expand; - awpts->needs_truth_if_exp = false; - awpts->has_awaiter_init = false; + /* We have something to be handled as a single statement. We have to handle + a few statements specially where await statements have to be moved out of + constructs. */ tree expr = *stmt; - if (has_cleanup_wrapper) + if (TREE_CODE (*stmt) == CLEANUP_POINT_EXPR) expr = TREE_OPERAND (expr, 0); STRIP_NOPS (expr); @@ -3500,6 +3432,8 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) transforms can be implemented. */ case IF_STMT: { + tree *await_ptr; + hash_set<tree> visited; /* Transform 'if (cond with awaits) then stmt1 else stmt2' into bool cond = cond with awaits. if (cond) then stmt1 else stmt2. */ @@ -3507,10 +3441,8 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) /* We treat the condition as if it was a stand-alone statement, to see if there are any await expressions which will be analyzed and registered. */ - if ((res = cp_walk_tree (&IF_COND (if_stmt), - analyze_expression_awaits, d, &visited))) - return res; - if (!awpts->saw_awaits) + if (!(cp_walk_tree (&IF_COND (if_stmt), + find_any_await, &await_ptr, &visited))) return NULL_TREE; /* Nothing special to do here. */ gcc_checking_assert (!awpts->bind_stack->is_empty()); @@ -3526,7 +3458,7 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) /* We want to initialize the new variable with the expression that contains the await(s) and potentially also needs to have truth_if expressions expanded. 
*/ - tree new_s = build2_loc (sloc, MODIFY_EXPR, boolean_type_node, + tree new_s = build2_loc (sloc, INIT_EXPR, boolean_type_node, newvar, cond_inner); finish_expr_stmt (new_s); IF_COND (if_stmt) = newvar; @@ -3540,25 +3472,27 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) break; case FOR_STMT: { + tree *await_ptr; + hash_set<tree> visited; /* for loops only need special treatment if the condition or the iteration expression contain a co_await. */ tree for_stmt = *stmt; - /* Sanity check. */ - if ((res = cp_walk_tree (&FOR_INIT_STMT (for_stmt), - analyze_expression_awaits, d, &visited))) - return res; - gcc_checking_assert (!awpts->saw_awaits); - - if ((res = cp_walk_tree (&FOR_COND (for_stmt), - analyze_expression_awaits, d, &visited))) - return res; - bool for_cond_await = awpts->saw_awaits != 0; - unsigned save_awaits = awpts->saw_awaits; - - if ((res = cp_walk_tree (&FOR_EXPR (for_stmt), - analyze_expression_awaits, d, &visited))) - return res; - bool for_expr_await = awpts->saw_awaits > save_awaits; + /* At present, the FE always generates a separate initializer for + the FOR_INIT_STMT, when the expression has an await. Check that + this assumption holds in the future. */ + gcc_checking_assert + (!(cp_walk_tree (&FOR_INIT_STMT (for_stmt), find_any_await, + &await_ptr, &visited))); + + visited.empty (); + bool for_cond_await + = cp_walk_tree (&FOR_COND (for_stmt), find_any_await, + &await_ptr, &visited); + + visited.empty (); + bool for_expr_await + = cp_walk_tree (&FOR_EXPR (for_stmt), find_any_await, + &await_ptr, &visited); /* If the condition has an await, then we will need to rewrite the loop as @@ -3601,7 +3535,12 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) = create_named_label_with_ctx (sloc, buf, NULL_TREE); free (buf); add_stmt (build_stmt (sloc, LABEL_EXPR, it_expr_label)); - add_stmt (FOR_EXPR (for_stmt)); + tree for_expr = FOR_EXPR (for_stmt); + /* Present the iteration expression as a statement. */ + if (TREE_CODE (for_expr) == CLEANUP_POINT_EXPR) + for_expr = TREE_OPERAND (for_expr, 0); + STRIP_NOPS (for_expr); + finish_expr_stmt (for_expr); FOR_EXPR (for_stmt) = NULL_TREE; FOR_BODY (for_stmt) = pop_stmt_list (insert_list); /* rewrite continue statements to goto label. */ @@ -3628,11 +3567,11 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) break; stmt.. } */ + tree *await_ptr; + hash_set<tree> visited; tree while_stmt = *stmt; - if ((res = cp_walk_tree (&WHILE_COND (while_stmt), - analyze_expression_awaits, d, &visited))) - return res; - if (!awpts->saw_awaits) + if (!(cp_walk_tree (&WHILE_COND (while_stmt), + find_any_await, &await_ptr, &visited))) return NULL_TREE; /* Nothing special to do here. */ tree insert_list = push_stmt_list (); @@ -3658,10 +3597,10 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) break; } while (true); */ tree do_stmt = *stmt; - if ((res = cp_walk_tree (&DO_COND (do_stmt), - analyze_expression_awaits, d, &visited))) - return res; - if (!awpts->saw_awaits) + tree *await_ptr; + hash_set<tree> visited; + if (!(cp_walk_tree (&DO_COND (do_stmt), + find_any_await, &await_ptr, &visited))) return NULL_TREE; /* Nothing special to do here. */ tree insert_list = push_stmt_list (); @@ -3684,10 +3623,10 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) switch_type cond = cond with awaits switch (cond) stmt. 
*/ tree sw_stmt = *stmt; - if ((res = cp_walk_tree (&SWITCH_STMT_COND (sw_stmt), - analyze_expression_awaits, d, &visited))) - return res; - if (!awpts->saw_awaits) + tree *await_ptr; + hash_set<tree> visited; + if (!(cp_walk_tree (&SWITCH_STMT_COND (sw_stmt), + find_any_await, &await_ptr, &visited))) return NULL_TREE; /* Nothing special to do here. */ gcc_checking_assert (!awpts->bind_stack->is_empty()); @@ -3728,9 +3667,6 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) { expr; p.return_void(); goto final_suspend;} - for co_return [non void expr]; { p.return_value(expr); goto final_suspend; } */ - if ((res = cp_walk_tree (stmt, analyze_expression_awaits, - d, &visited))) - return res; location_t loc = EXPR_LOCATION (expr); tree call = TREE_OPERAND (expr, 1); expr = TREE_OPERAND (expr, 0); @@ -3738,39 +3674,33 @@ await_statement_walker (tree *stmt, int *do_subtree, void *d) /* [stmt.return.coroutine], 2.2 If expr is present and void, it is placed immediately before the call for return_void; */ - tree *maybe_await_stmt = NULL; if (expr && VOID_TYPE_P (TREE_TYPE (expr))) - { - finish_expr_stmt (expr); - /* If the return argument was a void expression, then any - awaits must be contained in that. */ - maybe_await_stmt = tsi_stmt_ptr (tsi_last (ret_list)); - } + finish_expr_stmt (expr); /* Insert p.return_{void,value(expr)}. */ finish_expr_stmt (call); - /* Absent a return of a void expression, any awaits must be in - the parameter to return_value(). */ - if (!maybe_await_stmt) - maybe_await_stmt = tsi_stmt_ptr (tsi_last (ret_list)); TREE_USED (awpts->fs_label) = 1; add_stmt (build_stmt (loc, GOTO_EXPR, awpts->fs_label)); *stmt = pop_stmt_list (ret_list); + res = cp_walk_tree (stmt, await_statement_walker, d, NULL); /* Once this is complete, we will have processed subtrees. */ *do_subtree = 0; - if (awpts->saw_awaits) - { - gcc_checking_assert (maybe_await_stmt); - res = cp_walk_tree (maybe_await_stmt, await_statement_walker, - d, NULL); - if (res) - return res; - } - return NULL_TREE; /* Done. */ + return res; } break; } else if (EXPR_P (expr)) { + hash_set<tree> visited; + tree *await_ptr; + if (!(cp_walk_tree (stmt, find_any_await, &await_ptr, &visited))) + return NULL_TREE; /* Nothing special to do here. */ + + visited.empty (); + awpts->saw_awaits = 0; + hash_set<tree> truth_aoif_to_expand; + awpts->truth_aoif_to_expand = &truth_aoif_to_expand; + awpts->needs_truth_if_exp = false; + awpts->has_awaiter_init = false; if ((res = cp_walk_tree (stmt, analyze_expression_awaits, d, &visited))) return res; *do_subtree = 0; /* Done subtrees. */ @@ -3805,11 +3735,11 @@ struct param_frame_data bool param_seen; }; -/* A tree-walk callback that records the use of parameters (to allow for - optimizations where handling unused parameters may be omitted). */ +/* A tree walk callback that rewrites each parm use to the local variable + that represents its copy in the frame. 
*/ static tree -register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) +rewrite_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) { param_frame_data *data = (param_frame_data *) d; @@ -3817,7 +3747,7 @@ register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) if (TREE_CODE (*stmt) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (*stmt)) { tree t = DECL_VALUE_EXPR (*stmt); - return cp_walk_tree (&t, register_param_uses, d, NULL); + return cp_walk_tree (&t, rewrite_param_uses, d, NULL); } if (TREE_CODE (*stmt) != PARM_DECL) @@ -3831,16 +3761,88 @@ register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d) param_info &parm = data->param_uses->get_or_insert (*stmt, &existed); gcc_checking_assert (existed); - if (!parm.body_uses) + *stmt = parm.copy_var; + return NULL_TREE; +} + +/* Build up a set of info that determines how each param copy will be + handled. */ + +static hash_map<tree, param_info> * +analyze_fn_parms (tree orig) +{ + if (!DECL_ARGUMENTS (orig)) + return NULL; + + hash_map<tree, param_info> *param_uses = new hash_map<tree, param_info>; + + /* Build a hash map with an entry for each param. + The key is the param tree. + Then we have an entry for the frame field name. + Then a cache for the field ref when we come to use it. + Then a tree list of the uses. + The second two entries start out empty - and only get populated + when we see uses. */ + bool lambda_p = LAMBDA_FUNCTION_P (orig); + + unsigned no_name_parm = 0; + for (tree arg = DECL_ARGUMENTS (orig); arg != NULL; arg = DECL_CHAIN (arg)) { - vec_alloc (parm.body_uses, 4); - parm.body_uses->quick_push (stmt); - data->param_seen = true; + bool existed; + param_info &parm = param_uses->get_or_insert (arg, &existed); + gcc_checking_assert (!existed); + parm.body_uses = NULL; + tree actual_type = TREE_TYPE (arg); + actual_type = complete_type_or_else (actual_type, orig); + if (actual_type == NULL_TREE) + actual_type = error_mark_node; + parm.orig_type = actual_type; + parm.by_ref = parm.pt_ref = parm.rv_ref = false; + if (TREE_CODE (actual_type) == REFERENCE_TYPE) + { + /* If the user passes by reference, then we will save the + pointer to the original. As noted in + [dcl.fct.def.coroutine] / 13, if the lifetime of the + referenced item ends and then the coroutine is resumed, + we have UB; well, the user asked for it. 
*/ + if (TYPE_REF_IS_RVALUE (actual_type)) + parm.rv_ref = true; + else + parm.pt_ref = true; + } + else if (TYPE_REF_P (DECL_ARG_TYPE (arg))) + parm.by_ref = true; + + parm.frame_type = actual_type; + + parm.this_ptr = is_this_parameter (arg); + parm.lambda_cobj = lambda_p && DECL_NAME (arg) == closure_identifier; + + tree name = DECL_NAME (arg); + if (!name) + { + char *buf = xasprintf ("_Coro_unnamed_parm_%d", no_name_parm++); + name = get_identifier (buf); + free (buf); + } + parm.field_id = name; + + if (TYPE_HAS_NONTRIVIAL_DESTRUCTOR (parm.frame_type)) + { + char *buf = xasprintf ("_Coro_%s_live", IDENTIFIER_POINTER (name)); + parm.guard_var = build_lang_decl (VAR_DECL, get_identifier (buf), + boolean_type_node); + free (buf); + DECL_ARTIFICIAL (parm.guard_var) = true; + DECL_CONTEXT (parm.guard_var) = orig; + DECL_INITIAL (parm.guard_var) = boolean_false_node; + parm.trivial_dtor = false; + } + else + parm.trivial_dtor = true; } - else - parm.body_uses->safe_push (stmt); - return NULL_TREE; + return param_uses; } /* Small helper for the repetitive task of adding a new field to the coro @@ -3927,6 +3929,7 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) identify them in the coroutine frame. */ tree lvname = DECL_NAME (lvar); char *buf; + /* The outermost bind scope contains the artificial variables that we inject to implement the coro state machine. We want to be able to inspect these in debugging. */ @@ -3936,7 +3939,7 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) buf = xasprintf ("%s_%u_%u", IDENTIFIER_POINTER (lvname), lvd->nest_depth, lvd->bind_indx); else - buf = xasprintf ("_D%u.%u.%u", DECL_UID (lvar), lvd->nest_depth, + buf = xasprintf ("_D%u_%u_%u", DECL_UID (lvar), lvd->nest_depth, lvd->bind_indx); /* TODO: Figure out if we should build a local type that has any excess alignment or size from the original decl. */ @@ -4023,8 +4026,9 @@ coro_build_actor_or_destroy_function (tree orig, tree fn_type, static tree coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, - tree resume_fn_ptr_type, tree& resume_fn_field, - tree& resume_idx_field, tree& fs_label) + hash_map<tree, param_info> *param_uses, + tree resume_fn_ptr_type, + tree& resume_idx_var, tree& fs_label) { /* This will be our new outer scope. */ tree update_body = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); @@ -4057,7 +4061,6 @@ coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, /* Wrap the function body in a try {} catch (...) {} block, if exceptions are enabled. */ - tree promise = get_coroutine_promise_proxy (orig); tree var_list = NULL_TREE; tree initial_await = build_init_or_final_await (fn_start, false); @@ -4068,24 +4071,94 @@ coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, tree return_void = get_coroutine_return_void_expr (current_function_decl, fn_start, false); + /* The pointer to the resume function. */ + tree resume_fn_ptr + = coro_build_artificial_var (fn_start, coro_resume_fn_id, + resume_fn_ptr_type, orig, NULL_TREE); + DECL_CHAIN (resume_fn_ptr) = var_list; + var_list = resume_fn_ptr; + add_decl_expr (resume_fn_ptr); + /* We will need to be able to set the resume function pointer to nullptr to signal that the coroutine is 'done'. 
*/ - resume_fn_field - = build_lang_decl (VAR_DECL, get_identifier ("resume.fn.ptr.proxy"), - resume_fn_ptr_type); - DECL_ARTIFICIAL (resume_fn_field) = true; tree zero_resume = build1 (CONVERT_EXPR, resume_fn_ptr_type, integer_zero_node); - zero_resume - = build2 (INIT_EXPR, resume_fn_ptr_type, resume_fn_field, zero_resume); - /* Likewise, the resume index needs to be reset. */ - resume_idx_field - = build_lang_decl (VAR_DECL, get_identifier ("resume.index.proxy"), - short_unsigned_type_node); - DECL_ARTIFICIAL (resume_idx_field) = true; - tree zero_resume_idx = build_int_cst (short_unsigned_type_node, 0); - zero_resume_idx = build2 (INIT_EXPR, short_unsigned_type_node, - resume_idx_field, zero_resume_idx); + + /* The pointer to the destroy function. */ + tree var = coro_build_artificial_var (fn_start, coro_destroy_fn_id, + resume_fn_ptr_type, orig, NULL_TREE); + DECL_CHAIN (var) = var_list; + var_list = var; + add_decl_expr (var); + + /* The promise was created on demand when parsing we now link it into + our scope. */ + tree promise = get_coroutine_promise_proxy (orig); + DECL_CONTEXT (promise) = orig; + DECL_SOURCE_LOCATION (promise) = fn_start; + DECL_CHAIN (promise) = var_list; + var_list = promise; + add_decl_expr (promise); + + /* We need a handle to this coroutine, which is passed to every + await_suspend(). This was created on demand when parsing we now link it + into our scope. */ + var = get_coroutine_self_handle_proxy (orig); + DECL_CONTEXT (var) = orig; + DECL_SOURCE_LOCATION (var) = fn_start; + DECL_CHAIN (var) = var_list; + var_list = var; + add_decl_expr (var); + + /* If we have function parms, then these will be copied to the coroutine + frame. Create a local (proxy) variable for each parm, since the original + parms will be out of scope once the ramp has finished. The proxy vars will + get DECL_VALUE_EXPRs pointing to the frame copies, so that we can interact + with them in the debugger. */ + if (param_uses) + { + gcc_checking_assert (DECL_ARGUMENTS (orig)); + /* Add a local var for each parm. */ + for (tree arg = DECL_ARGUMENTS (orig); arg != NULL; + arg = DECL_CHAIN (arg)) + { + param_info *parm_i = param_uses->get (arg); + gcc_checking_assert (parm_i); + parm_i->copy_var + = build_lang_decl (VAR_DECL, parm_i->field_id, TREE_TYPE (arg)); + DECL_SOURCE_LOCATION (parm_i->copy_var) = DECL_SOURCE_LOCATION (arg); + DECL_CONTEXT (parm_i->copy_var) = orig; + DECL_ARTIFICIAL (parm_i->copy_var) = true; + DECL_CHAIN (parm_i->copy_var) = var_list; + var_list = parm_i->copy_var; + add_decl_expr (parm_i->copy_var); + } + + /* Now replace all uses of the parms in the function body with the proxy + vars. We want to this to apply to every instance of param's use, so + don't include a 'visited' hash_set on the tree walk, however we will + arrange to visit each containing expression only once. */ + hash_set<tree *> visited; + param_frame_data param_data = {NULL, param_uses, + &visited, fn_start, false}; + cp_walk_tree (&fnbody, rewrite_param_uses, ¶m_data, NULL); + } + + /* We create a resume index, this is initialized in the ramp. */ + resume_idx_var + = coro_build_artificial_var (fn_start, coro_resume_index_id, + short_unsigned_type_node, orig, NULL_TREE); + DECL_CHAIN (resume_idx_var) = var_list; + var_list = resume_idx_var; + add_decl_expr (resume_idx_var); + + /* If the coroutine has a frame that needs to be freed, this will be set by + the ramp. 
*/ + var = coro_build_artificial_var (fn_start, coro_frame_needs_free_id, + boolean_type_node, orig, NULL_TREE); + DECL_CHAIN (var) = var_list; + var_list = var; + add_decl_expr (var); if (flag_exceptions) { @@ -4097,8 +4170,7 @@ coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, /* Create and initialize the initial-await-resume-called variable per [dcl.fct.def.coroutine] / 5.3. */ tree i_a_r_c - = coro_build_artificial_var (fn_start, - "_Coro_initial_await_resume_called", + = coro_build_artificial_var (fn_start, coro_frame_i_a_r_c_id, boolean_type_node, orig, boolean_false_node); DECL_CHAIN (i_a_r_c) = var_list; @@ -4151,10 +4223,14 @@ coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, If the unhandled exception method returns, then we continue to the final await expression (which duplicates the clearing of the field). */ - finish_expr_stmt (zero_resume); - finish_expr_stmt (zero_resume_idx); - ueh = maybe_cleanup_point_expr_void (ueh); - add_stmt (ueh); + tree r = build2 (MODIFY_EXPR, resume_fn_ptr_type, resume_fn_ptr, + zero_resume); + finish_expr_stmt (r); + tree short_zero = build_int_cst (short_unsigned_type_node, 0); + r = build2 (MODIFY_EXPR, short_unsigned_type_node, resume_idx_var, + short_zero); + finish_expr_stmt (r); + finish_expr_stmt (ueh); finish_handler (handler); TRY_HANDLERS (tcb) = pop_stmt_list (TRY_HANDLERS (tcb)); } @@ -4189,6 +4265,8 @@ coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, /* Before entering the final suspend point, we signal that this point has been reached by setting the resume function pointer to zero (this is what the 'done()' builtin tests) as per the current ABI. */ + zero_resume = build2 (MODIFY_EXPR, resume_fn_ptr_type, resume_fn_ptr, + zero_resume); finish_expr_stmt (zero_resume); finish_expr_stmt (build_init_or_final_await (fn_start, true)); BIND_EXPR_BODY (update_body) = pop_stmt_list (BIND_EXPR_BODY (update_body)); @@ -4216,15 +4294,15 @@ coro_rewrite_function_body (location_t fn_start, tree fnbody, tree orig, declare a dummy coro frame. struct _R_frame { using handle_type = coro::coroutine_handle<coro1::promise_type>; - void (*__resume)(_R_frame *); - void (*__destroy)(_R_frame *); - coro1::promise_type __p; - bool frame_needs_free; free the coro frame mem if set. - bool i_a_r_c; [dcl.fct.def.coroutine] / 5.3 - short __resume_at; - handle_type self_handle; - (maybe) parameter copies. - (maybe) local variables saved (including awaitables) + void (*_Coro_resume_fn)(_R_frame *); + void (*_Coro_destroy_fn)(_R_frame *); + coro1::promise_type _Coro_promise; + bool _Coro_frame_needs_free; free the coro frame mem if set. + bool _Coro_i_a_r_c; [dcl.fct.def.coroutine] / 5.3 + short _Coro_resume_index; + handle_type _Coro_self_handle; + parameter copies (were required). + local variables saved (including awaitables) (maybe) trailing space. }; */ @@ -4316,7 +4394,7 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) /* 2. Types we need to define or look up. */ - tree fr_name = get_fn_local_identifier (orig, "frame"); + tree fr_name = get_fn_local_identifier (orig, "Frame"); tree coro_frame_type = xref_tag (record_type, fr_name); DECL_CONTEXT (TYPE_NAME (coro_frame_type)) = current_scope (); tree coro_frame_ptr = build_pointer_type (coro_frame_type); @@ -4333,121 +4411,18 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) /* Construct the wrapped function body; we will analyze this to determine the requirements for the coroutine frame. 
*/ - tree resume_fn_field = NULL_TREE; - tree resume_idx_field = NULL_TREE; + tree resume_idx_var = NULL_TREE; tree fs_label = NULL_TREE; - fnbody = coro_rewrite_function_body (fn_start, fnbody, orig, - act_des_fn_ptr, resume_fn_field, - resume_idx_field, fs_label); + hash_map<tree, param_info> *param_uses = analyze_fn_parms (orig); + + fnbody = coro_rewrite_function_body (fn_start, fnbody, orig, param_uses, + act_des_fn_ptr, + resume_idx_var, fs_label); /* Build our dummy coro frame layout. */ coro_frame_type = begin_class_definition (coro_frame_type); + /* The fields for the coro frame. */ tree field_list = NULL_TREE; - tree resume_name - = coro_make_frame_entry (&field_list, "__resume", - act_des_fn_ptr, fn_start); - tree destroy_name - = coro_make_frame_entry (&field_list, "__destroy", - act_des_fn_ptr, fn_start); - tree promise_name - = coro_make_frame_entry (&field_list, "__p", promise_type, fn_start); - tree fnf_name = coro_make_frame_entry (&field_list, "__frame_needs_free", - boolean_type_node, fn_start); - tree resume_idx_name - = coro_make_frame_entry (&field_list, "__resume_at", - short_unsigned_type_node, fn_start); - - /* We need a handle to this coroutine, which is passed to every - await_suspend(). There's no point in creating it over and over. */ - (void) coro_make_frame_entry (&field_list, "__self_h", handle_type, fn_start); - - /* Now add in fields for function params (if there are any). - We do not attempt elision of copies at this stage, we do analyze the - uses and build worklists to replace those when the state machine is - lowered. */ - - hash_map<tree, param_info> *param_uses = NULL; - if (DECL_ARGUMENTS (orig)) - { - /* Build a hash map with an entry for each param. - The key is the param tree. - Then we have an entry for the frame field name. - Then a cache for the field ref when we come to use it. - Then a tree list of the uses. - The second two entries start out empty - and only get populated - when we see uses. */ - param_uses = new hash_map<tree, param_info>; - bool lambda_p = LAMBDA_FUNCTION_P (orig); - - unsigned no_name_parm = 0; - for (tree arg = DECL_ARGUMENTS (orig); arg != NULL; - arg = DECL_CHAIN (arg)) - { - bool existed; - param_info &parm = param_uses->get_or_insert (arg, &existed); - gcc_checking_assert (!existed); - parm.body_uses = NULL; - tree actual_type = TREE_TYPE (arg); - actual_type = complete_type_or_else (actual_type, orig); - if (actual_type == NULL_TREE) - actual_type = error_mark_node; - parm.orig_type = actual_type; - parm.by_ref = parm.pt_ref = parm.rv_ref = false; - if (TREE_CODE (actual_type) == REFERENCE_TYPE) - { - /* If the user passes by reference, then we will save the - pointer to the original. As noted in - [dcl.fct.def.coroutine] / 13, if the lifetime of the - referenced item ends and then the coroutine is resumed, - we have UB; well, the user asked for it. 
*/ - if (TYPE_REF_IS_RVALUE (actual_type)) - parm.rv_ref = true; - else - parm.pt_ref = true; - } - else if (TYPE_REF_P (DECL_ARG_TYPE (arg))) - parm.by_ref = true; - - parm.frame_type = actual_type; - - parm.this_ptr = is_this_parameter (arg); - parm.lambda_cobj = lambda_p && DECL_NAME (arg) == closure_identifier; - - char *buf; - if (DECL_NAME (arg)) - { - tree pname = DECL_NAME (arg); - buf = xasprintf ("__parm.%s", IDENTIFIER_POINTER (pname)); - } - else - buf = xasprintf ("__unnamed_parm.%d", no_name_parm++); - - if (TYPE_HAS_NONTRIVIAL_DESTRUCTOR (parm.frame_type)) - { - char *gbuf = xasprintf ("%s.live", buf); - parm.guard_var - = build_lang_decl (VAR_DECL, get_identifier (gbuf), - boolean_type_node); - free (gbuf); - DECL_ARTIFICIAL (parm.guard_var) = true; - DECL_INITIAL (parm.guard_var) = boolean_false_node; - parm.trivial_dtor = false; - } - else - parm.trivial_dtor = true; - parm.field_id = coro_make_frame_entry - (&field_list, buf, actual_type, DECL_SOURCE_LOCATION (arg)); - free (buf); - } - - /* We want to record every instance of param's use, so don't include - a 'visited' hash_set on the tree walk, but only record a containing - expression once. */ - hash_set<tree *> visited; - param_frame_data param_data - = {&field_list, param_uses, &visited, fn_start, false}; - cp_walk_tree (&fnbody, register_param_uses, ¶m_data, NULL); - } /* We need to know, and inspect, each suspend point in the function in several places. It's convenient to place this map out of line @@ -4761,8 +4736,8 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) /* For now, once allocation has succeeded we always assume that this needs destruction, there's no impl. for frame allocation elision. */ - tree fnf_m - = lookup_member (coro_frame_type, fnf_name, 1, 0, tf_warning_or_error); + tree fnf_m = lookup_member (coro_frame_type, coro_frame_needs_free_id, + 1, 0,tf_warning_or_error); tree fnf_x = build_class_member_access_expr (deref_fp, fnf_m, NULL_TREE, false, tf_warning_or_error); r = build2 (INIT_EXPR, boolean_type_node, fnf_x, boolean_true_node); @@ -4773,24 +4748,22 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) tree actor_addr = build1 (ADDR_EXPR, act_des_fn_ptr, actor); tree resume_m - = lookup_member (coro_frame_type, resume_name, + = lookup_member (coro_frame_type, coro_resume_fn_id, /*protect=*/1, /*want_type=*/0, tf_warning_or_error); tree resume_x = build_class_member_access_expr (deref_fp, resume_m, NULL_TREE, false, tf_warning_or_error); r = build2_loc (fn_start, INIT_EXPR, act_des_fn_ptr, resume_x, actor_addr); - r = coro_build_cvt_void_expr_stmt (r, fn_start); - add_stmt (r); + finish_expr_stmt (r); tree destroy_addr = build1 (ADDR_EXPR, act_des_fn_ptr, destroy); tree destroy_m - = lookup_member (coro_frame_type, destroy_name, + = lookup_member (coro_frame_type, coro_destroy_fn_id, /*protect=*/1, /*want_type=*/0, tf_warning_or_error); tree destroy_x = build_class_member_access_expr (deref_fp, destroy_m, NULL_TREE, false, tf_warning_or_error); r = build2_loc (fn_start, INIT_EXPR, act_des_fn_ptr, destroy_x, destroy_addr); - r = coro_build_cvt_void_expr_stmt (r, fn_start); - add_stmt (r); + finish_expr_stmt (r); /* [dcl.fct.def.coroutine] /13 When a coroutine is invoked, a copy is created for each coroutine @@ -4881,7 +4854,7 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) /* Set up the promise. 
*/ tree promise_m - = lookup_member (coro_frame_type, promise_name, + = lookup_member (coro_frame_type, coro_promise_id, /*protect=*/1, /*want_type=*/0, tf_warning_or_error); tree p = build_class_member_access_expr (deref_fp, promise_m, NULL_TREE, @@ -5027,9 +5000,9 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) boolean_type_node); finish_expr_stmt (r); } - /* Initialize the resume_idx_name to 0, meaning "not started". */ + /* Initialize the resume_idx_var to 0, meaning "not started". */ tree resume_idx_m - = lookup_member (coro_frame_type, resume_idx_name, + = lookup_member (coro_frame_type, coro_resume_index_id, /*protect=*/1, /*want_type=*/0, tf_warning_or_error); tree resume_idx = build_class_member_access_expr (deref_fp, resume_idx_m, NULL_TREE, false, @@ -5172,9 +5145,9 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) push_deferring_access_checks (dk_no_check); /* Build the actor... */ - build_actor_fn (fn_start, coro_frame_type, actor, fnbody, orig, param_uses, - &local_var_uses, param_dtor_list, resume_fn_field, - resume_idx_field, body_aw_points.await_number, frame_size); + build_actor_fn (fn_start, coro_frame_type, actor, fnbody, orig, + &local_var_uses, param_dtor_list, + resume_idx_var, body_aw_points.await_number, frame_size); /* Destroyer ... */ build_destroy_fn (fn_start, coro_frame_type, destroy, actor); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index a82747c..fb0d5ec 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -204,12 +204,11 @@ enum cp_tree_index /* These are created at init time, but the library/headers provide definitions. */ CPTI_ALIGN_TYPE, - CPTI_CONST_TYPE_INFO_TYPE, - CPTI_TYPE_INFO_PTR_TYPE, CPTI_TERMINATE_FN, CPTI_CALL_UNEXPECTED_FN, /* These are lazily inited. */ + CPTI_CONST_TYPE_INFO_TYPE, CPTI_GET_EXCEPTION_PTR_FN, CPTI_BEGIN_CATCH_FN, CPTI_END_CATCH_FN, @@ -251,7 +250,6 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; #define abi_node cp_global_trees[CPTI_ABI] #define global_namespace cp_global_trees[CPTI_GLOBAL] #define const_type_info_type_node cp_global_trees[CPTI_CONST_TYPE_INFO_TYPE] -#define type_info_ptr_type cp_global_trees[CPTI_TYPE_INFO_PTR_TYPE] #define conv_op_marker cp_global_trees[CPTI_CONV_OP_MARKER] #define abort_fndecl cp_global_trees[CPTI_ABORT_FNDECL] #define current_aggr cp_global_trees[CPTI_AGGR_TAG] @@ -7138,6 +7136,7 @@ extern void cp_convert_omp_range_for (tree &, vec<tree, va_gc> *, tree &, tree &, tree &, tree &, tree &, tree &); extern void cp_finish_omp_range_for (tree, tree); extern bool parsing_nsdmi (void); +extern bool parsing_function_declarator (); extern bool parsing_default_capturing_generic_lambda_in_template (void); extern void inject_this_parameter (tree, cp_cv_quals); extern location_t defparse_location (tree); @@ -7583,8 +7582,8 @@ extern tree finish_omp_for (location_t, enum tree_code, extern tree finish_omp_for_block (tree, tree); extern void finish_omp_atomic (location_t, enum tree_code, enum tree_code, tree, tree, - tree, tree, tree, tree, - enum omp_memory_order); + tree, tree, tree, tree, tree, + enum omp_memory_order, bool); extern void finish_omp_barrier (void); extern void finish_omp_depobj (location_t, tree, enum omp_clause_depend_kind, diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index c206502..58ddc6a 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -4756,7 +4756,7 @@ cxx_init_decl_processing (void) /* Check that the hardware interference sizes are at least alignof(max_align_t), as required by the standard. 
*/ const int max_align = max_align_t_align () / BITS_PER_UNIT; - if (param_destruct_interfere_size) + if (global_options_set.x_param_destruct_interfere_size) { if (param_destruct_interfere_size < max_align) error ("%<--param destructive-interference-size=%d%> is less than " @@ -4767,21 +4767,26 @@ cxx_init_decl_processing (void) "is less than %<--param l1-cache-line-size=%d%>", param_destruct_interfere_size, param_l1_cache_line_size); } + else if (param_destruct_interfere_size) + /* Assume the internal value is OK. */; else if (param_l1_cache_line_size >= max_align) param_destruct_interfere_size = param_l1_cache_line_size; /* else leave it unset. */ - if (param_construct_interfere_size) + if (global_options_set.x_param_construct_interfere_size) { if (param_construct_interfere_size < max_align) error ("%<--param constructive-interference-size=%d%> is less than " "%d", param_construct_interfere_size, max_align); - else if (param_construct_interfere_size > param_l1_cache_line_size) + else if (param_construct_interfere_size > param_l1_cache_line_size + && param_l1_cache_line_size >= max_align) warning (OPT_Winterference_size, "%<--param constructive-interference-size=%d%> " "is greater than %<--param l1-cache-line-size=%d%>", param_construct_interfere_size, param_l1_cache_line_size); } + else if (param_construct_interfere_size) + /* Assume the internal value is OK. */; else if (param_l1_cache_line_size >= max_align) param_construct_interfere_size = param_l1_cache_line_size; } @@ -6123,6 +6128,38 @@ layout_var_decl (tree decl) error_at (DECL_SOURCE_LOCATION (decl), "storage size of %qD isn%'t constant", decl); TREE_TYPE (decl) = error_mark_node; + type = error_mark_node; + } + } + + /* If the final element initializes a flexible array field, add the size of + that initializer to DECL's size. */ + if (type != error_mark_node + && DECL_INITIAL (decl) + && TREE_CODE (DECL_INITIAL (decl)) == CONSTRUCTOR + && !vec_safe_is_empty (CONSTRUCTOR_ELTS (DECL_INITIAL (decl))) + && DECL_SIZE (decl) != NULL_TREE + && TREE_CODE (DECL_SIZE (decl)) == INTEGER_CST + && TYPE_SIZE (type) != NULL_TREE + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && tree_int_cst_equal (DECL_SIZE (decl), TYPE_SIZE (type))) + { + constructor_elt &elt = CONSTRUCTOR_ELTS (DECL_INITIAL (decl))->last (); + if (elt.index) + { + tree itype = TREE_TYPE (elt.index); + tree vtype = TREE_TYPE (elt.value); + if (TREE_CODE (itype) == ARRAY_TYPE + && TYPE_DOMAIN (itype) == NULL + && TREE_CODE (vtype) == ARRAY_TYPE + && COMPLETE_TYPE_P (vtype)) + { + DECL_SIZE (decl) + = size_binop (PLUS_EXPR, DECL_SIZE (decl), TYPE_SIZE (vtype)); + DECL_SIZE_UNIT (decl) + = size_binop (PLUS_EXPR, DECL_SIZE_UNIT (decl), + TYPE_SIZE_UNIT (vtype)); + } } } } @@ -13943,6 +13980,17 @@ grokdeclarator (const cp_declarator *declarator, if (declspecs->gnu_thread_keyword_p) SET_DECL_GNU_TLS_P (decl); } + + /* Set the constraints on the declaration. 
*/ + bool memtmpl = (processing_template_decl + > template_class_depth (current_class_type)); + if (memtmpl) + { + tree tmpl_reqs + = TEMPLATE_PARMS_CONSTRAINTS (current_template_parms); + tree ci = build_constraints (tmpl_reqs, NULL_TREE); + set_constraints (decl, ci); + } } else { @@ -14810,9 +14858,11 @@ grok_special_member_properties (tree decl) if (ctor > 1) TYPE_HAS_CONST_COPY_CTOR (class_type) = 1; } - else if (sufficient_parms_p (FUNCTION_FIRST_USER_PARMTYPE (decl))) + + if (sufficient_parms_p (FUNCTION_FIRST_USER_PARMTYPE (decl))) TYPE_HAS_DEFAULT_CONSTRUCTOR (class_type) = 1; - else if (is_list_ctor (decl)) + + if (is_list_ctor (decl)) TYPE_HAS_LIST_CTOR (class_type) = 1; if (DECL_DECLARED_CONSTEXPR_P (decl) diff --git a/gcc/cp/method.c b/gcc/cp/method.c index 353046d..32f7186 100644 --- a/gcc/cp/method.c +++ b/gcc/cp/method.c @@ -2081,6 +2081,7 @@ constructible_expr (tree to, tree from) static tree is_xible_helper (enum tree_code code, tree to, tree from, bool trivial) { + to = complete_type (to); deferring_access_check_sentinel acs (dk_no_deferred); if (VOID_TYPE_P (to) || ABSTRACT_CLASS_TYPE_P (to) || (from && FUNC_OR_METHOD_TYPE_P (from) diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 8e9c61e..ddee8b3 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -3363,12 +3363,9 @@ set_decl_context_in_fn (tree ctx, tree decl) if (!DECL_CONTEXT (decl) /* When parsing the parameter list of a function declarator, - don't set DECL_CONTEXT to an enclosing function. When we - push the PARM_DECLs in order to process the function body, - current_binding_level->this_entity will be set. */ + don't set DECL_CONTEXT to an enclosing function. */ && !(TREE_CODE (decl) == PARM_DECL - && current_binding_level->kind == sk_function_parms - && current_binding_level->this_entity == NULL)) + && parsing_function_declarator ())) DECL_CONTEXT (decl) = ctx; } diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index e44c5c6..20f949e 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -6379,7 +6379,8 @@ cp_parser_unqualified_id (cp_parser* parser, /* DR 2237 (C++20 only): A simple-template-id is no longer valid as the declarator-id of a constructor or destructor. */ - if (token->type == CPP_TEMPLATE_ID && cxx_dialect >= cxx20) + if (token->type == CPP_TEMPLATE_ID && declarator_p + && cxx_dialect >= cxx20) { if (!cp_parser_simulate_error (parser)) error_at (tilde_loc, "template-id not allowed for destructor"); @@ -18404,6 +18405,7 @@ cp_parser_template_name (cp_parser* parser, { /* We're optimizing away the call to cp_parser_lookup_name, but we still need to do this. */ + parser->object_scope = parser->context->object_type; parser->context->object_type = NULL_TREE; return identifier; } @@ -23105,7 +23107,7 @@ cp_parser_direct_declarator (cp_parser* parser, else if (!cp_parser_uncommitted_to_tentative_parse_p (parser)) /* Let compute_array_index_type diagnose this. */; else if (!parser->in_function_body - || current_binding_level->kind == sk_function_parms) + || parsing_function_declarator ()) { /* Normally, the array bound must be an integral constant expression. However, as an extension, we allow VLAs @@ -23829,6 +23831,17 @@ parsing_nsdmi (void) return false; } +/* True if we're parsing a function declarator. */ + +bool +parsing_function_declarator () +{ + /* this_entity is NULL for a function parameter scope while parsing the + declarator; it is set when parsing the body of the function. 
*/ + return (current_binding_level->kind == sk_function_parms + && !current_binding_level->this_entity); +} + /* Parse a late-specified return type, if any. This is not a separate non-terminal, but part of a function declarator, which looks like @@ -33574,7 +33587,8 @@ cp_parser_pre_parsed_nested_name_specifier (cp_parser *parser) /* Set the scope from the stored value. */ parser->scope = saved_checks_value (check_value); parser->qualifying_scope = check_value->qualifying_scope; - parser->object_scope = NULL_TREE; + parser->object_scope = parser->context->object_type; + parser->context->object_type = NULL_TREE; } /* Consume tokens up through a non-nested END token. Returns TRUE if we @@ -40068,19 +40082,56 @@ cp_parser_omp_allocate (cp_parser *parser, cp_token *pragma_tok) capture-block: { v = x; update-stmt; } | { update-stmt; v = x; } | { v = x; x = expr; } - where x and v are lvalue expressions with scalar type. */ + OpenMP 5.1: + # pragma omp atomic compare new-line + conditional-update-atomic + + # pragma omp atomic compare capture new-line + conditional-update-capture-atomic + + conditional-update-atomic: + cond-expr-stmt | cond-update-stmt + cond-expr-stmt: + x = expr ordop x ? expr : x; + x = x ordop expr ? expr : x; + x = x == e ? d : x; + cond-update-stmt: + if (expr ordop x) { x = expr; } + if (x ordop expr) { x = expr; } + if (x == e) { x = d; } + ordop: + <, > + conditional-update-capture-atomic: + v = cond-expr-stmt + { v = x; cond-expr-stmt } + { cond-expr-stmt v = x; } + { v = x; cond-update-stmt } + { cond-update-stmt v = x; } + if (x == e) { x = d; } else { v = x; } + { r = x == e; if (r) { x = d; } } + { r = x == e; if (r) { x = d; } else { v = x; } } + + where x, r and v are lvalue expressions with scalar type, + expr, e and d are expressions with scalar type and e might be + the same as v. 
*/ static void cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) { tree lhs = NULL_TREE, rhs = NULL_TREE, v = NULL_TREE, lhs1 = NULL_TREE; - tree rhs1 = NULL_TREE, orig_lhs; + tree rhs1 = NULL_TREE, orig_lhs, r = NULL_TREE; location_t loc = pragma_tok->location; enum tree_code code = ERROR_MARK, opcode = NOP_EXPR; enum omp_memory_order memory_order = OMP_MEMORY_ORDER_UNSPECIFIED; bool structured_block = false; bool first = true; tree clauses = NULL_TREE; + bool capture = false; + bool compare = false; + bool weak = false; + enum omp_memory_order fail = OMP_MEMORY_ORDER_UNSPECIFIED; + bool no_semicolon = false; + bool extra_scope = false; while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL)) { @@ -40100,6 +40151,10 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) enum tree_code new_code = ERROR_MARK; enum omp_memory_order new_memory_order = OMP_MEMORY_ORDER_UNSPECIFIED; + bool new_capture = false; + bool new_compare = false; + bool new_weak = false; + enum omp_memory_order new_fail = OMP_MEMORY_ORDER_UNSPECIFIED; if (!strcmp (p, "read")) new_code = OMP_ATOMIC_READ; @@ -40107,7 +40162,7 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) new_code = NOP_EXPR; else if (!strcmp (p, "update")) new_code = OMP_ATOMIC; - else if (!strcmp (p, "capture")) + else if (openacc && !strcmp (p, "capture")) new_code = OMP_ATOMIC_CAPTURE_NEW; else if (openacc) { @@ -40115,6 +40170,52 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) error_at (cloc, "expected %<read%>, %<write%>, %<update%>, " "or %<capture%> clause"); } + else if (!strcmp (p, "capture")) + new_capture = true; + else if (!strcmp (p, "compare")) + new_compare = true; + else if (!strcmp (p, "weak")) + new_weak = true; + else if (!strcmp (p, "fail")) + { + matching_parens parens; + + cp_lexer_consume_token (parser->lexer); + if (!parens.require_open (parser)) + continue; + + if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)) + { + id = cp_lexer_peek_token (parser->lexer)->u.value; + const char *q = IDENTIFIER_POINTER (id); + + if (!strcmp (q, "seq_cst")) + new_fail = OMP_MEMORY_ORDER_SEQ_CST; + else if (!strcmp (q, "acquire")) + new_fail = OMP_MEMORY_ORDER_ACQUIRE; + else if (!strcmp (q, "relaxed")) + new_fail = OMP_MEMORY_ORDER_RELAXED; + } + + if (new_fail != OMP_MEMORY_ORDER_UNSPECIFIED) + { + cp_lexer_consume_token (parser->lexer); + if (fail != OMP_MEMORY_ORDER_UNSPECIFIED) + error_at (cloc, "too many %qs clauses", "fail"); + else + fail = new_fail; + } + else + cp_parser_error (parser, "expected %<seq_cst%>, %<acquire%> " + "or %<relaxed%>"); + if (new_fail == OMP_MEMORY_ORDER_UNSPECIFIED + || !parens.require_close (parser)) + cp_parser_skip_to_closing_parenthesis (parser, + /*recovering=*/true, + /*or_comma=*/false, + /*consume_paren=*/true); + continue; + } else if (!strcmp (p, "seq_cst")) new_memory_order = OMP_MEMORY_ORDER_SEQ_CST; else if (!strcmp (p, "acq_rel")) @@ -40135,8 +40236,9 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) { p = NULL; error_at (cloc, "expected %<read%>, %<write%>, %<update%>, " - "%<capture%>, %<seq_cst%>, %<acq_rel%>, " - "%<release%>, %<relaxed%> or %<hint%> clause"); + "%<capture%>, %<compare%>, %<weak%>, %<fail%>, " + "%<seq_cst%>, %<acq_rel%>, %<release%>, " + "%<relaxed%> or %<hint%> clause"); } if (p) { @@ -40159,6 +40261,27 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) else memory_order = new_memory_order; } 
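To make the new clause handling concrete, here is a small, purely illustrative piece of user code (not part of this patch) showing the kind of OpenMP 5.1 forms that the compare/capture/weak/fail clause parsing above is meant to accept; the variable names follow the grammar comment earlier in cp_parser_omp_atomic:

    /* Hypothetical user code exercising the new clauses.  */
    void
    user_code (int &x, int &v, int e, int d, int expr, bool &r)
    {
      #pragma omp atomic compare               /* min/max style update */
      x = x < expr ? expr : x;

      #pragma omp atomic compare capture       /* compare-and-swap, capturing old or new value */
      { r = x == e; if (r) { x = d; } else { v = x; } }

      #pragma omp atomic compare weak fail(acquire)
      if (x == e) { x = d; }
    }
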
+ else if (new_capture) + { + if (capture) + error_at (cloc, "too many %qs clauses", "capture"); + else + capture = true; + } + else if (new_compare) + { + if (compare) + error_at (cloc, "too many %qs clauses", "compare"); + else + compare = true; + } + else if (new_weak) + { + if (weak) + error_at (cloc, "too many %qs clauses", "weak"); + else + weak = true; + } cp_lexer_consume_token (parser->lexer); continue; } @@ -40169,6 +40292,30 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) if (code == ERROR_MARK) code = OMP_ATOMIC; + if (capture) + { + if (code != OMP_ATOMIC) + error_at (loc, "%qs clause is incompatible with %<read%> or %<write%> " + "clauses", "capture"); + else + code = OMP_ATOMIC_CAPTURE_NEW; + } + if (compare && code != OMP_ATOMIC && code != OMP_ATOMIC_CAPTURE_NEW) + { + error_at (loc, "%qs clause is incompatible with %<read%> or %<write%> " + "clauses", "compare"); + compare = false; + } + if (fail != OMP_MEMORY_ORDER_UNSPECIFIED && !compare) + { + error_at (loc, "%qs clause requires %qs clause", "fail", "compare"); + fail = OMP_MEMORY_ORDER_UNSPECIFIED; + } + if (weak && !compare) + { + error_at (loc, "%qs clause requires %qs clause", "weak", "compare"); + weak = false; + } if (openacc) memory_order = OMP_MEMORY_ORDER_RELAXED; else if (memory_order == OMP_MEMORY_ORDER_UNSPECIFIED) @@ -40230,6 +40377,10 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) default: break; } + if (fail != OMP_MEMORY_ORDER_UNSPECIFIED) + memory_order + = (enum omp_memory_order) (memory_order + | (fail << OMP_FAIL_MEMORY_ORDER_SHIFT)); switch (code) { @@ -40262,6 +40413,9 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) cp_lexer_consume_token (parser->lexer); structured_block = true; } + else if (compare + && cp_lexer_next_token_is_keyword (parser->lexer, RID_IF)) + break; else { v = cp_parser_unary_expression (parser); @@ -40269,12 +40423,179 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) goto saw_error; if (!cp_parser_require (parser, CPP_EQ, RT_EQ)) goto saw_error; + if (compare + && cp_lexer_next_token_is_keyword (parser->lexer, RID_IF)) + { + location_t eloc = cp_lexer_peek_token (parser->lexer)->location; + error_at (eloc, "expected expression"); + goto saw_error; + } } default: break; } restart: + if (compare && cp_lexer_next_token_is_keyword (parser->lexer, RID_IF)) + { + cp_lexer_consume_token (parser->lexer); + + matching_parens parens; + if (!parens.require_open (parser)) + goto saw_error; + location_t eloc = cp_lexer_peek_token (parser->lexer)->location; + tree cmp_expr; + if (r) + cmp_expr = cp_parser_unary_expression (parser); + else + cmp_expr = cp_parser_binary_expression (parser, false, true, + PREC_NOT_OPERATOR, NULL); + if (!parens.require_close (parser)) + cp_parser_skip_to_closing_parenthesis (parser, true, false, true); + if (cmp_expr == error_mark_node) + goto saw_error; + if (r) + { + if (!cp_tree_equal (cmp_expr, r)) + goto bad_if; + cmp_expr = rhs; + rhs = NULL_TREE; + gcc_assert (TREE_CODE (cmp_expr) == EQ_EXPR); + } + if (TREE_CODE (cmp_expr) == EQ_EXPR) + ; + else if (!structured_block && code == OMP_ATOMIC_CAPTURE_NEW) + { + error_at (EXPR_LOC_OR_LOC (cmp_expr, eloc), + "expected %<==%> comparison in %<if%> condition"); + goto saw_error; + } + else if (TREE_CODE (cmp_expr) != GT_EXPR + && TREE_CODE (cmp_expr) != LT_EXPR) + { + error_at (EXPR_LOC_OR_LOC (cmp_expr, eloc), + "expected %<==%>, %<<%> or %<>%> comparison in %<if%> " + "condition"); + 
goto saw_error; + } + if (!cp_parser_require (parser, CPP_OPEN_BRACE, RT_OPEN_BRACE)) + goto saw_error; + + extra_scope = true; + eloc = cp_lexer_peek_token (parser->lexer)->location; + lhs = cp_parser_unary_expression (parser); + orig_lhs = lhs; + if (lhs == error_mark_node) + goto saw_error; + if (!cp_lexer_next_token_is (parser->lexer, CPP_EQ)) + { + cp_parser_error (parser, "expected %<=%>"); + goto saw_error; + } + cp_lexer_consume_token (parser->lexer); + eloc = cp_lexer_peek_token (parser->lexer)->location; + if (TREE_CODE (cmp_expr) == EQ_EXPR) + rhs1 = cp_parser_expression (parser); + else + rhs1 = cp_parser_simple_cast_expression (parser); + + if (!cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)) + goto saw_error; + + if (!cp_parser_require (parser, CPP_CLOSE_BRACE, RT_CLOSE_BRACE)) + goto saw_error; + + extra_scope = false; + no_semicolon = true; + + if (cp_tree_equal (TREE_OPERAND (cmp_expr, 0), lhs)) + { + if (TREE_CODE (cmp_expr) == EQ_EXPR) + { + opcode = COND_EXPR; + rhs = TREE_OPERAND (cmp_expr, 1); + } + else if (cp_tree_equal (TREE_OPERAND (cmp_expr, 1), rhs1)) + { + opcode = (TREE_CODE (cmp_expr) == GT_EXPR + ? MIN_EXPR : MAX_EXPR); + rhs = rhs1; + rhs1 = TREE_OPERAND (cmp_expr, 0); + } + else + goto bad_if; + } + else if (TREE_CODE (cmp_expr) == EQ_EXPR) + goto bad_if; + else if (cp_tree_equal (TREE_OPERAND (cmp_expr, 1), lhs) + && cp_tree_equal (TREE_OPERAND (cmp_expr, 0), rhs1)) + { + opcode = (TREE_CODE (cmp_expr) == GT_EXPR + ? MAX_EXPR : MIN_EXPR); + rhs = rhs1; + rhs1 = TREE_OPERAND (cmp_expr, 1); + } + else + { + bad_if: + cp_parser_error (parser, + "invalid form of %<#pragma omp atomic compare%>"); + goto saw_error; + } + + if (cp_lexer_next_token_is_keyword (parser->lexer, RID_ELSE)) + { + if (code != OMP_ATOMIC_CAPTURE_NEW + || (structured_block && r == NULL_TREE) + || TREE_CODE (cmp_expr) != EQ_EXPR) + { + eloc = cp_lexer_peek_token (parser->lexer)->location; + error_at (eloc, "unexpected %<else%>"); + goto saw_error; + } + + cp_lexer_consume_token (parser->lexer); + + if (!cp_parser_require (parser, CPP_OPEN_BRACE, RT_OPEN_BRACE)) + goto saw_error; + + extra_scope = true; + v = cp_parser_unary_expression (parser); + if (v == error_mark_node) + goto saw_error; + if (!cp_parser_require (parser, CPP_EQ, RT_EQ)) + goto saw_error; + + tree expr = cp_parser_simple_cast_expression (parser); + + if (!cp_tree_equal (expr, lhs)) + goto bad_if; + + if (!cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)) + goto saw_error; + + if (!cp_parser_require (parser, CPP_CLOSE_BRACE, RT_CLOSE_BRACE)) + goto saw_error; + + extra_scope = false; + code = OMP_ATOMIC_CAPTURE_OLD; + if (r == NULL_TREE) + /* Signal to c_finish_omp_atomic that in + if (x == e) { x = d; } else { v = x; } + case the store to v should be conditional. 
*/ + r = void_list_node; + } + else if (code == OMP_ATOMIC_CAPTURE_NEW && !structured_block) + { + cp_parser_error (parser, "expected %<else%>"); + goto saw_error; + } + else if (code == OMP_ATOMIC_CAPTURE_NEW + && r != NULL_TREE + && v == NULL_TREE) + code = OMP_ATOMIC; + goto stmt_done; + } lhs = cp_parser_unary_expression (parser); orig_lhs = lhs; switch (TREE_CODE (lhs)) @@ -40290,6 +40611,8 @@ restart: lhs = TREE_OPERAND (lhs, 0); opcode = PLUS_EXPR; rhs = integer_one_node; + if (compare) + goto invalid_compare; break; case POSTDECREMENT_EXPR: @@ -40300,6 +40623,8 @@ restart: lhs = TREE_OPERAND (lhs, 0); opcode = MINUS_EXPR; rhs = integer_one_node; + if (compare) + goto invalid_compare; break; case COMPOUND_EXPR: @@ -40328,11 +40653,18 @@ restart: && !structured_block && TREE_CODE (orig_lhs) == COMPOUND_EXPR) code = OMP_ATOMIC_CAPTURE_OLD; + if (compare) + goto invalid_compare; break; } } /* FALLTHRU */ default: + if (compare && !cp_lexer_next_token_is (parser->lexer, CPP_EQ)) + { + cp_parser_error (parser, "expected %<=%>"); + goto saw_error; + } switch (cp_lexer_peek_token (parser->lexer)->type) { case CPP_MULT_EQ: @@ -40400,6 +40732,8 @@ restart: case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR: + if (compare) + break; if (cp_tree_equal (lhs, TREE_OPERAND (rhs, 1))) { if (cp_parser_parse_definitely (parser)) @@ -40413,11 +40747,91 @@ restart: goto saw_error; } break; + case EQ_EXPR: + if (!compare + || code != OMP_ATOMIC_CAPTURE_NEW + || !structured_block + || v + || r) + break; + if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON) + && cp_lexer_nth_token_is_keyword (parser->lexer, + 2, RID_IF)) + { + if (cp_parser_parse_definitely (parser)) + { + r = lhs; + lhs = NULL_TREE; + rhs1 = NULL_TREE; + cp_lexer_consume_token (parser->lexer); + goto restart; + } + } + break; + case GT_EXPR: + case LT_EXPR: + if (compare + && cp_lexer_next_token_is (parser->lexer, CPP_QUERY) + && cp_tree_equal (lhs, TREE_OPERAND (rhs, 1)) + && cp_parser_parse_definitely (parser)) + { + opcode = TREE_CODE (rhs); + rhs1 = TREE_OPERAND (rhs, 0); + rhs = TREE_OPERAND (rhs, 1); + cond_expr: + cp_lexer_consume_token (parser->lexer); + bool saved_colon_corrects_to_scope_p + = parser->colon_corrects_to_scope_p; + parser->colon_corrects_to_scope_p = false; + tree e1 = cp_parser_expression (parser); + parser->colon_corrects_to_scope_p + = saved_colon_corrects_to_scope_p; + cp_parser_require (parser, CPP_COLON, RT_COLON); + tree e2 = cp_parser_simple_cast_expression (parser); + if (cp_tree_equal (lhs, e2)) + { + if (cp_tree_equal (lhs, rhs1)) + { + if (opcode == EQ_EXPR) + { + opcode = COND_EXPR; + rhs1 = e1; + goto stmt_done; + } + if (cp_tree_equal (rhs, e1)) + { + opcode + = opcode == GT_EXPR ? MIN_EXPR : MAX_EXPR; + rhs = e1; + goto stmt_done; + } + } + else + { + gcc_assert (opcode != EQ_EXPR); + if (cp_tree_equal (rhs1, e1)) + { + opcode + = opcode == GT_EXPR ? 
MAX_EXPR : MIN_EXPR; + rhs1 = rhs; + rhs = e1; + goto stmt_done; + } + } + } + cp_parser_error (parser, + "invalid form of " + "%<#pragma omp atomic compare%>"); + goto saw_error; + } + break; default: break; } cp_parser_abort_tentative_parse (parser); - if (structured_block && code == OMP_ATOMIC_CAPTURE_OLD) + if (structured_block + && code == OMP_ATOMIC_CAPTURE_OLD + && !compare) { rhs = cp_parser_expression (parser); if (rhs == error_mark_node) @@ -40445,7 +40859,7 @@ restart: cp_lexer_consume_token (parser->lexer); goto restart; } - else if (structured_block) + else if (structured_block && !compare) { opcode = NOP_EXPR; rhs = rhs1; @@ -40482,11 +40896,28 @@ restart: case CPP_XOR: opcode = BIT_XOR_EXPR; break; + case CPP_EQ_EQ: + opcode = EQ_EXPR; + break; + case CPP_GREATER: + opcode = GT_EXPR; + break; + case CPP_LESS: + opcode = LT_EXPR; + break; default: cp_parser_error (parser, "invalid operator for %<#pragma omp atomic%>"); goto saw_error; } + if (compare + && TREE_CODE_CLASS (opcode) != tcc_comparison) + { + cp_parser_error (parser, + "invalid form of " + "%<#pragma omp atomic compare%>"); + goto saw_error; + } oprec = TOKEN_PRECEDENCE (token); gcc_assert (oprec != PREC_NOT_OPERATOR); if (commutative_tree_code (opcode)) @@ -40496,8 +40927,18 @@ restart: oprec, NULL); if (rhs == error_mark_node) goto saw_error; + if (compare) + { + if (!cp_lexer_next_token_is (parser->lexer, CPP_QUERY)) + { + cp_parser_error (parser, + "invalid form of " + "%<#pragma omp atomic compare%>"); + goto saw_error; + } + goto cond_expr; + } goto stmt_done; - /* FALLTHROUGH */ default: cp_parser_error (parser, "invalid operator for %<#pragma omp atomic%>"); @@ -40511,10 +40952,12 @@ restart: break; } stmt_done: - if (structured_block && code == OMP_ATOMIC_CAPTURE_NEW) + if (structured_block && code == OMP_ATOMIC_CAPTURE_NEW && r == NULL_TREE) { - if (!cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)) + if (!no_semicolon + && !cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON)) goto saw_error; + no_semicolon = false; v = cp_parser_unary_expression (parser); if (v == error_mark_node) goto saw_error; @@ -40526,19 +40969,30 @@ stmt_done: } if (structured_block) { - cp_parser_consume_semicolon_at_end_of_statement (parser); + if (!no_semicolon) + cp_parser_consume_semicolon_at_end_of_statement (parser); cp_parser_require (parser, CPP_CLOSE_BRACE, RT_CLOSE_BRACE); } done: + if (weak && opcode != COND_EXPR) + { + error_at (loc, "%<weak%> clause requires atomic equality comparison"); + weak = false; + } clauses = finish_omp_clauses (clauses, C_ORT_OMP); finish_omp_atomic (pragma_tok->location, code, opcode, lhs, rhs, v, lhs1, - rhs1, clauses, memory_order); - if (!structured_block) + rhs1, r, clauses, memory_order, weak); + if (!structured_block && !no_semicolon) cp_parser_consume_semicolon_at_end_of_statement (parser); return; + invalid_compare: + error ("invalid form of %<pragma omp atomic compare%>"); + /* FALLTHRU */ saw_error: cp_parser_skip_to_end_of_block_or_statement (parser); + if (extra_scope && cp_lexer_next_token_is (parser->lexer, CPP_CLOSE_BRACE)) + cp_lexer_consume_token (parser->lexer); if (structured_block) { if (cp_lexer_next_token_is (parser->lexer, CPP_CLOSE_BRACE)) diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 224dd9e..12c8812 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -2218,7 +2218,8 @@ determine_specialization (tree template_id, targs = coerce_template_parms (parms, explicit_targs, fns, tf_warning_or_error, /*req_all*/true, /*use_defarg*/true); - if (targs != error_mark_node) + 
if (targs != error_mark_node + && constraints_satisfied_p (fns, targs)) templates = tree_cons (targs, fns, templates); } else for (lkp_iterator iter (fns); iter; ++iter) @@ -19029,23 +19030,42 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, { tree op1 = TREE_OPERAND (t, 1); tree rhs1 = NULL_TREE; + tree r = NULL_TREE; tree lhs, rhs; if (TREE_CODE (op1) == COMPOUND_EXPR) { rhs1 = RECUR (TREE_OPERAND (op1, 0)); op1 = TREE_OPERAND (op1, 1); } - lhs = RECUR (TREE_OPERAND (op1, 0)); - rhs = RECUR (TREE_OPERAND (op1, 1)); + if (TREE_CODE (op1) == COND_EXPR) + { + gcc_assert (rhs1 == NULL_TREE); + tree c = TREE_OPERAND (op1, 0); + if (TREE_CODE (c) == MODIFY_EXPR) + { + r = RECUR (TREE_OPERAND (c, 0)); + c = TREE_OPERAND (c, 1); + } + gcc_assert (TREE_CODE (c) == EQ_EXPR); + rhs = RECUR (TREE_OPERAND (c, 1)); + lhs = RECUR (TREE_OPERAND (op1, 2)); + rhs1 = RECUR (TREE_OPERAND (op1, 1)); + } + else + { + lhs = RECUR (TREE_OPERAND (op1, 0)); + rhs = RECUR (TREE_OPERAND (op1, 1)); + } finish_omp_atomic (EXPR_LOCATION (t), OMP_ATOMIC, TREE_CODE (op1), - lhs, rhs, NULL_TREE, NULL_TREE, rhs1, tmp, - OMP_ATOMIC_MEMORY_ORDER (t)); + lhs, rhs, NULL_TREE, NULL_TREE, rhs1, r, + tmp, OMP_ATOMIC_MEMORY_ORDER (t), + OMP_ATOMIC_WEAK (t)); } else { tree op1 = TREE_OPERAND (t, 1); tree v = NULL_TREE, lhs, rhs = NULL_TREE, lhs1 = NULL_TREE; - tree rhs1 = NULL_TREE; + tree rhs1 = NULL_TREE, r = NULL_TREE; enum tree_code code = TREE_CODE (TREE_OPERAND (op1, 1)); enum tree_code opcode = NOP_EXPR; if (code == OMP_ATOMIC_READ) @@ -19064,8 +19084,25 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, rhs1 = RECUR (TREE_OPERAND (op11, 0)); op11 = TREE_OPERAND (op11, 1); } - lhs = RECUR (TREE_OPERAND (op11, 0)); - rhs = RECUR (TREE_OPERAND (op11, 1)); + if (TREE_CODE (op11) == COND_EXPR) + { + gcc_assert (rhs1 == NULL_TREE); + tree c = TREE_OPERAND (op11, 0); + if (TREE_CODE (c) == MODIFY_EXPR) + { + r = RECUR (TREE_OPERAND (c, 0)); + c = TREE_OPERAND (c, 1); + } + gcc_assert (TREE_CODE (c) == EQ_EXPR); + rhs = RECUR (TREE_OPERAND (c, 1)); + lhs = RECUR (TREE_OPERAND (op11, 2)); + rhs1 = RECUR (TREE_OPERAND (op11, 1)); + } + else + { + lhs = RECUR (TREE_OPERAND (op11, 0)); + rhs = RECUR (TREE_OPERAND (op11, 1)); + } opcode = TREE_CODE (op11); if (opcode == MODIFY_EXPR) opcode = NOP_EXPR; @@ -19077,7 +19114,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, rhs = RECUR (TREE_OPERAND (op1, 1)); } finish_omp_atomic (EXPR_LOCATION (t), code, opcode, lhs, rhs, v, - lhs1, rhs1, tmp, OMP_ATOMIC_MEMORY_ORDER (t)); + lhs1, rhs1, r, tmp, + OMP_ATOMIC_MEMORY_ORDER (t), OMP_ATOMIC_WEAK (t)); } break; diff --git a/gcc/cp/rtti.c b/gcc/cp/rtti.c index fcb3308..9c5066b 100644 --- a/gcc/cp/rtti.c +++ b/gcc/cp/rtti.c @@ -125,7 +125,6 @@ static tree tinfo_name (tree, bool); static tree build_dynamic_cast_1 (location_t, tree, tree, tsubst_flags_t); static tree throw_bad_cast (void); static tree throw_bad_typeid (void); -static tree get_tinfo_ptr (tree); static bool typeid_ok_p (void); static int qualifier_flags (tree); static bool target_incomplete_p (tree); @@ -142,22 +141,11 @@ static bool typeinfo_in_lib_p (tree); static int doing_runtime = 0; -/* Declare language defined type_info type and a pointer to const - type_info. This is incomplete here, and will be completed when - the user #includes <typeinfo>. There are language defined - restrictions on what can be done until that is included. Create - the internal versions of the ABI types. 
*/ +/* Create the internal versions of the ABI types. */ void init_rtti_processing (void) { - push_nested_namespace (std_node); - tree type_info_type = xref_tag (class_type, get_identifier ("type_info")); - pop_nested_namespace (std_node); - const_type_info_type_node - = cp_build_qualified_type (type_info_type, TYPE_QUAL_CONST); - type_info_ptr_type = build_pointer_type (const_type_info_type_node); - vec_alloc (unemitted_tinfo_decls, 124); create_tinfo_types (); @@ -238,6 +226,33 @@ throw_bad_typeid (void) return build_cxx_call (fn, 0, NULL, tf_warning_or_error); } +/* const type_info*. */ + +inline tree +type_info_ptr_type () +{ + return build_pointer_type (const_type_info_type_node); +} + +/* Return a pointer to a type_info object describing TYPE, suitably + cast to the language defined type (for typeid) or void (for building + up the descriptors). */ + +static tree +get_tinfo_ptr (tree type, bool voidp = false) +{ + tree decl = get_tinfo_decl (type); + mark_used (decl); + + tree ptype = voidp ? const_ptr_type_node : type_info_ptr_type (); + return build_nop (ptype, build_address (decl)); +} +static inline tree +get_void_tinfo_ptr (tree type) +{ + return get_tinfo_ptr (type, true); +} + /* Return an lvalue expression whose type is "const std::type_info" and whose value indicates the type of the expression EXP. If EXP is a reference to a polymorphic class, return the dynamic type; @@ -278,7 +293,7 @@ get_tinfo_decl_dynamic (tree exp, tsubst_flags_t complain) index = build_int_cst (NULL_TREE, -1 * TARGET_VTABLE_DATA_ENTRY_DISTANCE); t = build_vtbl_ref (exp, index); - t = convert (type_info_ptr_type, t); + t = convert (type_info_ptr_type (), t); } else /* Otherwise return the type_info for the static type of the expr. */ @@ -296,15 +311,22 @@ typeid_ok_p (void) return false; } - if (!COMPLETE_TYPE_P (const_type_info_type_node)) + if (!const_type_info_type_node) { - gcc_rich_location richloc (input_location); - maybe_add_include_fixit (&richloc, "<typeinfo>", false); - error_at (&richloc, - "must %<#include <typeinfo>%> before using" - " %<typeid%>"); + tree name = get_identifier ("type_info"); + tree decl = lookup_qualified_name (std_node, name); + if (TREE_CODE (decl) != TYPE_DECL) + { + gcc_rich_location richloc (input_location); + maybe_add_include_fixit (&richloc, "<typeinfo>", false); + error_at (&richloc, + "must %<#include <typeinfo>%> before using" + " %<typeid%>"); - return false; + return false; + } + const_type_info_type_node + = cp_build_qualified_type (TREE_TYPE (decl), TYPE_QUAL_CONST); } tree pseudo = TYPE_MAIN_VARIANT (get_tinfo_desc (TK_TYPE_INFO_TYPE)->type); @@ -471,19 +493,6 @@ get_tinfo_decl_direct (tree type, tree name, int pseudo_ix) return d; } -/* Return a pointer to a type_info object describing TYPE, suitably - cast to the language defined type. */ - -static tree -get_tinfo_ptr (tree type) -{ - tree decl = get_tinfo_decl (type); - - mark_used (decl); - return build_nop (type_info_ptr_type, - build_address (decl)); -} - /* Return the type_info object for TYPE. 
*/ tree @@ -1032,7 +1041,7 @@ ptr_initializer (tinfo_s *ti, tree target) CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, build_int_cst (NULL_TREE, flags)); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, - get_tinfo_ptr (TYPE_MAIN_VARIANT (to))); + get_void_tinfo_ptr (TYPE_MAIN_VARIANT (to))); init = build_constructor (init_list_type_node, v); TREE_CONSTANT (init) = 1; @@ -1063,8 +1072,8 @@ ptm_initializer (tinfo_s *ti, tree target) CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, build_int_cst (NULL_TREE, flags)); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, - get_tinfo_ptr (TYPE_MAIN_VARIANT (to))); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, get_tinfo_ptr (klass)); + get_void_tinfo_ptr (TYPE_MAIN_VARIANT (to))); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, get_void_tinfo_ptr (klass)); init = build_constructor (init_list_type_node, v); TREE_CONSTANT (init) = 1; @@ -1156,7 +1165,7 @@ get_pseudo_ti_init (tree type, unsigned tk_index) case TK_SI_CLASS_TYPE: { tree base_binfo = BINFO_BASE_BINFO (TYPE_BINFO (type), 0); - tree tinfo = get_tinfo_ptr (BINFO_TYPE (base_binfo)); + tree tinfo = get_void_tinfo_ptr (BINFO_TYPE (base_binfo)); /* get_tinfo_ptr might have reallocated the tinfo_descs vector. */ ti = &(*tinfo_descs)[tk_index]; @@ -1187,7 +1196,7 @@ get_pseudo_ti_init (tree type, unsigned tk_index) if ((*base_accesses)[ix] == access_public_node) flags |= 2; - tinfo = get_tinfo_ptr (BINFO_TYPE (base_binfo)); + tinfo = get_void_tinfo_ptr (BINFO_TYPE (base_binfo)); if (BINFO_VIRTUAL_P (base_binfo)) { /* We store the vtable offset at which the virtual @@ -1360,7 +1369,7 @@ get_tinfo_desc (unsigned ix) /* Base class internal helper. Pointer to base type, offset to base, flags. */ tree fld_ptr = build_decl (BUILTINS_LOCATION, FIELD_DECL, - NULL_TREE, type_info_ptr_type); + NULL_TREE, const_ptr_type_node); DECL_CHAIN (fld_ptr) = fields; fields = fld_ptr; @@ -1396,7 +1405,7 @@ get_tinfo_desc (unsigned ix) fields = fld_mask; tree fld_ptr = build_decl (BUILTINS_LOCATION, FIELD_DECL, - NULL_TREE, type_info_ptr_type); + NULL_TREE, const_ptr_type_node); DECL_CHAIN (fld_ptr) = fields; fields = fld_ptr; @@ -1404,7 +1413,7 @@ get_tinfo_desc (unsigned ix) { /* Add a pointer to the class too. */ tree fld_cls = build_decl (BUILTINS_LOCATION, FIELD_DECL, - NULL_TREE, type_info_ptr_type); + NULL_TREE, const_ptr_type_node); DECL_CHAIN (fld_cls) = fields; fields = fld_cls; } @@ -1421,7 +1430,7 @@ get_tinfo_desc (unsigned ix) class. This is really a descendant of __class_type_info. 
*/ tree fld_ptr = build_decl (BUILTINS_LOCATION, FIELD_DECL, - NULL_TREE, type_info_ptr_type); + NULL_TREE, const_ptr_type_node); DECL_CHAIN (fld_ptr) = fields; fields = fld_ptr; break; diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 94e6b18..35a7b9f 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -9877,14 +9877,15 @@ finish_omp_for_block (tree bind, tree omp_for) void finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, - tree lhs, tree rhs, tree v, tree lhs1, tree rhs1, - tree clauses, enum omp_memory_order mo) + tree lhs, tree rhs, tree v, tree lhs1, tree rhs1, tree r, + tree clauses, enum omp_memory_order mo, bool weak) { tree orig_lhs; tree orig_rhs; tree orig_v; tree orig_lhs1; tree orig_rhs1; + tree orig_r; bool dependent_p; tree stmt; @@ -9893,6 +9894,7 @@ finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, orig_v = v; orig_lhs1 = lhs1; orig_rhs1 = rhs1; + orig_r = r; dependent_p = false; stmt = NULL_TREE; @@ -9904,7 +9906,10 @@ finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, || (rhs && type_dependent_expression_p (rhs)) || (v && type_dependent_expression_p (v)) || (lhs1 && type_dependent_expression_p (lhs1)) - || (rhs1 && type_dependent_expression_p (rhs1))); + || (rhs1 && type_dependent_expression_p (rhs1)) + || (r + && r != void_list_node + && type_dependent_expression_p (r))); if (clauses) { gcc_assert (TREE_CODE (clauses) == OMP_CLAUSE @@ -9925,17 +9930,19 @@ finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, lhs1 = build_non_dependent_expr (lhs1); if (rhs1) rhs1 = build_non_dependent_expr (rhs1); + if (r && r != void_list_node) + r = build_non_dependent_expr (r); } } if (!dependent_p) { bool swapped = false; - if (rhs1 && cp_tree_equal (lhs, rhs)) + if (rhs1 && opcode != COND_EXPR && cp_tree_equal (lhs, rhs)) { std::swap (rhs, rhs1); swapped = !commutative_tree_code (opcode); } - if (rhs1 && !cp_tree_equal (lhs, rhs1)) + if (rhs1 && opcode != COND_EXPR && !cp_tree_equal (lhs, rhs1)) { if (code == OMP_ATOMIC) error ("%<#pragma omp atomic update%> uses two different " @@ -9956,7 +9963,7 @@ finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, return; } stmt = c_finish_omp_atomic (loc, code, opcode, lhs, rhs, - v, lhs1, rhs1, NULL_TREE, swapped, mo, false, + v, lhs1, rhs1, r, swapped, mo, weak, processing_template_decl != 0); if (stmt == error_mark_node) return; @@ -9973,6 +9980,16 @@ finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, { if (opcode == NOP_EXPR) stmt = build2 (MODIFY_EXPR, void_type_node, orig_lhs, orig_rhs); + else if (opcode == COND_EXPR) + { + stmt = build2 (EQ_EXPR, boolean_type_node, orig_lhs, orig_rhs); + if (orig_r) + stmt = build2 (MODIFY_EXPR, boolean_type_node, orig_r, + stmt); + stmt = build3 (COND_EXPR, void_type_node, stmt, orig_rhs1, + orig_lhs); + orig_rhs1 = NULL_TREE; + } else stmt = build2 (opcode, void_type_node, orig_lhs, orig_rhs); if (orig_rhs1) @@ -9982,12 +9999,14 @@ finish_omp_atomic (location_t loc, enum tree_code code, enum tree_code opcode, { stmt = build_min_nt_loc (loc, code, orig_lhs1, stmt); OMP_ATOMIC_MEMORY_ORDER (stmt) = mo; + OMP_ATOMIC_WEAK (stmt) = weak; stmt = build2 (MODIFY_EXPR, void_type_node, orig_v, stmt); } } stmt = build2 (OMP_ATOMIC, void_type_node, clauses ? 
clauses : integer_zero_node, stmt); OMP_ATOMIC_MEMORY_ORDER (stmt) = mo; + OMP_ATOMIC_WEAK (stmt) = weak; SET_EXPR_LOCATION (stmt, loc); } diff --git a/gcc/cp/typeck2.c b/gcc/cp/typeck2.c index f78dbf2..abfd7da 100644 --- a/gcc/cp/typeck2.c +++ b/gcc/cp/typeck2.c @@ -524,7 +524,20 @@ split_nonconstant_init_1 (tree dest, tree init, bool nested) sub = build3 (COMPONENT_REF, inner_type, dest, field_index, NULL_TREE); - if (!split_nonconstant_init_1 (sub, value, true)) + if (!split_nonconstant_init_1 (sub, value, true) + /* For flexible array member with initializer we + can't remove the initializer, because only the + initializer determines how many elements the + flexible array member has. */ + || (!array_type_p + && TREE_CODE (inner_type) == ARRAY_TYPE + && TYPE_DOMAIN (inner_type) == NULL + && TREE_CODE (TREE_TYPE (value)) == ARRAY_TYPE + && COMPLETE_TYPE_P (TREE_TYPE (value)) + && !integer_zerop (TYPE_SIZE (TREE_TYPE (value))) + && idx == CONSTRUCTOR_NELTS (init) - 1 + && TYPE_HAS_TRIVIAL_DESTRUCTOR + (strip_array_types (inner_type)))) complete_p = false; else { diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 78cfc10..4acb941 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -29662,10 +29662,11 @@ so @option{-mno-lra} needs to be passed to get old Reload. Set the instruction set, register set, and instruction scheduling parameters for machine type @var{cpu_type}. Supported values for @var{cpu_type} are @samp{v7}, @samp{cypress}, @samp{v8}, @samp{supersparc}, @samp{hypersparc}, -@samp{leon}, @samp{leon3}, @samp{leon3v7}, @samp{sparclite}, @samp{f930}, -@samp{f934}, @samp{sparclite86x}, @samp{sparclet}, @samp{tsc701}, @samp{v9}, -@samp{ultrasparc}, @samp{ultrasparc3}, @samp{niagara}, @samp{niagara2}, -@samp{niagara3}, @samp{niagara4}, @samp{niagara7} and @samp{m8}. +@samp{leon}, @samp{leon3}, @samp{leon3v7}, @samp{leon5}, @samp{sparclite}, +@samp{f930}, @samp{f934}, @samp{sparclite86x}, @samp{sparclet}, @samp{tsc701}, +@samp{v9}, @samp{ultrasparc}, @samp{ultrasparc3}, @samp{niagara}, +@samp{niagara2}, @samp{niagara3}, @samp{niagara4}, @samp{niagara7} and +@samp{m8}. Native Solaris and GNU/Linux toolchains also support the value @samp{native}, which selects the best architecture option for the host processor. @@ -29684,7 +29685,7 @@ implementations. cypress, leon3v7 @item v8 -supersparc, hypersparc, leon, leon3 +supersparc, hypersparc, leon, leon3, leon5 @item sparclite f930, f934, sparclite86x @@ -29751,7 +29752,7 @@ The same values for @option{-mcpu=@var{cpu_type}} can be used for @option{-mtune=@var{cpu_type}}, but the only useful values are those that select a particular CPU implementation. Those are @samp{cypress}, @samp{supersparc}, @samp{hypersparc}, @samp{leon}, -@samp{leon3}, @samp{leon3v7}, @samp{f930}, @samp{f934}, +@samp{leon3}, @samp{leon3v7}, @samp{leon5}, @samp{f930}, @samp{f934}, @samp{sparclite86x}, @samp{tsc701}, @samp{ultrasparc}, @samp{ultrasparc3}, @samp{niagara}, @samp{niagara2}, @samp{niagara3}, @samp{niagara4}, @samp{niagara7} and @samp{m8}. With native Solaris diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 991f3cf..8e22dd2 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,34 @@ +2021-09-16 Harald Anlauf <anlauf@gmx.de> + + PR fortran/102287 + * trans-expr.c (gfc_conv_procedure_call): Wrap deallocation of + allocatable components of optional allocatable derived type + procedure arguments with INTENT(OUT) into a presence check. 
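For reference, the layout_var_decl and split_nonconstant_init_1 hunks above deal with objects whose initializer supplies the elements of a trailing flexible array member; a minimal, hypothetical sketch of the kind of (GNU-extension) C++ code involved, not taken from the patch:

    struct S
    {
      int n;
      char tail[];             /* flexible array member (GNU extension) */
    };

    /* Only the initializer says how many elements 'tail' has, so the
       storage reserved for 's' must be sizeof (S) plus the bytes of the
       trailing brace-enclosed initializer, and that initializer cannot
       be dropped when the rest of the init is split out.  */
    S s = { 3, { 'a', 'b', 'c' } };
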
+ +2021-09-14 Harald Anlauf <anlauf@gmx.de> + + PR fortran/102311 + * resolve.c (resolve_entries): Attempt to recover cleanly after + rejecting mismatched function entries. + +2021-09-14 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/102313 + * parse.c (gfc_ascii_statement): Add missing ST_OMP_END_SCOPE. + +2021-09-13 Harald Anlauf <anlauf@gmx.de> + + PR fortran/82314 + * decl.c (add_init_expr_to_sym): For proper initialization of + array-valued named constants the array bounds need to be + simplified before adding the initializer. + +2021-09-13 Harald Anlauf <anlauf@gmx.de> + + PR fortran/85130 + * expr.c (find_substring_ref): Handle given substring start and + end indices as signed integers, not unsigned. + 2021-09-09 Harald Anlauf <anlauf@gmx.de> PR fortran/98490 diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 1aacd33..1b9a89d 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -15193,8 +15193,8 @@ In addition to the integer named constants required by the Fortran 2003 standard and @code{C_PTRDIFF_T} of TS 29113, GNU Fortran provides as an extension named constants for the 128-bit integer types supported by the C compiler: @code{C_INT128_T, C_INT_LEAST128_T, C_INT_FAST128_T}. -Furthermore, if @code{__float128} is supported in C, the named constants -@code{C_FLOAT128, C_FLOAT128_COMPLEX} are defined. +Furthermore, if @code{_Float128} is supported in C, the named constants +@code{C_FLOAT128} and @code{C_FLOAT128_COMPLEX} are defined. @multitable @columnfractions .15 .35 .35 .35 @headitem Fortran Type @tab Named constant @tab C type @tab Extension @@ -15225,11 +15225,11 @@ Furthermore, if @code{__float128} is supported in C, the named constants @item @code{REAL} @tab @code{C_FLOAT} @tab @code{float} @item @code{REAL} @tab @code{C_DOUBLE} @tab @code{double} @item @code{REAL} @tab @code{C_LONG_DOUBLE} @tab @code{long double} -@item @code{REAL} @tab @code{C_FLOAT128} @tab @code{__float128} @tab Ext. +@item @code{REAL} @tab @code{C_FLOAT128} @tab @code{_Float128} @tab Ext. @item @code{COMPLEX}@tab @code{C_FLOAT_COMPLEX} @tab @code{float _Complex} @item @code{COMPLEX}@tab @code{C_DOUBLE_COMPLEX}@tab @code{double _Complex} @item @code{COMPLEX}@tab @code{C_LONG_DOUBLE_COMPLEX}@tab @code{long double _Complex} -@item @code{REAL} @tab @code{C_FLOAT128_COMPLEX} @tab @code{__float128 _Complex} @tab Ext. +@item @code{COMPLEX}@tab @code{C_FLOAT128_COMPLEX} @tab @code{_Float128 _Complex} @tab Ext. @item @code{LOGICAL}@tab @code{C_BOOL} @tab @code{_Bool} @item @code{CHARACTER}@tab @code{C_CHAR} @tab @code{char} @end multitable diff --git a/gcc/fortran/iso-c-binding.def b/gcc/fortran/iso-c-binding.def index e65c750..50256fe 100644 --- a/gcc/fortran/iso-c-binding.def +++ b/gcc/fortran/iso-c-binding.def @@ -116,7 +116,7 @@ NAMED_REALCST (ISOCBINDING_LONG_DOUBLE, "c_long_double", \ get_real_kind_from_node (long_double_type_node), GFC_STD_F2003) /* GNU Extension. Note that the equivalence here is specifically to - the IEEE 128-bit type __float128; if that does not map onto a type + the IEEE 128-bit type _Float128; if that does not map onto a type otherwise supported by the Fortran front end, get_real_kind_from_node will reject it as unsupported. 
*/ NAMED_REALCST (ISOCBINDING_FLOAT128, "c_float128", \ diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c index d37a0b5..7d765a0 100644 --- a/gcc/fortran/parse.c +++ b/gcc/fortran/parse.c @@ -2406,6 +2406,9 @@ gfc_ascii_statement (gfc_statement st) case ST_OMP_END_DO_SIMD: p = "!$OMP END DO SIMD"; break; + case ST_OMP_END_SCOPE: + p = "!$OMP END SCOPE"; + break; case ST_OMP_END_SIMD: p = "!$OMP END SIMD"; break; diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 8e5ed1c..30b96b2 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -811,7 +811,7 @@ resolve_entries (gfc_namespace *ns) gfc_error ("Function %s at %L has entry %s with mismatched " "characteristics", ns->entries->sym->name, &ns->entries->sym->declared_at, el->sym->name); - return; + goto cleanup; } else if (ts->type == BT_CHARACTER && ts->u.cl && fts->u.cl && (((ts->u.cl->length && !fts->u.cl->length) @@ -917,6 +917,8 @@ resolve_entries (gfc_namespace *ns) } } } + +cleanup: proc->attr.access = ACCESS_PRIVATE; proc->attr.entry_master = 1; diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 18d6651..4a81f46 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -6548,6 +6548,17 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, // deallocate the components first tmp = gfc_deallocate_alloc_comp (fsym->ts.u.derived, parmse.expr, e->rank); + /* But check whether dummy argument is optional. */ + if (tmp != NULL_TREE + && fsym->attr.optional + && e->expr_type == EXPR_VARIABLE + && e->symtree->n.sym->attr.optional) + { + tree present; + present = gfc_conv_expr_present (e->symtree->n.sym); + tmp = build3_v (COND_EXPR, present, tmp, + build_empty_stmt (input_location)); + } if (tmp != NULL_TREE) gfc_add_expr_to_block (&se->pre, tmp); } diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index 46670ba..42a995b 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -175,7 +175,7 @@ gfc_builtin_decl_for_float_kind (enum built_in_function double_built_in, if (gfc_real_kinds[i].c_float128) { - /* For __float128, the story is a bit different, because we return + /* For _Float128, the story is a bit different, because we return a decl to a library function rather than a built-in. */ gfc_intrinsic_map_t *m; for (m = gfc_intrinsic_map; m->double_built_in != double_built_in ; m++) @@ -387,7 +387,7 @@ build_round_expr (tree arg, tree restype) resprec = TYPE_PRECISION (restype); /* Depending on the type of the result, choose the int intrinsic (iround, - available only as a builtin, therefore cannot use it for __float128), long + available only as a builtin, therefore cannot use it for _Float128), long int intrinsic (lround family) or long long intrinsic (llround). If we don't have an appropriate function that converts directly to the integer type (such as kind == 16), just use ROUND, and then convert the result to @@ -689,7 +689,7 @@ gfc_build_intrinsic_lib_fndecls (void) if (gfc_real16_is_float128) { /* If we have soft-float types, we create the decls for their - C99-like library functions. For now, we only handle __float128 + C99-like library functions. For now, we only handle _Float128 q-suffixed functions. 
*/ tree type, complex_type, func_1, func_2, func_cabs, func_frexp; diff --git a/gcc/fortran/trans-types.h b/gcc/fortran/trans-types.h index 3b45ce2..6804bfe 100644 --- a/gcc/fortran/trans-types.h +++ b/gcc/fortran/trans-types.h @@ -55,7 +55,7 @@ extern GTY(()) tree gfc_charlen_type_node; /* The following flags give us information on the correspondence of real (and complex) kinds with C floating-point types long double - and __float128. */ + and _Float128. */ extern bool gfc_real16_is_float128; enum gfc_packed { diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 66dfd25..f680292 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -13859,10 +13859,10 @@ goa_lhs_expr_p (tree expr, tree addr) static int goa_stabilize_expr (tree *expr_p, gimple_seq *pre_p, tree lhs_addr, - tree lhs_var, tree &target_expr, bool rhs) + tree lhs_var, tree &target_expr, bool rhs, int depth) { tree expr = *expr_p; - int saw_lhs; + int saw_lhs = 0; if (goa_lhs_expr_p (expr, lhs_addr)) { @@ -13873,17 +13873,22 @@ goa_stabilize_expr (tree *expr_p, gimple_seq *pre_p, tree lhs_addr, if (is_gimple_val (expr)) return 0; - saw_lhs = 0; + /* Maximum depth of lhs in expression is for the + __builtin_clear_padding (...), __builtin_clear_padding (...), + __builtin_memcmp (&TARGET_EXPR <lhs, >, ...) == 0 ? ... : lhs; */ + if (++depth > 7) + goto finish; + switch (TREE_CODE_CLASS (TREE_CODE (expr))) { case tcc_binary: case tcc_comparison: saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 1), pre_p, lhs_addr, - lhs_var, target_expr, true); + lhs_var, target_expr, true, depth); /* FALLTHRU */ case tcc_unary: saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, lhs_addr, - lhs_var, target_expr, true); + lhs_var, target_expr, true, depth); break; case tcc_expression: switch (TREE_CODE (expr)) @@ -13895,84 +13900,101 @@ goa_stabilize_expr (tree *expr_p, gimple_seq *pre_p, tree lhs_addr, case TRUTH_XOR_EXPR: case BIT_INSERT_EXPR: saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 1), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); /* FALLTHRU */ case TRUTH_NOT_EXPR: saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); break; case MODIFY_EXPR: + if (pre_p && !goa_stabilize_expr (expr_p, NULL, lhs_addr, lhs_var, + target_expr, true, depth)) + break; saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 1), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); + saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, + lhs_addr, lhs_var, target_expr, false, + depth); + break; /* FALLTHRU */ case ADDR_EXPR: + if (pre_p && !goa_stabilize_expr (expr_p, NULL, lhs_addr, lhs_var, + target_expr, true, depth)) + break; saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, - lhs_addr, lhs_var, target_expr, false); + lhs_addr, lhs_var, target_expr, false, + depth); break; case COMPOUND_EXPR: - /* Special-case __builtin_clear_padding call before - __builtin_memcmp. */ - if (TREE_CODE (TREE_OPERAND (expr, 0)) == CALL_EXPR) - { - tree fndecl = get_callee_fndecl (TREE_OPERAND (expr, 0)); - if (fndecl - && fndecl_built_in_p (fndecl, BUILT_IN_CLEAR_PADDING) - && VOID_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)))) + /* Break out any preevaluations from cp_build_modify_expr. */ + for (; TREE_CODE (expr) == COMPOUND_EXPR; + expr = TREE_OPERAND (expr, 1)) + { + /* Special-case __builtin_clear_padding call before + __builtin_memcmp. 
*/ + if (TREE_CODE (TREE_OPERAND (expr, 0)) == CALL_EXPR) { - saw_lhs = goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, - lhs_addr, lhs_var, - target_expr, true); - if (!saw_lhs) - { - expr = TREE_OPERAND (expr, 1); - if (!pre_p) - return goa_stabilize_expr (&expr, pre_p, lhs_addr, - lhs_var, target_expr, true); - *expr_p = expr; - return goa_stabilize_expr (expr_p, pre_p, lhs_addr, - lhs_var, target_expr, true); - } - else + tree fndecl = get_callee_fndecl (TREE_OPERAND (expr, 0)); + if (fndecl + && fndecl_built_in_p (fndecl, BUILT_IN_CLEAR_PADDING) + && VOID_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0))) + && (!pre_p + || goa_stabilize_expr (&TREE_OPERAND (expr, 0), NULL, + lhs_addr, lhs_var, + target_expr, true, depth))) { + if (pre_p) + *expr_p = expr; + saw_lhs = goa_stabilize_expr (&TREE_OPERAND (expr, 0), + pre_p, lhs_addr, lhs_var, + target_expr, true, depth); saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 1), pre_p, lhs_addr, lhs_var, - target_expr, rhs); - break; + target_expr, rhs, depth); + return saw_lhs; } } + + if (pre_p) + gimplify_stmt (&TREE_OPERAND (expr, 0), pre_p); } - /* Break out any preevaluations from cp_build_modify_expr. */ - for (; TREE_CODE (expr) == COMPOUND_EXPR; - expr = TREE_OPERAND (expr, 1)) - if (pre_p) - gimplify_stmt (&TREE_OPERAND (expr, 0), pre_p); if (!pre_p) return goa_stabilize_expr (&expr, pre_p, lhs_addr, lhs_var, - target_expr, rhs); + target_expr, rhs, depth); *expr_p = expr; return goa_stabilize_expr (expr_p, pre_p, lhs_addr, lhs_var, - target_expr, rhs); + target_expr, rhs, depth); case COND_EXPR: if (!goa_stabilize_expr (&TREE_OPERAND (expr, 0), NULL, lhs_addr, - lhs_var, target_expr, true)) + lhs_var, target_expr, true, depth)) break; saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 1), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 2), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); break; case TARGET_EXPR: if (TARGET_EXPR_INITIAL (expr)) { + if (pre_p && !goa_stabilize_expr (expr_p, NULL, lhs_addr, + lhs_var, target_expr, true, + depth)) + break; if (expr == target_expr) saw_lhs = 1; else { saw_lhs = goa_stabilize_expr (&TARGET_EXPR_INITIAL (expr), pre_p, lhs_addr, lhs_var, - target_expr, true); + target_expr, true, depth); if (saw_lhs && target_expr == NULL_TREE && pre_p) target_expr = expr; } @@ -13986,7 +14008,8 @@ goa_stabilize_expr (tree *expr_p, gimple_seq *pre_p, tree lhs_addr, if (TREE_CODE (expr) == BIT_FIELD_REF || TREE_CODE (expr) == VIEW_CONVERT_EXPR) saw_lhs |= goa_stabilize_expr (&TREE_OPERAND (expr, 0), pre_p, - lhs_addr, lhs_var, target_expr, true); + lhs_addr, lhs_var, target_expr, true, + depth); break; case tcc_vl_exp: if (TREE_CODE (expr) == CALL_EXPR) @@ -13999,24 +14022,24 @@ goa_stabilize_expr (tree *expr_p, gimple_seq *pre_p, tree lhs_addr, for (int i = 0; i < nargs; i++) saw_lhs |= goa_stabilize_expr (&CALL_EXPR_ARG (expr, i), pre_p, lhs_addr, lhs_var, - target_expr, true); + target_expr, true, depth); } - if (saw_lhs == 0 && VOID_TYPE_P (TREE_TYPE (expr))) - { - if (pre_p) - gimplify_stmt (&expr, pre_p); - return 0; - } } break; default: break; } + finish: if (saw_lhs == 0 && pre_p) { enum gimplify_status gs; - if (rhs) + if (TREE_CODE (expr) == CALL_EXPR && VOID_TYPE_P (TREE_TYPE (expr))) + { + 
gimplify_stmt (&expr, pre_p); + return saw_lhs; + } + else if (rhs) gs = gimplify_expr (expr_p, pre_p, NULL, is_gimple_val, fb_rvalue); else gs = gimplify_expr (expr_p, pre_p, NULL, is_gimple_lvalue, fb_lvalue); @@ -14044,7 +14067,7 @@ gimplify_omp_atomic (tree *expr_p, gimple_seq *pre_p) tmp_load = create_tmp_reg (type); if (rhs && goa_stabilize_expr (&rhs, pre_p, addr, tmp_load, target_expr, - true) < 0) + true, 0) < 0) return GS_ERROR; if (gimplify_expr (&addr, pre_p, NULL, is_gimple_val, fb_rvalue) diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index f481681..e2abd5f 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -c11d9f8275f2bbe9b05cdd815c79ac331f78e15c +850235e4b974b9c5c2d7a1f9860583bd07f2a45c The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/hash-map-tests.c b/gcc/hash-map-tests.c index 6acc0d4..511d434 100644 --- a/gcc/hash-map-tests.c +++ b/gcc/hash-map-tests.c @@ -317,7 +317,8 @@ test_map_of_type_with_ctor_and_dtor () } } -/* Verify aspects of 'hash_table::expand'. */ +/* Verify aspects of 'hash_table::expand', in particular that it doesn't leak + Value objects. */ static void test_map_of_type_with_ctor_and_dtor_expand (bool remove_some_inline) @@ -400,9 +401,9 @@ test_map_of_type_with_ctor_and_dtor_expand (bool remove_some_inline) ASSERT_EQ (val_t::ncopy, n_expand_moved); ASSERT_EQ (val_t::nassign, 0); if (remove_some_inline) - ASSERT_EQ (val_t::ndtor, (i + 2) / 3); + ASSERT_EQ (val_t::ndtor, n_expand_moved + (i + 2) / 3); else - ASSERT_EQ (val_t::ndtor, 0); + ASSERT_EQ (val_t::ndtor, n_expand_moved); /* Remove some inline. This never triggers an 'expand' here, but via 'm_n_deleted' does influence any following one. */ @@ -416,7 +417,7 @@ test_map_of_type_with_ctor_and_dtor_expand (bool remove_some_inline) ASSERT_EQ (val_t::ndefault, 1 + i); ASSERT_EQ (val_t::ncopy, n_expand_moved); ASSERT_EQ (val_t::nassign, 0); - ASSERT_EQ (val_t::ndtor, 1 + (i + 2) / 3); + ASSERT_EQ (val_t::ndtor, n_expand_moved + 1 + (i + 2) / 3); } } ASSERT_EQ (expand_c, expand_c_expected); @@ -439,6 +440,7 @@ test_map_of_type_with_ctor_and_dtor_expand (bool remove_some_inline) ASSERT_EQ (val_t::nassign, nassign); ASSERT_EQ (val_t::ndtor, ndtor); } + ASSERT_EQ (val_t::ndefault + val_t::ncopy, val_t::ndtor); } /* Test calling empty on a hash_map that has a key type with non-zero diff --git a/gcc/hash-table.h b/gcc/hash-table.h index a6e0ac8..ff415c7 100644 --- a/gcc/hash-table.h +++ b/gcc/hash-table.h @@ -820,6 +820,9 @@ hash_table<Descriptor, Lazy, Allocator>::expand () { value_type *q = find_empty_slot_for_expand (Descriptor::hash (x)); new ((void*) q) value_type (std::move (x)); + /* After the resources of 'x' have been moved to a new object at 'q', + we now have to destroy the 'x' object, to end its lifetime. */ + x.~value_type (); } p++; diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index b1283690..8312d08 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -3045,23 +3045,18 @@ expand_DEFERRED_INIT (internal_fn, gcall *stmt) if (init_type == AUTO_INIT_PATTERN) { - tree alt_type = NULL_TREE; - if (!can_native_interpret_type_p (var_type)) - { - alt_type - = lang_hooks.types.type_for_mode (TYPE_MODE (var_type), - TYPE_UNSIGNED (var_type)); - gcc_assert (can_native_interpret_type_p (alt_type)); - } - unsigned char *buf = (unsigned char *) xmalloc (total_bytes); memset (buf, INIT_PATTERN_VALUE, total_bytes); - init = native_interpret_expr (alt_type ? 
alt_type : var_type, - buf, total_bytes); - gcc_assert (init); - - if (alt_type) - init = build1 (VIEW_CONVERT_EXPR, var_type, init); + if (can_native_interpret_type_p (var_type)) + init = native_interpret_expr (var_type, buf, total_bytes); + else + { + tree itype = build_nonstandard_integer_type + (total_bytes * BITS_PER_UNIT, 1); + wide_int w = wi::from_buffer (buf, total_bytes); + init = build1 (VIEW_CONVERT_EXPR, var_type, + wide_int_to_tree (itype, w)); + } } expand_assignment (lhs, init, false); diff --git a/gcc/match.pd b/gcc/match.pd index 008f775..097ed2e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3401,13 +3401,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cmp @0 @2))))) /* Both signed and unsigned lshift produce the same result, so use - the form that minimizes the number of conversions. */ + the form that minimizes the number of conversions. Postpone this + transformation until after shifts by zero have been folded. */ (simplify (convert (lshift:s@0 (convert:s@1 @2) INTEGER_CST@3)) (if (INTEGRAL_TYPE_P (type) && tree_nop_conversion_p (type, TREE_TYPE (@0)) && INTEGRAL_TYPE_P (TREE_TYPE (@2)) - && TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type)) + && TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type) + && !integer_zerop (@3)) (lshift (convert @2) @3))) /* Simplifications of conversions. */ @@ -7020,13 +7022,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) (with { tree op_type = TREE_TYPE (@4); } (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) - && element_precision (type) == element_precision (op_type)) + && is_truth_type_for (op_type, TREE_TYPE (@0))) (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)))))) (simplify (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) (with { tree op_type = TREE_TYPE (@4); } (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) - && element_precision (type) == element_precision (op_type)) + && is_truth_type_for (op_type, TREE_TYPE (@0))) (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))))))) /* Same for ternary operations. */ @@ -7036,13 +7038,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (vec_cond @0 (view_convert? (uncond_op@5 @1 @2 @3)) @4) (with { tree op_type = TREE_TYPE (@5); } (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) - && element_precision (type) == element_precision (op_type)) + && is_truth_type_for (op_type, TREE_TYPE (@0))) (view_convert (cond_op @0 @1 @2 @3 (view_convert:op_type @4)))))) (simplify (vec_cond @0 @1 (view_convert? (uncond_op@5 @2 @3 @4))) (with { tree op_type = TREE_TYPE (@5); } (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) - && element_precision (type) == element_precision (op_type)) + && is_truth_type_for (op_type, TREE_TYPE (@0))) (view_convert (cond_op (bit_not @0) @2 @3 @4 (view_convert:op_type @1))))))) #endif diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 137382c..1c8d4ba 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,421 @@ +2021-09-16 Patrick Palka <ppalka@redhat.com> + + PR c++/98486 + * g++.dg/cpp2a/concepts-var-templ1.C: New test. + * g++.dg/cpp2a/concepts-var-templ1a.C: New test. + * g++.dg/cpp2a/concepts-var-templ1b.C: New test. + +2021-09-16 Harald Anlauf <anlauf@gmx.de> + + PR fortran/102287 + * gfortran.dg/intent_out_14.f90: New test. + +2021-09-16 Richard Biener <rguenther@suse.de> + + PR middle-end/102360 + * g++.dg/pr102360.C: New testcase. 
+ +2021-09-16 Richard Earnshaw <rearnsha@arm.com> + + * g++.dg/eh/arm-vfp-unwind.C: Support single-precision. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + PR middle-end/102080 + * gcc.target/i386/pr102080.c: New test. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-vcvtdq2ph-1a.c: New test. + * gcc.target/i386/avx512fp16-vcvtdq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtqq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtqq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtudq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtudq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtuqq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtuqq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtuw2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtuw2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtw2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtw2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtdq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtdq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtqq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtqq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtudq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtudq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtuw2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtuw2ph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtw2ph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtw2ph-1b.c: Ditto. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx-1.c: Add test for new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c: Add test for new intrinsics. + * gcc.target/i386/sse-22.c: Ditto. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-helper.h (V512): Add QI + components. + * gcc.target/i386/avx512fp16-vcvtph2dq-1a.c: New test. + * gcc.target/i386/avx512fp16-vcvtph2dq-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2qq-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2qq-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2udq-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2udq-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2uqq-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2uqq-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2uw-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2uw-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2w-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vcvtph2w-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2dq-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2dq-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2qq-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2qq-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2udq-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2udq-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2uqq-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2uqq-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2uw-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2uw-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2w-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vcvtph2w-1b.c: Ditto. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx-1.c: Add test for new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. 
+ * gcc.target/i386/sse-14.c: Add test for new intrinsics. + * gcc.target/i386/sse-22.c: Ditto. + +2021-09-16 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-vmovsh-1a.c: New test. + * gcc.target/i386/avx512fp16-vmovsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-2a.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-2b.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-3a.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-3b.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-4a.c: Ditto. + * gcc.target/i386/avx512fp16-vmovw-4b.c: Ditto. + +2021-09-15 Patrick Palka <ppalka@redhat.com> + + PR c++/101904 + * g++.dg/ext/conv2.C: New test. + * g++.dg/template/conv17.C: Extend test. + +2021-09-15 Jakub Jelinek <jakub@redhat.com> + + PR c++/88578 + PR c++/102295 + * g++.dg/ext/flexary39.C: New test. + * g++.dg/ext/flexary40.C: New test. + +2021-09-15 Patrick Palka <ppalka@redhat.com> + + PR c++/102050 + * g++.dg/cpp0x/initlist125.C: New test. + * g++.dg/cpp0x/initlist126.C: New test. + +2021-09-15 Alexandre Oliva <oliva@adacore.com> + + * gnat.dg/zcur_attr.adb, gnat.dg/zcur_attr.ads: New. + +2021-09-15 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/pr102327-1.c: New test. + * gcc.target/i386/pr102327-2.c: New test. + * gcc.target/i386/avx512fp16-1c.c: Adjust testcase. + +2021-09-15 Hongyu Wang <hongyu.wang@intel.com> + + * gcc.target/i386/avx-1.c: Adjust builtin macros. + * gcc.target/i386/sse-13.c: Likewise. + * gcc.target/i386/sse-23.c: Likewise. + +2021-09-15 Richard Biener <rguenther@suse.de> + + PR tree-optimization/102318 + * gcc.dg/vect/pr102318.c: New testcase. + +2021-09-15 Hongtao Liu <hongtao.liu@intel.com> + Peter Cordes <peter@cordes.ca> + + PR target/91103 + * gcc.target/i386/pr91103-1.c: Add extract tests. + * gcc.target/i386/pr91103-2.c: Ditto. + +2021-09-15 Thomas Schwinge <thomas@codesourcery.com> + + * gfortran.dg/goacc/unexpected-end.f90: Add OpenACC 'host_data' + testing. + +2021-09-15 Jason Merrill <jason@redhat.com> + + PR c++/48396 + * g++.dg/rtti/undeclared1.C: New test. + +2021-09-14 Harald Anlauf <anlauf@gmx.de> + + PR fortran/102311 + * gfortran.dg/entry_25.f90: New test. + +2021-09-14 Peter Bergner <bergner@linux.ibm.com> + + * gcc.target/powerpc/mma-builtin-6.c: Add second call to xxsetacc + built-in. Update instruction counts. + +2021-09-14 Patrick Palka <ppalka@redhat.com> + + PR c++/102163 + * g++.dg/cpp0x/constexpr-empty17.C: New test. + +2021-09-14 Jakub Jelinek <jakub@redhat.com> + + PR c++/102295 + * g++.target/i386/pr102295.C: New test. + +2021-09-14 Jakub Jelinek <jakub@redhat.com> + + PR c++/102305 + * g++.dg/cpp0x/pr102305.C: New test. + +2021-09-14 Tobias Burnus <tobias@codesourcery.com> + + PR fortran/102313 + * gfortran.dg/goacc/unexpected-end.f90: New test. + * gfortran.dg/gomp/unexpected-end.f90: New test. + +2021-09-14 Martin Liska <mliska@suse.cz> + + * g++.dg/gcov/gcov.py: Fix failing pytests as gcov.json.gz + filename was changed in b777f228b481ae881a7fbb09de367a053740932c. + +2021-09-14 Eric Botcazou <ebotcazou@adacore.com> + + * gnat.dg/enum_rep2.adb: New test. + +2021-09-14 Jakub Jelinek <jakub@redhat.com> + + * c-c++-common/gomp/atomic-29.c: Add -march=pentium + dg-additional-options for ia32. Use sync_long_long effective target + instead of sync_int_long. + * lib/target-supports.exp (check_effective_target_sync_long_long): Fix + a syntax error. 
+ +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-helper.h (V512): + Add xmm component. + * gcc.target/i386/avx512fp16-vfpclassph-1a.c: New test. + * gcc.target/i386/avx512fp16-vfpclassph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vfpclasssh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vfpclasssh-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vgetexpph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vgetexpph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vgetexpsh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vgetexpsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vgetmantph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vgetmantph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vgetmantsh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vgetmantsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vfpclassph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vfpclassph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vgetexpph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vgetexpph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vgetmantph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vgetmantph-1b.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx-1.c: Add test for new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c: Add test for new intrinsics. + * gcc.target/i386/sse-22.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-helper.h (_ROUND_CUR): New macro. + * gcc.target/i386/avx512fp16-vreduceph-1a.c: New test. + * gcc.target/i386/avx512fp16-vreduceph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vreducesh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vreducesh-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vrndscaleph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vrndscaleph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vrndscalesh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vrndscalesh-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vreduceph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vreduceph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vrndscaleph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vrndscaleph-1b.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx-1.c: Add test for new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c: Add test for new intrinsics. + * gcc.target/i386/sse-22.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-vrcpph-1a.c: New test. + * gcc.target/i386/avx512fp16-vrcpph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vrcpsh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vrcpsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vscalefph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vscalefph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vscalefsh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vscalefsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vrcpph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vrcpph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vscalefph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vscalefph-1b.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx-1.c: Add test for new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c: Add test for new intrinsics. + * gcc.target/i386/sse-22.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx512fp16-vrsqrtph-1a.c: New test. 
+ * gcc.target/i386/avx512fp16-vrsqrtph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vrsqrtsh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vrsqrtsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vsqrtph-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vsqrtph-1b.c: Ditto. + * gcc.target/i386/avx512fp16-vsqrtsh-1a.c: Ditto. + * gcc.target/i386/avx512fp16-vsqrtsh-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vrsqrtph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vrsqrtph-1b.c: Ditto. + * gcc.target/i386/avx512fp16vl-vsqrtph-1a.c: Ditto. + * gcc.target/i386/avx512fp16vl-vsqrtph-1b.c: Ditto. + +2021-09-14 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/avx-1.c: Add test for new builtins. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/sse-14.c: Add test for new intrinsics. + * gcc.target/i386/sse-22.c: Ditto. + +2021-09-13 Harald Anlauf <anlauf@gmx.de> + + PR fortran/82314 + * gfortran.dg/pr82314.f90: New test. + +2021-09-13 Harald Anlauf <anlauf@gmx.de> + + PR fortran/85130 + * gfortran.dg/substr_6.f90: Revert commit r8-7574, adding again + test that was erroneously considered as illegal. + +2021-09-13 Jason Merrill <jason@redhat.com> + + * g++.dg/warn/Winterference.H: New file. + * g++.dg/warn/Winterference.C: New test. + * g++.target/aarch64/interference.C: New test. + * g++.target/arm/interference.C: New test. + * g++.target/i386/interference.C: New test. + * g++.dg/warn/Winterference-2.C: New file. + +2021-09-13 Martin Liska <mliska@suse.cz> + H.J. Lu <hjl.tools@gmail.com> + + PR target/101696 + * g++.target/i386/mv30.C: New test. + * gcc.target/i386/mvc16.c: New test. + * gcc.target/i386/builtin_target.c (CHECK___builtin_cpu_supports): + New. + +2021-09-13 Andrew Pinski <apinski@marvell.com> + + PR target/95969 + * gcc.target/aarch64/lane-bound-1.c: New test. + * gcc.target/aarch64/lane-bound-2.c: New test. + +2021-09-13 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + PR target/102252 + * g++.target/aarch64/sve/pr102252.C: New test. + +2021-09-13 Aldy Hernandez <aldyh@redhat.com> + + * gcc.dg/tree-ssa/pr21417.c: Adjust for FSM removal. + * gcc.dg/tree-ssa/pr66752-3.c: Same. + * gcc.dg/tree-ssa/pr68198.c: Same. + * gcc.dg/tree-ssa/pr69196-1.c: Same. + * gcc.dg/tree-ssa/pr70232.c: Same. + * gcc.dg/tree-ssa/pr77445.c: Same. + * gcc.dg/tree-ssa/ranger-threader-4.c: Same. + * gcc.dg/tree-ssa/ssa-dom-thread-18.c: Same. + * gcc.dg/tree-ssa/ssa-dom-thread-6.c: Same. + * gcc.dg/tree-ssa/ssa-thread-12.c: Same. + * gcc.dg/tree-ssa/ssa-thread-13.c: Same. + +2021-09-13 Patrick Palka <ppalka@redhat.com> + + PR c++/101764 + * g++.dg/cpp1z/constexpr-if35.C: New test. + +2021-09-13 Martin Liska <mliska@suse.cz> + + PR c++/101331 + * g++.dg/pr101331.C: New test. + +2021-09-13 Aldy Hernandez <aldyh@redhat.com> + + * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Adjust for aarch64. + +2021-09-13 H.J. Lu <hjl.tools@gmail.com> + + PR target/101935 + * g++.target/i386/pr80566-1.C: Add + -mtune-ctrl=avx256_store_by_pieces. + * gcc.target/i386/pr100865-4a.c: Likewise. + * gcc.target/i386/pr100865-10a.c: Likewise. + * gcc.target/i386/pr90773-20.c: Likewise. + * gcc.target/i386/pr90773-21.c: Likewise. + * gcc.target/i386/pr90773-22.c: Likewise. + * gcc.target/i386/pr90773-23.c: Likewise. + * g++.target/i386/pr80566-2.C: Add + -mtune-ctrl=avx256_move_by_pieces. + * gcc.target/i386/eh_return-1.c: Likewise. + * gcc.target/i386/pr90773-26.c: Likewise. + * gcc.target/i386/pieces-memcpy-12.c: Replace -mtune=haswell + with -mtune-ctrl=avx256_move_by_pieces. 
+ * gcc.target/i386/pieces-memcpy-15.c: Likewise. + * gcc.target/i386/pieces-memset-2.c: Replace -mtune=haswell + with -mtune-ctrl=avx256_store_by_pieces. + * gcc.target/i386/pieces-memset-5.c: Likewise. + * gcc.target/i386/pieces-memset-11.c: Likewise. + * gcc.target/i386/pieces-memset-14.c: Likewise. + * gcc.target/i386/pieces-memset-20.c: Likewise. + * gcc.target/i386/pieces-memset-23.c: Likewise. + * gcc.target/i386/pieces-memset-29.c: Likewise. + * gcc.target/i386/pieces-memset-30.c: Likewise. + * gcc.target/i386/pieces-memset-33.c: Likewise. + * gcc.target/i386/pieces-memset-34.c: Likewise. + * gcc.target/i386/pieces-memset-44.c: Likewise. + * gcc.target/i386/pieces-memset-37.c: Replace -mtune=generic + with -mtune-ctrl=avx256_store_by_pieces. + 2021-09-12 Iain Buclaw <ibuclaw@gdcproject.org> PR d/102185 diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-18.c b/gcc/testsuite/c-c++-common/gomp/atomic-18.c index b389c6a..a8ed3c7 100644 --- a/gcc/testsuite/c-c++-common/gomp/atomic-18.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-18.c @@ -12,14 +12,12 @@ foo (int j) v = i; #pragma omp atomic acquire , write /* { dg-error "incompatible with 'acquire' clause" } */ i = v; - #pragma omp atomic capture hint (0) capture /* { dg-error "too many 'capture' clauses" "" { target c } } */ - /* { dg-error "too many atomic clauses" "" { target c++ } .-1 } */ + #pragma omp atomic capture hint (0) capture /* { dg-error "too many 'capture' clauses" } */ v = i = i + 1; #pragma omp atomic hint(j + 2) /* { dg-error "constant integer expression" } */ i = i + 1; #pragma omp atomic hint(f) /* { dg-error "integ" } */ i = i + 1; - #pragma omp atomic foobar /* { dg-error "expected 'read', 'write', 'update', 'capture', 'compare', 'weak', 'fail', 'seq_cst', 'acq_rel', 'release', 'relaxed' or 'hint' clause" "" { target c } } */ - /* { dg-error "expected 'read', 'write', 'update', 'capture', 'seq_cst', 'acq_rel', 'release', 'relaxed' or 'hint' clause" "" { target c++ } .-1 } */ - i = i + 1; /* { dg-error "expected end of line before" "" { target *-*-* } .-2 } */ + #pragma omp atomic foobar /* { dg-error "expected 'read', 'write', 'update', 'capture', 'compare', 'weak', 'fail', 'seq_cst', 'acq_rel', 'release', 'relaxed' or 'hint' clause" } */ + i = i + 1; /* { dg-error "expected end of line before" "" { target *-*-* } .-1 } */ } diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-25.c b/gcc/testsuite/c-c++-common/gomp/atomic-25.c index a5196a5..653ef18 100644 --- a/gcc/testsuite/c-c++-common/gomp/atomic-25.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-25.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target c } } */ +/* { dg-do compile } */ int x, r, z; double d, v; diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-26.c b/gcc/testsuite/c-c++-common/gomp/atomic-26.c index c7e65db..b7a4a1f 100644 --- a/gcc/testsuite/c-c++-common/gomp/atomic-26.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-26.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target c } } */ +/* { dg-do compile } */ int x; double d; diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-27.c b/gcc/testsuite/c-c++-common/gomp/atomic-27.c index 3d61717..8f1e7e9 100644 --- a/gcc/testsuite/c-c++-common/gomp/atomic-27.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-27.c @@ -1,5 +1,5 @@ /* PR middle-end/88968 */ -/* { dg-do compile { target c } } */ +/* { dg-do compile } */ struct __attribute__((packed)) S { unsigned int a : 16; diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-28.c b/gcc/testsuite/c-c++-common/gomp/atomic-28.c index 50cf223..853ae1c 100644 --- 
a/gcc/testsuite/c-c++-common/gomp/atomic-28.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-28.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target c } } */ +/* { dg-do compile } */ /* { dg-additional-options "-O2 -fdump-tree-ompexp" } */ /* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 4, 5, 5\\\);" 1 "ompexp" { target sync_int_long } } } */ /* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 4, 4, 2\\\);" 1 "ompexp" { target sync_int_long } } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-29.c b/gcc/testsuite/c-c++-common/gomp/atomic-29.c index e574c48..1081e43 100644 --- a/gcc/testsuite/c-c++-common/gomp/atomic-29.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-29.c @@ -1,10 +1,11 @@ -/* { dg-do compile { target c } } */ +/* { dg-do compile } */ /* { dg-additional-options "-O2 -fdump-tree-ompexp" } */ -/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 8, 5, 5\\\);" 1 "ompexp" { target sync_int_long } } } */ -/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 8, 4, 2\\\);" 1 "ompexp" { target sync_int_long } } } */ -/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 264, 5, 0\\\);" 1 "ompexp" { target sync_int_long } } } */ -/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 8, 0, 0\\\);" 1 "ompexp" { target sync_int_long } } } */ -/* { dg-final { scan-tree-dump-not "__atomic_load_8 \\\(" "ompexp" { target sync_int_long } } } */ +/* { dg-additional-options "-march=pentium" { target ia32 } } */ +/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 8, 5, 5\\\);" 1 "ompexp" { target sync_long_long } } } */ +/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 8, 4, 2\\\);" 1 "ompexp" { target sync_long_long } } } */ +/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 264, 5, 0\\\);" 1 "ompexp" { target sync_long_long } } } */ +/* { dg-final { scan-tree-dump-times "\.ATOMIC_COMPARE_EXCHANGE \\\(\[^\n\r]*, 8, 0, 0\\\);" 1 "ompexp" { target sync_long_long } } } */ +/* { dg-final { scan-tree-dump-not "__atomic_load_8 \\\(" "ompexp" { target sync_long_long } } } */ double x; diff --git a/gcc/testsuite/c-c++-common/gomp/atomic-30.c b/gcc/testsuite/c-c++-common/gomp/atomic-30.c index f36de70..37a30bb 100644 --- a/gcc/testsuite/c-c++-common/gomp/atomic-30.c +++ b/gcc/testsuite/c-c++-common/gomp/atomic-30.c @@ -1,9 +1,9 @@ -/* { dg-do compile { target c } } */ +/* { dg-do compile } */ int x; double d, g; -double +void foo (int y, double e, long double f) { double v; @@ -21,18 +21,18 @@ foo (int y, double e, long double f) #pragma omp atomic compare if (d + e) { d = e; } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" } */ #pragma omp atomic capture compare - { r = d >= e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" } */ - #pragma omp atomic capture compare - { r = d <= e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" } */ - #pragma omp atomic capture compare - { r = d > e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" } */ - #pragma omp atomic capture compare - { r = d < e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" } */ - #pragma omp atomic capture compare - { r = d != e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 
'if' condition" } */ - #pragma omp atomic capture compare - { r = d + e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" } */ - #pragma omp atomic capture compare + { r = d >= e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" "" { target c } } */ + #pragma omp atomic capture compare /* { dg-error "invalid form of '#pragma omp atomic' before 'd'" "" { target c++ } .-1 } */ + { r = d <= e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" "" { target c } } */ + #pragma omp atomic capture compare /* { dg-error "invalid form of '#pragma omp atomic' before 'd'" "" { target c++ } .-1 } */ + { r = d > e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" "" { target c } } */ + #pragma omp atomic capture compare /* { dg-error "invalid form of '#pragma omp atomic' before 'd'" "" { target c++ } .-1 } */ + { r = d < e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" "" { target c } } */ + #pragma omp atomic capture compare /* { dg-error "invalid form of '#pragma omp atomic' before 'd'" "" { target c++ } .-1 } */ + { r = d != e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" "" { target c } } */ + #pragma omp atomic capture compare /* { dg-error "invalid form of '#pragma omp atomic' before 'd'" "" { target c++ } .-1 } */ + { r = d + e; if (r) { d = f; } } /* { dg-error "expected '==', '<' or '>' comparison in 'if' condition" "" { target c } } */ + #pragma omp atomic capture compare /* { dg-error "invalid form of '#pragma omp atomic' before 'd'" "" { target c++ } .-1 } */ { r = d == e; if (r2) { d = f; } } /* { dg-error "invalid form of '#pragma omp atomic compare' before '\{' token" } */ #pragma omp atomic capture compare if (d > e) { d = e; } /* { dg-error "expected '==' comparison in 'if' condition" } */ @@ -97,41 +97,46 @@ foo (int y, double e, long double f) #pragma omp atomic compare x ^= 5; /* { dg-error "expected '=' before '\\\^=' token" } */ #pragma omp atomic compare - x = x + 3; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x - 5; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = 2 * x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = 5 | x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x & ~5; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x | 5; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x >= 5 ? 5 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x <= 5 ? 5 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x != 5 ? 7 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = 5 == x ? 7 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x == 5 ? x : 7; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x == 5 ? 
9 : 7; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x > 5 ? 6 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x < 5 ? 6 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x > 5 ? x : 6; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic compare - x = x < 5 ? x : 6; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ - #pragma omp atomic capture + x = x + 3; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before '\\\+' token" "" { target c++ } .-1 } */ + x = x - 5; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before '-' token" "" { target c++ } .-1 } */ + x = 2 * x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic' before numeric constant" "" { target c++ } .-1 } */ + x = 5 | x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic' before numeric constant" "" { target c++ } .-1 } */ + x = x & ~5; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before '\\\&' token" "" { target c++ } .-1 } */ + x = x | 5; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before '\\\|' token" "" { target c++ } .-1 } */ + x = x >= 5 ? 5 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid operator for '#pragma omp atomic' before '>=' token" "" { target c++ } .-1 } */ + x = x <= 5 ? 5 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid operator for '#pragma omp atomic' before '<=' token" "" { target c++ } .-1 } */ + x = x != 5 ? 7 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid operator for '#pragma omp atomic' before '!=' token" "" { target c++ } .-1 } */ + x = 5 == x ? 7 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic' before numeric constant" "" { target c++ } .-1 } */ + x = x == 5 ? x : 7; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before ';' token" "" { target c++ } .-1 } */ + x = x == 5 ? 9 : 7; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before ';' token" "" { target c++ } .-1 } */ + x = x > 5 ? 
6 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before ';' token" "" { target c++ } .-1 } */ + x = x < 5 ? 6 : x; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before ';' token" "" { target c++ } .-1 } */ + x = x > 5 ? x : 6; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic compare /* { dg-error "invalid form of '#pragma omp atomic compare' before ';' token" "" { target c++ } .-1 } */ + x = x < 5 ? x : 6; /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + #pragma omp atomic capture /* { dg-error "invalid form of '#pragma omp atomic compare' before ';' token" "" { target c++ } .-1 } */ r = x == 5; /* { dg-error "invalid operator for '#pragma omp atomic' before '==' token" } */ #pragma omp atomic capture compare r = x == 5; /* { dg-error "expected '=' before '==' token" } */ #pragma omp atomic capture compare /* { dg-error "'#pragma omp atomic compare capture' with non-integral comparison result" } */ { v = x == 5; if (v) { x = 6; } } + #pragma omp atomic compare capture + { r2 = x; x = y; } /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" } */ + #pragma omp atomic compare capture + { r2 = x; x = y == 7 ? 12 : y; } /* { dg-error "invalid form of '#pragma omp atomic' before ';' token" "" { target c } } */ + /* { dg-error "invalid form of '#pragma omp atomic' before 'y'" "" { target c++ } .-1 } */ } diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-empty17.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-empty17.C new file mode 100644 index 0000000..86126da --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-empty17.C @@ -0,0 +1,21 @@ +// PR c++/102163 +// { dg-do compile { target c++11 } } + +struct O { + constexpr O(int) { } +}; + +union _Variadic_union { + constexpr _Variadic_union(int __arg) : _M_rest(__arg) { } + int _M_first; + O _M_rest; +}; + +constexpr _Variadic_union u(42); + +struct _Variant_storage { + constexpr _Variant_storage() : _M_u(42) {} + _Variadic_union _M_u; +}; + +constexpr _Variant_storage w; diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist125.C b/gcc/testsuite/g++.dg/cpp0x/initlist125.C new file mode 100644 index 0000000..49dee1c --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist125.C @@ -0,0 +1,12 @@ +// PR c++/102050 +// { dg-do compile { target c++11 } } + +#include <initializer_list> + +struct A { + A(std::initializer_list<int> = {}); +}; + +A x{0}; +A y{1, 2, 3}; +A z; diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist126.C b/gcc/testsuite/g++.dg/cpp0x/initlist126.C new file mode 100644 index 0000000..0a8fb99 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist126.C @@ -0,0 +1,17 @@ +// PR c++/102050 +// { dg-do compile { target c++11 } } + +#include <initializer_list> + +extern struct A a; + +struct A { + A(const A& = a); + A(std::initializer_list<int>) = delete; +}; + +void f(A); + +int main() { + f({}); // { dg-bogus "deleted" } +} diff --git a/gcc/testsuite/g++.dg/cpp0x/pr102305.C b/gcc/testsuite/g++.dg/cpp0x/pr102305.C new file mode 100644 index 0000000..e63adcf --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/pr102305.C @@ -0,0 +1,39 @@ +// PR c++/102305 +// { dg-do compile { target c++11 } } + +namespace std +{ + template<typename _Tp, _Tp __v> + struct 
integral_constant + { + static constexpr _Tp value = __v; + typedef integral_constant<_Tp, __v> type; + }; + + template<typename _Tp, _Tp __v> + constexpr _Tp integral_constant<_Tp, __v>::value; + + typedef integral_constant<bool, true> true_type; + typedef integral_constant<bool, false> false_type; + + template<bool __v> + using bool_constant = integral_constant<bool, __v>; + + template<typename _Tp, typename... _Args> + struct is_constructible + : public bool_constant<__is_constructible(_Tp, _Args...)> + { + }; +} + +template<typename> +struct A { + virtual ~A() = 0; +}; + +struct B { + virtual ~B() = 0; +}; + +static_assert(!std::is_constructible<A<int> >::value, ""); +static_assert(!std::is_constructible<B>::value, ""); diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1.C b/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1.C new file mode 100644 index 0000000..80b48ba --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1.C @@ -0,0 +1,9 @@ +// PR c++/98486 +// { dg-do compile { target c++20 } } + +template<class T, class U> concept C = __is_same(T, U); + +template<C<int>> int v; + +template<> int v<int>; +template<> int v<char>; // { dg-error "match" } diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1a.C b/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1a.C new file mode 100644 index 0000000..b12d37d --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1a.C @@ -0,0 +1,14 @@ +// PR c++/98486 +// { dg-do compile { target c++20 } } + +template<class T, class U> concept C = __is_same(T, U); + +struct A { + template<C<int>> static int v; +}; + +template<> int A::v<int>; +template<> int A::v<char>; // { dg-error "match" } + +int x = A::v<int>; +int y = A::v<char>; // { dg-error "invalid" } diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1b.C b/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1b.C new file mode 100644 index 0000000..37d7f0f --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-var-templ1b.C @@ -0,0 +1,15 @@ +// PR c++/98486 +// { dg-do compile { target c++20 } } + +template<class T, class U> concept C = __is_same(T, U); + +template<class T> +struct A { + template<C<T>> static int v; +}; + +template<> template<> int A<int>::v<int>; +template<> template<> int A<int>::v<char>; // { dg-error "match" } + +int x = A<int>::v<int>; +int y = A<int>::v<char>; // { dg-error "invalid" } diff --git a/gcc/testsuite/g++.dg/eh/arm-vfp-unwind.C b/gcc/testsuite/g++.dg/eh/arm-vfp-unwind.C index 62263c0..8ea25e5 100644 --- a/gcc/testsuite/g++.dg/eh/arm-vfp-unwind.C +++ b/gcc/testsuite/g++.dg/eh/arm-vfp-unwind.C @@ -12,7 +12,11 @@ using namespace std; static void donkey () { - asm volatile ("fcpyd d9, %P0" : : "w" (1.2345) : "d9"); +#if __ARM_FP & 8 + asm volatile ("vmov.f64 d9, %P0" : : "w" (1.2345) : "d9"); +#else + asm volatile ("vmov.f32 s18, %0" : : "t" (1.2345f) : "s18"); +#endif throw 1; } diff --git a/gcc/testsuite/g++.dg/ext/conv2.C b/gcc/testsuite/g++.dg/ext/conv2.C new file mode 100644 index 0000000..baf2a43 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/conv2.C @@ -0,0 +1,13 @@ +// { dg-do compile { target c++11 } } +// { dg-additional-options "-fpermissive" } + +struct A { + A(int*, int); +}; + +void f(A); + +int main() { + const int n = 0; + f({&n, 42}); // { dg-warning "invalid conversion from 'const int\\*' to 'int\\*'" } +} diff --git a/gcc/testsuite/g++.dg/ext/flexary39.C b/gcc/testsuite/g++.dg/ext/flexary39.C new file mode 100644 index 0000000..8eb81f2 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/flexary39.C @@ -0,0 +1,65 @@ +// PR 
c++/88578 +// { dg-do run } +// { dg-options -Wno-pedantic } + +#define STR(s) #s +#define ASSERT(exp) \ + ((exp) ? (void)0 : (void)(__builtin_printf ("%s:%i: assertion %s failed\n", \ + __FILE__, __LINE__, STR(exp)), \ + __builtin_abort ())) + +typedef int int32_t __attribute__((mode (__SI__))); + +struct Ax { int32_t n, a[]; }; +struct AAx { int32_t i; Ax ax; }; + +int32_t i = 12345678; + +void +test () +{ + { + // OK. Does not assign any elements to flexible array. + Ax s = { 0 }; + ASSERT (s.n == 0); + } + { + // OK only for statically allocated objects, otherwise error. + static Ax s = { 0, { } }; + ASSERT (s.n == 0); + } + { + static Ax s = { 1, { 2 } }; + ASSERT (s.n == 1 && s.a [0] == 2); + } + { + static Ax s = { 2, { 3, 4 } }; + ASSERT (s.n = 2 && s.a [0] == 3 && s.a [1] == 4); + } + { + static Ax s = { 123, i }; + ASSERT (s.n == 123 && s.a [0] == i); + } + { + static Ax s = { 456, { i } }; + ASSERT (s.n == 456 && s.a [0] == i); + } + { + int32_t j = i + 1, k = j + 1; + static Ax s = { 3, { i, j, k } }; + ASSERT (s.n == 3 && s.a [0] == i && s.a [1] == j && s.a [2] == k); + } + + { + // OK. Does not assign any elements to flexible array. + AAx s = { 1, { 2 } }; + ASSERT (s.i == 1 && s.ax.n == 2); + } +} + +int +main () +{ + test (); + test (); +} diff --git a/gcc/testsuite/g++.dg/ext/flexary40.C b/gcc/testsuite/g++.dg/ext/flexary40.C new file mode 100644 index 0000000..ee824c2 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/flexary40.C @@ -0,0 +1,50 @@ +// PR c++/102295 +// { dg-do run } +// { dg-options "" } + +struct A { int a; int b[]; }; +struct B { B (); int k; }; +struct C { int l; B m[]; }; + +int x[4]; +A c = { 42, { ++x[0], ++x[1], ++x[2], ++x[3] } }; +A d = { 43, { 0, ++x[0], ++x[1], ++x[2], ++x[3] } }; +A e = { 44, { ++x[0], ++x[1], ++x[2], 17 } }; +A f = { 45 }; +C n = { 50, { B (), B () } }; +C o = { 51, {} }; + +int +main () +{ + static A g = { 46, { ++x[0], ++x[1], ++x[2], ++x[3] } }; + static A h = { 47, { 0, ++x[0], ++x[1], ++x[2], ++x[3] } }; + static A i = { 48, { ++x[0], ++x[1], ++x[2], 18 } }; + static A j = { 49 }; + if (c.a != 42 || c.b[0] != 1 || c.b[1] != 1 || c.b[2] != 1 || c.b[3] != 1) + __builtin_abort (); + if (d.a != 43 || d.b[0] != 0 || d.b[1] != 2 || d.b[2] != 2 || d.b[3] != 2 || d.b[4] != 2) + __builtin_abort (); + if (e.a != 44 || e.b[0] != 3 || e.b[1] != 3 || e.b[2] != 3 || e.b[3] != 17) + __builtin_abort (); + if (f.a != 45) + __builtin_abort (); + if (g.a != 46 || g.b[0] != 4 || g.b[1] != 4 || g.b[2] != 4 || g.b[3] != 3) + __builtin_abort (); + if (h.a != 47 || h.b[0] != 0 || h.b[1] != 5 || h.b[2] != 5 || h.b[3] != 5 || h.b[4] != 4) + __builtin_abort (); + if (i.a != 48 || i.b[0] != 6 || i.b[1] != 6 || i.b[2] != 6 || i.b[3] != 18) + __builtin_abort (); + if (j.a != 49) + __builtin_abort (); + if (n.l != 50 || n.m[0].k != 42 || n.m[1].k != 42) + __builtin_abort (); + if (o.l != 51) + __builtin_abort (); + if (x[0] != 6 || x[1] != 6 || x[2] != 6 || x[3] != 4) + __builtin_abort (); +} + +B::B () : k (42) +{ +} diff --git a/gcc/testsuite/g++.dg/gcov/gcov.py b/gcc/testsuite/g++.dg/gcov/gcov.py index a8c4ea9..5137f3a 100644 --- a/gcc/testsuite/g++.dg/gcov/gcov.py +++ b/gcc/testsuite/g++.dg/gcov/gcov.py @@ -5,6 +5,9 @@ import os def gcov_from_env(): # return parsed JSON content a GCOV_PATH file - json_filename = os.environ['GCOV_PATH'] + '.gcov.json.gz' + json_filename = os.environ['GCOV_PATH'] + # strip extension + json_filename = json_filename[:json_filename.rindex('.')] + json_filename += '.gcov.json.gz' json_data = gzip.open(json_filename).read() 
return json.loads(json_data) diff --git a/gcc/testsuite/g++.dg/gomp/atomic-20.C b/gcc/testsuite/g++.dg/gomp/atomic-20.C new file mode 100644 index 0000000..cb7a37b --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/atomic-20.C @@ -0,0 +1,104 @@ +// { dg-do compile } + +int x, r, z; +double d, v; +long double ld; + +template <int N> +void +foo (int y, double e, long double f) +{ + #pragma omp atomic compare update seq_cst + x = x > y ? y : x; + #pragma omp atomic compare relaxed + d = e > d ? e : d; + #pragma omp atomic compare + d = f < d ? f : d; + #pragma omp atomic compare seq_cst fail(relaxed) + x = 12U < x ? 12U : x; + #pragma omp atomic compare + x = x == 7 ? 24 : x; + #pragma omp atomic compare + x = x == 123UL ? 256LL : x; + #pragma omp atomic compare + ld = ld == f ? f + 5.0L : ld; + #pragma omp atomic compare + if (x == 9) { x = 5; } + #pragma omp atomic compare + if (x > 5) { x = 5; } + #pragma omp atomic compare + if (7 > x) { x = 7; } + #pragma omp atomic compare update capture seq_cst fail(acquire) + v = d = f > d ? f : d; + #pragma omp atomic update capture compare + v = x = x < 24ULL ? 24ULL : x; + #pragma omp atomic compare, capture, update + v = x = x == e ? f : x; + #pragma omp atomic capture compare + { v = d; if (d > e) { d = e; } } + #pragma omp atomic compare capture + { if (e < d) { d = e; } v = d; } + #pragma omp atomic compare capture + { y = x; if (x == 42) { x = 7; } } + #pragma omp atomic capture compare weak + { if (x == 42) { x = 7; } y = x; } + #pragma omp atomic capture compare fail(seq_cst) + if (d == 8.0) { d = 16.0; } else { v = d; } + #pragma omp atomic capture compare + { r = x == 8; if (r) { x = 24; } } + #pragma omp atomic compare capture + { r = x == y; if (r) { x = y + 6; } else { z = x; } } +} + +template <typename I, typename D, typename LD> +void +bar (I &x, I &r, I &z, D &d, D &v, LD &ld, I y, D e, LD f) +{ + #pragma omp atomic compare update seq_cst + x = x > y ? y : x; + #pragma omp atomic compare relaxed + d = e > d ? e : d; + #pragma omp atomic compare + d = f < d ? f : d; + #pragma omp atomic compare seq_cst fail(relaxed) + x = 12U < x ? 12U : x; + #pragma omp atomic compare + x = x == 7 ? 24 : x; + #pragma omp atomic compare + x = x == 123UL ? 256LL : x; + #pragma omp atomic compare + ld = ld == f ? f + 5.0L : ld; + #pragma omp atomic compare + if (x == 9) { x = 5; } + #pragma omp atomic compare + if (x > 5) { x = 5; } + #pragma omp atomic compare + if (7 > x) { x = 7; } + #pragma omp atomic compare update capture seq_cst fail(acquire) + v = d = f > d ? f : d; + #pragma omp atomic update capture compare + v = x = x < 24ULL ? 24ULL : x; + #pragma omp atomic compare, capture, update + v = x = x == e ? 
f : x; + #pragma omp atomic capture compare + { v = d; if (d > e) { d = e; } } + #pragma omp atomic compare capture + { if (e < d) { d = e; } v = d; } + #pragma omp atomic compare capture + { y = x; if (x == 42) { x = 7; } } + #pragma omp atomic capture compare weak + { if (x == 42) { x = 7; } y = x; } + #pragma omp atomic capture compare fail(seq_cst) + if (d == 8.0) { d = 16.0; } else { v = d; } + #pragma omp atomic capture compare + { r = x == 8; if (r) { x = 24; } } + #pragma omp atomic compare capture + { r = x == y; if (r) { x = y + 6; } else { z = x; } } +} + +void +baz (int y, double e, long double f) +{ + foo <0> (y, e, f); + bar (x, r, z, d, v, ld, y, e, f); +} diff --git a/gcc/testsuite/g++.dg/gomp/atomic-5.C b/gcc/testsuite/g++.dg/gomp/atomic-5.C index 78f6344..e2fd591 100644 --- a/gcc/testsuite/g++.dg/gomp/atomic-5.C +++ b/gcc/testsuite/g++.dg/gomp/atomic-5.C @@ -23,7 +23,7 @@ void f1(void) #pragma omp atomic bar() += 1; /* { dg-error "lvalue required" } */ #pragma omp atomic a /* { dg-error "expected end of line" } */ - x++; /* { dg-error "expected 'read', 'write', 'update', 'capture', 'seq_cst', 'acq_rel', 'release', 'relaxed' or 'hint' clause" "" { target *-*-* } .-1 } */ + x++; /* { dg-error "expected 'read', 'write', 'update', 'capture', 'compare', 'weak', 'fail', 'seq_cst', 'acq_rel', 'release', 'relaxed' or 'hint' clause" "" { target *-*-* } .-1 } */ #pragma omp atomic ; /* { dg-error "expected primary-expression" } */ #pragma omp atomic diff --git a/gcc/testsuite/g++.dg/pr102360.C b/gcc/testsuite/g++.dg/pr102360.C new file mode 100644 index 0000000..fdf9e08 --- /dev/null +++ b/gcc/testsuite/g++.dg/pr102360.C @@ -0,0 +1,54 @@ +// { dg-do compile } +// { dg-options "-fno-tree-dse -O1 -ftrivial-auto-var-init=pattern" } + +class A; +template <typename _Tp, int m, int n> class B { +public: + _Tp val[m * n]; +}; +class C { +public: + C(A); +}; +struct D { + D(); + unsigned long &operator[](int); + unsigned long *p; +}; +class A { +public: + template <typename _Tp, int m, int n> A(const B<_Tp, m, n> &, bool); + int rows, cols; + unsigned char *data; + unsigned char *datastart; + unsigned char *dataend; + unsigned char *datalimit; + D step; +}; +template <typename _Tp, int m, int n> +A::A(const B<_Tp, m, n> &p1, bool) + : rows(m), cols(n) { + step[0] = cols * sizeof(_Tp); + datastart = data = (unsigned char *)p1.val; + datalimit = dataend = datastart + rows * step[0]; +} +class F { +public: + static void compute(C); + template <typename _Tp, int m, int n, int nm> + static void compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &, + B<_Tp, n, nm> &); +}; +D::D() {} +unsigned long &D::operator[](int p1) { return p[p1]; } +template <typename _Tp, int m, int n, int nm> +void F::compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &, + B<_Tp, n, nm> &p4) { + A a(p4, false); + compute(a); +} +void fn1() { + B<double, 4, 4> b, c, e; + B<double, 4, 1> d; + F::compute(b, d, c, e); +} diff --git a/gcc/testsuite/g++.dg/rtti/undeclared1.C b/gcc/testsuite/g++.dg/rtti/undeclared1.C new file mode 100644 index 0000000..9594c22 --- /dev/null +++ b/gcc/testsuite/g++.dg/rtti/undeclared1.C @@ -0,0 +1,5 @@ +// PR c++/48396 + +namespace std { + type_info *p; // { dg-error "type_info" } +} diff --git a/gcc/testsuite/g++.dg/template/conv17.C b/gcc/testsuite/g++.dg/template/conv17.C index ba012c9..f0f10f2 100644 --- a/gcc/testsuite/g++.dg/template/conv17.C +++ b/gcc/testsuite/g++.dg/template/conv17.C @@ -53,4 +53,11 @@ concept D = requires (const T t) { }; static_assert(D<C>); + +// 
Test that when there's no strictly viable candidate and we're in a +// SFINAE context, we still stop at the first bad argument conversion. +template<class T> +concept E = requires { T().h(nullptr); }; + +static_assert(!E<C>); #endif diff --git a/gcc/testsuite/g++.target/i386/pr102295.C b/gcc/testsuite/g++.target/i386/pr102295.C new file mode 100644 index 0000000..09efc3c --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr102295.C @@ -0,0 +1,12 @@ +// PR c++/102295 +// { dg-do compile { target *-*-linux* } } +// { dg-options "-Wno-pedantic" } + +struct S { + int a; + int b[]; +} S; + +struct S s = { 1, { 2, 3 } }; + +/* { dg-final { scan-assembler ".size\[\t \]*s, 12" } } */ diff --git a/gcc/testsuite/gcc.dg/Wint-in-bool-context-4.c b/gcc/testsuite/gcc.dg/Wint-in-bool-context-4.c new file mode 100644 index 0000000..0e96dd7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wint-in-bool-context-4.c @@ -0,0 +1,35 @@ +/* PR c/102245 */ +/* { dg-options "-Wint-in-bool-context" } */ +/* { dg-do compile } */ + +_Bool test1(_Bool x) +{ + return !(x << 0); /* { dg-warning "boolean context" } */ +} + +_Bool test2(_Bool x) +{ + return !(x << 1); /* { dg-warning "boolean context" } */ +} + +_Bool test3(_Bool x, int y) +{ + return !(x << y); /* { dg-warning "boolean context" } */ +} + +_Bool test4(int x, int y) +{ + return !(x << y); /* { dg-warning "boolean context" } */ +} + +_Bool test5(int x, int y) +{ + return !((x << y) << 0); /* { dg-warning "boolean context" } */ +} + +int test6(_Bool x) +{ + int v = 0; + return (v & ~1L) | (1L & (x << 0)); /* { dg-bogus "boolean context" } */ +} + diff --git a/gcc/testsuite/gcc.dg/vect/pr102318.c b/gcc/testsuite/gcc.dg/vect/pr102318.c new file mode 100644 index 0000000..cc58efa --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr102318.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ + +void +vec_slp_int16_t (short int *restrict a, short int *restrict b, int n) +{ + short int x0 = b[0]; + short int x1 = b[1]; + short int x2 = b[2]; + short int x3 = b[3]; + for (int i = 0; i < n; ++i) + { + x0 += a[i * 4]; + x1 += a[i * 4 + 1]; + x2 += a[i * 4 + 2]; + x3 += a[i * 4 + 3]; + } + b[0] = x0; + b[1] = x1; + b[2] = x2; + b[3] = x3; +} diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index d9aa8a7..add3e0c 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -686,25 +686,85 @@ #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) /* avx512fp16intrin.h */ -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) 
__builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D) -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8) -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D) +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8) +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8) +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8) +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D) +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D) +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8) +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8) +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D) +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D) +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, 
D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8) +#define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C) +#define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U) +#define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8) +#define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4) +#define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, D, 8) +#define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4) +#define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) +#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8) +#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8) +#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8) +#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8) +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) 
__builtin_ia32_vcvtsi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) /* avx512fp16vlintrin.h */ -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D) +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D) +#define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C) +#define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C) +#define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D) +#define __builtin_ia32_getmantph128_mask(A, E, C, D) __builtin_ia32_getmantph128_mask(A, 1, C, D) /* vpclmulqdqintrin.h */ #define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) diff --git a/gcc/testsuite/gcc.target/i386/avx-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-covert-1.c new file mode 100644 index 0000000..b6c794e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-covert-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vcvtps2pd" } } */ +/* { dg-final { scan-assembler-not "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c new file mode 100644 index 0000000..c40c48b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */ + +extern float f; +extern double d; + +void +foo (void) +{ + d = f; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-not "vcvtps2pd" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c new file mode 100644 index 0000000..01bb64e 
--- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern int i; + +void +foo (void) +{ + f = i; +} + +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-1c.c index 49fc2aa..b41a90b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-1c.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-1c.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-mavx512fp16 -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovsh|vmovw)" 2 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpinsrw" 1 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpinsrw" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovsh" 1 } } */ +/* { dg-final { scan-assembler-times "vpblendw" 1 } } */ +/* { dg-final { scan-assembler "vpbroadcastw" } } */ typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16))); typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__)); diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h b/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h index 5d3539b..ce3cfdc 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-helper.h @@ -17,6 +17,7 @@ /* Useful macros. */ #define NOINLINE __attribute__((noinline,noclone)) #define _ROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) +#define _ROUND_CUR 8 #define AVX512F_MAX_ELEM 512 / 32 /* Structure for _Float16 emulation */ @@ -24,18 +25,28 @@ typedef union { __m512 zmm; __m512h zmmh; + __m512i zmmi; + __m512d zmmd; __m256 ymm[2]; __m256h ymmh[2]; __m256i ymmi[2]; + __m256d ymmd[2]; __m128h xmmh[4]; + __m128 xmm[4]; + __m128i xmmi[4]; + __m128d xmmd[4]; unsigned short u16[32]; unsigned int u32[16]; + int i32[16]; + long long s64[8]; + unsigned long long u64[8]; + double f64[8]; float f32[16]; _Float16 f16[32]; } V512; /* Global variables. */ -V512 src1, src2, src3; +V512 src1, src2, src3, src3f; int n_errs = 0; /* Helper function for packing/unpacking ph operands. 
*/ @@ -160,12 +171,16 @@ init_src() int i; for (i = 0; i < AVX512F_MAX_ELEM; i++) { - v1.f32[i] = -i + 1; - v2.f32[i] = i * 0.5f; - v3.f32[i] = i * 2.5f; - v4.f32[i] = i - 0.5f; + v1.f32[i] = i + 1; + v2.f32[i] = i * 0.5f; + v3.f32[i] = i * 1.5f; + v4.f32[i] = i - 0.5f; - src3.u32[i] = (i + 1) * 10; + src3.u32[i] = (i + 1) * 10; + } + + for (i = 0; i < 8; i++) { + src3f.f64[i] = (i + 1) * 7.5; } src1 = pack_twops_2ph(v1, v2); @@ -215,30 +230,50 @@ init_dest(V512 * res, V512 * exp) #if AVX512F_LEN == 256 #undef HF #undef SF +#undef SI +#undef DF +#undef H_HF #undef NET_MASK -#undef MASK_VALUE +#undef MASK_VALUE +#undef HALF_MASK #undef ZMASK_VALUE #define NET_MASK 0xffff #define MASK_VALUE 0xcccc #define ZMASK_VALUE 0xfcc1 +#define HALF_MASK 0xcc #define HF(x) x.ymmh[0] +#define H_HF(x) x.xmmh[0] #define SF(x) x.ymm[0] +#define DF(x) x.ymmd[0] +#define SI(x) x.ymmi[0] #elif AVX512F_LEN == 128 #undef HF #undef SF +#undef DF +#undef SI +#undef H_HF #undef NET_MASK #undef MASK_VALUE #undef ZMASK_VALUE +#undef HALF_MASK #define NET_MASK 0xff #define MASK_VALUE 0xcc +#define HALF_MASK MASK_VALUE #define ZMASK_VALUE 0xc1 #define HF(x) x.xmmh[0] #define SF(x) x.xmm[0] +#define DF(x) x.xmmd[0] +#define SI(x) x.xmmi[0] +#define H_HF(x) x.xmmh[0] #else #define NET_MASK 0xffffffff #define MASK_VALUE 0xcccccccc #define ZMASK_VALUE 0xfcc1fcc1 +#define HALF_MASK 0xcccc #define HF(x) x.zmmh #define SF(x) x.zmm +#define DF(x) x.zmmd +#define SI(x) x.zmmi +#define H_HF(x) x.ymmh[0] #endif diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c new file mode 100644 index 0000000..cf0cc74 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c @@ -0,0 +1,44 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +void +test_512 (void) +{ + V512 res; + + res.ymmh[0] = _mm512_castph512_ph256 (src1.zmmh); + check_results (&res, &src1, 16, "_mm512_castph512_ph256"); + + res.xmmh[0] = _mm512_castph512_ph128 (src1.zmmh); + check_results (&res, &src1, 8, "_mm512_castph512_ph128"); + + res.zmmh = _mm512_castph256_ph512 (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm512_castph256_ph512"); + + res.zmmh = _mm512_castph128_ph512 (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm512_castph128_ph512"); + + res.zmm = _mm512_castph_ps (src1.zmmh); + check_results (&res, &src1, 32, "_mm512_castph_ps"); + + res.zmmd = _mm512_castph_pd (src1.zmmh); + check_results (&res, &src1, 32, "_mm512_castph_pd"); + + res.zmmi = _mm512_castph_si512 (src1.zmmh); + check_results (&res, &src1, 32, "_mm512_castph_si512"); + + res.zmmh = _mm512_castps_ph (src1.zmm); + check_results (&res, &src1, 32, "_mm512_castps_ph"); + + res.zmmh = _mm512_castpd_ph (src1.zmmd); + check_results (&res, &src1, 32, "_mm512_castpd_ph"); + + res.zmmh = _mm512_castsi512_ph (src1.zmmi); + check_results (&res, &src1, 32, "_mm512_castsi512_ph"); + + if (n_errs != 0) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c new file mode 100644 index 0000000..a29f1db --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c @@ -0,0 +1,43 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512f-check.h" + +extern int memcmp (const void *, const void 
*, __SIZE_TYPE__); + +void +do_test (void) +{ + union512i_d zero; + union512h ad; + union256h b,bd; + union128h c; + + int i; + + for (i = 0; i < 16; i++) + { + b.a[i] = 65.43f + i; + zero.a[i] = 0; + } + + for (i = 0; i < 8; i++) + { + c.a[i] = 32.01f + i; + } + + ad.x = _mm512_zextph256_ph512 (b.x); + if (memcmp (ad.a, b.a, 32) + || memcmp (&ad.a[16], &zero.a, 32)) + abort (); + + ad.x = _mm512_zextph128_ph512 (c.x); + if (memcmp (ad.a, c.a, 16) + || memcmp (&ad.a[8], &zero.a, 48)) + abort (); + +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtdq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtdq2ph-1a.c new file mode 100644 index 0000000..45697d94 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtdq2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res, res1, res2; +volatile __m512i x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtepi32_ph (x1); + res1 = _mm512_mask_cvtepi32_ph (res, m16, x2); + res2 = _mm512_maskz_cvtepi32_ph (m16, x3); + res = _mm512_cvt_roundepi32_ph (x1, 4); + res1 = _mm512_mask_cvt_roundepi32_ph (res, m16, x2, 8); + res2 = _mm512_maskz_cvt_roundepi32_ph (m16, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtdq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtdq2ph-1b.c new file mode 100644 index 0000000..a2bb56c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtdq2ph-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 32) + +void NOINLINE +EMULATE(cvtd2_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = op1.u32[i]; + } + } + *dest = pack_twops_2ph(v5, v5); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtd2_ph)(&exp, src3, NET_MASK, 0); + H_HF(res) = INTRINSIC (_cvtepi32_ph) (SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtepi32_ph); + + init_dest(&res, &exp); + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 0); + H_HF(res) = INTRINSIC (_mask_cvtepi32_ph) (H_HF(res), HALF_MASK, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtepi32_ph); + + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 1); + H_HF(res) = INTRINSIC (_maskz_cvtepi32_ph) (HALF_MASK, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtepi32_ph); + +#if AVX512F_LEN 
== 512 + EMULATE(cvtd2_ph)(&exp, src3, NET_MASK, 0); + H_HF(res) = INTRINSIC (_cvt_roundepi32_ph) (SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundepi32_ph); + + init_dest(&res, &exp); + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 0); + H_HF(res) = INTRINSIC (_mask_cvt_roundepi32_ph) (H_HF(res), HALF_MASK, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundepi32_ph); + + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 1); + H_HF(res) = INTRINSIC (_maskz_cvt_roundepi32_ph) (HALF_MASK, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundepi32_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtpd2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtpd2ph-1a.c new file mode 100644 index 0000000..8f74405 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtpd2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtpd2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2ph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, res1, res2; +volatile __m512d x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtpd_ph (x1); + res1 = _mm512_mask_cvtpd_ph (res, m8, x2); + res2 = _mm512_maskz_cvtpd_ph (m8, x3); + res = _mm512_cvt_roundpd_ph (x1, 4); + res1 = _mm512_mask_cvt_roundpd_ph (res, m8, x2, 8); + res2 = _mm512_maskz_cvt_roundpd_ph (m8, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtpd2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtpd2ph-1b.c new file mode 100644 index 0000000..dde364b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtpd2ph-1b.c @@ -0,0 +1,82 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 64) + +void NOINLINE +EMULATE(cvtpd2_ph) (V512 * dest, V512 op1, int n_el, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < n_el; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = op1.f64[i]; + } + } + *dest = pack_twops_2ph(v5, v5); + for (i = n_el; i < 8; i++) + dest->u16[i] = 0; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtpd2_ph)(&exp, src3f, N_ELEMS, NET_MASK, 0); + res.xmmh[0] = INTRINSIC (_cvtpd_ph) (DF(src3f)); + CHECK_RESULT (&res, &exp, 8, _cvtpd_ph); + + init_dest(&res, &exp); + EMULATE(cvtpd2_ph)(&exp, src3f, N_ELEMS, 0xcc, 0); + res.xmmh[0] = INTRINSIC (_mask_cvtpd_ph) (res.xmmh[0], 0xcc, + DF(src3f)); + CHECK_RESULT (&res, &exp, 8, 
_mask_cvtpd_ph); + + EMULATE(cvtpd2_ph)(&exp, src3f, N_ELEMS, 0xf1, 1); + res.xmmh[0] = INTRINSIC (_maskz_cvtpd_ph) (0xf1, DF(src3f)); + CHECK_RESULT (&res, &exp, 8, _maskz_cvtpd_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtpd2_ph)(&exp, src3f, N_ELEMS, NET_MASK, 0); + res.xmmh[0] = INTRINSIC (_cvt_roundpd_ph) (DF(src3f), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _cvt_roundpd_ph); + + init_dest(&res, &exp); + EMULATE(cvtpd2_ph)(&exp, src3f, N_ELEMS, 0xcc, 0); + res.xmmh[0] = INTRINSIC (_mask_cvt_roundpd_ph) (res.xmmh[0], 0xcc, + DF(src3f), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _mask_cvt_roundpd_ph); + + EMULATE(cvtpd2_ph)(&exp, src3f, N_ELEMS, 0xf1, 1); + res.xmmh[0] = INTRINSIC (_maskz_cvt_roundpd_ph) (0xf1, DF(src3f), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _maskz_cvt_roundpd_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2dq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2dq-1a.c new file mode 100644 index 0000000..31a5639 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2dq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+\{rz-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m256h x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_epi32 (x1); + res1 = _mm512_mask_cvtph_epi32 (res, m16, x2); + res2 = _mm512_maskz_cvtph_epi32 (m16, x3); + res = _mm512_cvt_roundph_epi32 (x1, 4); + res1 = _mm512_mask_cvt_roundph_epi32 (res, m16, x2, 8); + res2 = _mm512_maskz_cvt_roundph_epi32 (m16, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2dq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2dq-1b.c new file mode 100644 index 0000000..80a8582 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2dq-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_d) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtph_epi32) (H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_epi32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, 
HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvtph_epi32) (SI(res), HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_epi32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvtph_epi32) (HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_epi32); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvt_roundph_epi32) (H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_epi32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvt_roundph_epi32) (SI(res), HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_epi32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvt_roundph_epi32) (HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_epi32); +#endif + + if (n_errs != 0) + abort (); +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2pd-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2pd-1a.c new file mode 100644 index 0000000..b7bb3b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2pd-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512d res, res1, res2; +volatile __m128h x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_pd (x1); + res1 = _mm512_mask_cvtph_pd (res, m8, x2); + res2 = _mm512_maskz_cvtph_pd (m8, x3); + res = _mm512_cvt_roundph_pd (x1, 4); + res1 = _mm512_mask_cvt_roundph_pd (res, m8, x2, 8); + res2 = _mm512_maskz_cvt_roundph_pd (m8, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2pd-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2pd-1b.c new file mode 100644 index 0000000..c20888b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2pd-1b.c @@ -0,0 +1,78 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_pd) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.f64[i] = v1.f32[i]; + } + } + + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_pd)(&exp, src1, NET_MASK, 0); + DF(res) = INTRINSIC (_cvtph_pd) 
(src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_pd); + + init_dest(&res, &exp); + EMULATE(cvtph2_pd)(&exp, src1, 0xcc, 0); + DF(res) = INTRINSIC (_mask_cvtph_pd) (DF(res), 0xcc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_pd); + + EMULATE(cvtph2_pd)(&exp, src1, 0xc1, 1); + DF(res) = INTRINSIC (_maskz_cvtph_pd) (0xc1, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_pd); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_pd)(&exp, src1, NET_MASK, 0); + DF(res) = INTRINSIC (_cvt_roundph_pd) (src1.xmmh[0], _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_pd); + + init_dest(&res, &exp); + EMULATE(cvtph2_pd)(&exp, src1, 0xcc, 0); + DF(res) = INTRINSIC (_mask_cvt_roundph_pd) (DF(res), 0xcc, src1.xmmh[0], _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_pd); + + EMULATE(cvtph2_pd)(&exp, src1, 0xc1, 1); + DF(res) = INTRINSIC (_maskz_cvt_roundph_pd) (0xc1, src1.xmmh[0], _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_pd); +#endif + + if (n_errs != 0) { + abort (); +} +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2psx-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2psx-1a.c new file mode 100644 index 0000000..c79549f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2psx-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512 res, res1, res2; +volatile __m256h x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtxph_ps (x1); + res1 = _mm512_mask_cvtxph_ps (res, m16, x2); + res2 = _mm512_maskz_cvtxph_ps (m16, x3); + res = _mm512_cvtx_roundph_ps (x1, 4); + res1 = _mm512_mask_cvtx_roundph_ps (res, m16, x2, 8); + res2 = _mm512_maskz_cvtx_roundph_ps (m16, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2psx-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2psx-1b.c new file mode 100644 index 0000000..a2f20c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2psx-1b.c @@ -0,0 +1,81 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 32) +#define CHECK_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtxph2_ps) (V512 * dest, V512 op1, int n_el, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < n_el; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.f32[i] = v1.f32[i]; + } + } + + for (i = n_el; i < 16; i++) + 
v5.u32[i] = 0; + + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtxph2_ps)(&exp, src1, N_ELEMS, 0xffff, 0); + SF(res) = INTRINSIC (_cvtxph_ps) (H_HF(src1)); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _cvtxph_ps); + + init_dest(&res, &exp); + EMULATE(cvtxph2_ps)(&exp, src1, N_ELEMS, 0xcc, 0); + SF(res) = INTRINSIC (_mask_cvtxph_ps) (SF(res), 0xcc, H_HF(src1)); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _mask_cvtxph_ps); + + EMULATE(cvtxph2_ps)(&exp, src1, N_ELEMS, 0xc1, 1); + SF(res) = INTRINSIC (_maskz_cvtxph_ps) (0xc1, H_HF(src1)); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _maskz_cvtxph_ps); + +#if AVX512F_LEN == 512 + EMULATE(cvtxph2_ps)(&exp, src1, N_ELEMS, 0xffff, 0); + SF(res) = INTRINSIC (_cvtx_roundph_ps) (H_HF(src1), _ROUND_CUR); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _cvtx_roundph_ps); + + init_dest(&res, &exp); + EMULATE(cvtxph2_ps)(&exp, src1, N_ELEMS, 0xcc, 0); + SF(res) = INTRINSIC (_mask_cvtx_roundph_ps) (SF(res), 0xcc, H_HF(src1), _ROUND_CUR); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _mask_cvtx_roundph_ps); + + EMULATE(cvtxph2_ps)(&exp, src1, N_ELEMS, 0xc1, 1); + SF(res) = INTRINSIC (_maskz_cvtx_roundph_ps) (0xc1, H_HF(src1), _ROUND_CUR); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _maskz_cvtx_roundph_ps); +#endif + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2qq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2qq-1a.c new file mode 100644 index 0000000..d80ee61 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2qq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m128h x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_epi64 (x1); + res1 = _mm512_mask_cvtph_epi64 (res, m8, x2); + res2 = _mm512_maskz_cvtph_epi64 (m8, x3); + res = _mm512_cvt_roundph_epi64 (x1, 4); + res1 = _mm512_mask_cvt_roundph_epi64 (res, m8, x2, 8); + res2 = _mm512_maskz_cvt_roundph_epi64 (m8, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2qq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2qq-1b.c new file mode 100644 index 0000000..42b21cf2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2qq-1b.c @@ -0,0 +1,78 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_q) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) 
{ + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtph_epi64) (src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_epi64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvtph_epi64) (SI(res), 0xcc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_epi64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfa, 1); + SI(res) = INTRINSIC (_maskz_cvtph_epi64) (0xfa, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_epi64); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvt_roundph_epi64) (src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_epi64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvt_roundph_epi64) (SI(res), 0xcc, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_epi64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfa, 1); + SI(res) = INTRINSIC (_maskz_cvt_roundph_epi64) (0xfa, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_epi64); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2udq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2udq-1a.c new file mode 100644 index 0000000..b4a833a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2udq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+\{rz-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m256h x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_epu32 (x1); + res1 = _mm512_mask_cvtph_epu32 (res, m16, x2); + res2 = _mm512_maskz_cvtph_epu32 (m16, x3); + res = _mm512_cvt_roundph_epu32 (x1, 4); + res1 = _mm512_mask_cvt_roundph_epu32 (res, m16, x2, 8); + res2 = _mm512_maskz_cvt_roundph_epu32 (m16, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2udq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2udq-1b.c new file mode 100644 index 0000000..15fa0ba --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2udq-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_d) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, 
v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtph_epu32) (H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_epu32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvtph_epu32) (SI(res), HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_epu32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvtph_epu32) (HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_epu32); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvt_roundph_epu32) (H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_epu32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvt_roundph_epu32) (SI(res), HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_epu32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvt_roundph_epu32) (HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_epu32); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uqq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uqq-1a.c new file mode 100644 index 0000000..b408779 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uqq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m128h x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_epu64 (x1); + res1 = _mm512_mask_cvtph_epu64 (res, m8, x2); + res2 = _mm512_maskz_cvtph_epu64 (m8, x3); + res = _mm512_cvt_roundph_epu64 (x1, 4); + res1 = _mm512_mask_cvt_roundph_epu64 (res, m8, x2, 8); + res2 = _mm512_maskz_cvt_roundph_epu64 (m8, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uqq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uqq-1b.c new file mode 100644 index 0000000..7f34772 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uqq-1b.c @@ -0,0 +1,78 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 
(AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_q) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtph_epu64) (src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_epu64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvtph_epu64) (SI(res), 0xcc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_epu64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfc, 1); + SI(res) = INTRINSIC (_maskz_cvtph_epu64) (0xfc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_epu64); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvt_roundph_epu64) (src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_epu64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvt_roundph_epu64) (SI(res), 0xcc, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_epu64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfc, 1); + SI(res) = INTRINSIC (_maskz_cvt_roundph_epu64) (0xfc, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_epu64); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uw-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uw-1a.c new file mode 100644 index 0000000..2622745 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uw-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m512h x1, x2, x3; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_epu16 (x1); + res1 = _mm512_mask_cvtph_epu16 (res, m32, x2); + res2 = _mm512_maskz_cvtph_epu16 (m32, x3); + res = _mm512_cvt_roundph_epu16 (x1, 4); + res1 = _mm512_mask_cvt_roundph_epu16 (res, m32, x2, 8); + res2 = _mm512_maskz_cvt_roundph_epu16 (m32, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uw-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uw-1b.c new file mode 100644 index 0000000..437a1f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2uw-1b.c @@ -0,0 +1,84 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options 
"-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_w) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + dest->u16[i] = 0; + } + } + else { + dest->u16[i] = v1.f32[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + dest->u16[i+16] = 0; + } + } + else { + dest->u16[i+16] = v2.f32[i]; + } + } +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtph_epu16) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_epu16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvtph_epu16) (SI(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_epu16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvtph_epu16) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_epu16); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvt_roundph_epu16) (HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_epu16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvt_roundph_epu16) (SI(res), MASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_epu16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvt_roundph_epu16) (ZMASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_epu16); +#endif + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2w-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2w-1a.c new file mode 100644 index 0000000..bcaa744 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2w-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m512h x1, x2, x3; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtph_epi16 (x1); + res1 = _mm512_mask_cvtph_epi16 (res, m32, x2); + res2 = _mm512_maskz_cvtph_epi16 (m32, x3); + res = _mm512_cvt_roundph_epi16 (x1, 4); + res1 = _mm512_mask_cvt_roundph_epi16 (res, m32, x2, 8); + res2 = _mm512_maskz_cvt_roundph_epi16 (m32, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2w-1b.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2w-1b.c new file mode 100644 index 0000000..dfa2052 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtph2w-1b.c @@ -0,0 +1,83 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_w) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + dest->u16[i] = 0; + } + } + else { + dest->u16[i] = v1.f32[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + dest->u16[i+16] = 0; + } + } + else { + dest->u16[i+16] = v2.f32[i]; + } + } +} + +void +TEST (void) +{ + V512 res, exp; + + init_src(); + + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtph_epi16) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtph_epi16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvtph_epi16) (SI(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtph_epi16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvtph_epi16) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtph_epi16); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvt_roundph_epi16) (HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundph_epi16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvt_roundph_epi16) (SI(res), MASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundph_epi16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvt_roundph_epi16) (ZMASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundph_epi16); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtps2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtps2ph-1a.c new file mode 100644 index 0000000..cb957f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtps2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res, res1, res2; +volatile __m512 x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtxps_ph (x1); + res1 = _mm512_mask_cvtxps_ph (res, m16, x2); + res2 = _mm512_maskz_cvtxps_ph (m16, 
x3); + res = _mm512_cvtx_roundps_ph (x1, 4); + res1 = _mm512_mask_cvtx_roundps_ph (res, m16, x2, 8); + res2 = _mm512_maskz_cvtx_roundps_ph (m16, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtps2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtps2ph-1b.c new file mode 100644 index 0000000..e316e76 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtps2ph-1b.c @@ -0,0 +1,84 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 32) +#define CHECK_ELEMS (AVX512F_LEN_HALF / 16) + +void NOINLINE +EMULATE(cvtxps2_ph) (V512 * dest, V512 op1, int n_el, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < n_el; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = op1.f32[i]; + } + } + *dest = pack_twops_2ph(v5, v5); + for (i = n_el; i < 16; i++) + dest->u16[i] = 0; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtxps2_ph)(&exp, src3f, N_ELEMS, NET_MASK, 0); + H_HF(res) = INTRINSIC (_cvtxps_ph) (SF(src3f)); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _cvtxps_ph); + + init_dest(&res, &exp); + EMULATE(cvtxps2_ph)(&exp, src3f, N_ELEMS, 0xcc, 0); + H_HF(res) = INTRINSIC (_mask_cvtxps_ph) (H_HF(res), 0xcc, + SF(src3f)); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _mask_cvtxps_ph); + + EMULATE(cvtxps2_ph)(&exp, src3f, N_ELEMS, 0xf1, 1); + H_HF(res) = INTRINSIC (_maskz_cvtxps_ph) (0xf1, SF(src3f)); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _maskz_cvtxps_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtxps2_ph)(&exp, src3f, N_ELEMS, NET_MASK, 0); + H_HF(res) = INTRINSIC (_cvtx_roundps_ph) (SF(src3f), _ROUND_NINT); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _cvtx_roundps_ph); + + init_dest(&res, &exp); + EMULATE(cvtxps2_ph)(&exp, src3f, N_ELEMS, 0xcc, 0); + H_HF(res) = INTRINSIC (_mask_cvtx_roundps_ph) (H_HF(res), 0xcc, + SF(src3f), _ROUND_NINT); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _mask_cvtx_roundps_ph); + + EMULATE(cvtxps2_ph)(&exp, src3f, N_ELEMS, 0xf1, 1); + H_HF(res) = INTRINSIC (_maskz_cvtx_roundps_ph) (0xf1, SF(src3f), _ROUND_NINT); + CHECK_RESULT (&res, &exp, CHECK_ELEMS, _maskz_cvtx_roundps_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtqq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtqq2ph-1a.c new file mode 100644 index 0000000..4e8515e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtqq2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtqq2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2ph\[ 
\\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, res1, res2; +volatile __m512i x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtepi64_ph (x1); + res1 = _mm512_mask_cvtepi64_ph (res, m8, x2); + res2 = _mm512_maskz_cvtepi64_ph (m8, x3); + res = _mm512_cvt_roundepi64_ph (x1, 4); + res1 = _mm512_mask_cvt_roundepi64_ph (res, m8, x2, 8); + res2 = _mm512_maskz_cvt_roundepi64_ph (m8, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtqq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtqq2ph-1b.c new file mode 100644 index 0000000..cb213b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtqq2ph-1b.c @@ -0,0 +1,84 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 64) + +void NOINLINE +EMULATE(cvtq2_ph) (V512 * dest, V512 op1, int n_el, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < n_el; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = op1.u64[i]; + } + } + + // The left part should be zero + for (i = n_el; i < 16; i++) + v5.f32[i] = 0; + + *dest = pack_twops_2ph(v5, v5); +} + +void +TEST (void) +{ + + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, NET_MASK, 0); + res.xmmh[0] = INTRINSIC (_cvtepi64_ph) (SI(src3)); + CHECK_RESULT (&res, &exp, 8, _cvtepi64_ph); + + init_dest(&res, &exp); + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xcc, 0); + res.xmmh[0] = INTRINSIC (_mask_cvtepi64_ph) (res.xmmh[0], 0xcc, SI(src3)); + CHECK_RESULT (&res, &exp, 8, _mask_cvtepi64_ph); + + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xf1, 1); + res.xmmh[0] = INTRINSIC (_maskz_cvtepi64_ph) (0xf1, SI(src3)); + CHECK_RESULT (&res, &exp, 8, _maskz_cvtepi64_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, NET_MASK, 0); + res.xmmh[0] = INTRINSIC (_cvt_roundepi64_ph) (SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _cvt_roundepi64_ph); + + init_dest(&res, &exp); + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xcc, 0); + res.xmmh[0] = INTRINSIC (_mask_cvt_roundepi64_ph) (res.xmmh[0], 0xcc, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _mask_cvt_roundepi64_ph); + + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xf1, 1); + res.xmmh[0] = INTRINSIC (_maskz_cvt_roundepi64_ph) (0xf1, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _maskz_cvt_roundepi64_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsd2sh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsd2sh-1a.c new file mode 100644 index 0000000..b663ca5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsd2sh-1a.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsd2sh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsd2sh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsd2sh\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsd2sh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsd2sh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsd2sh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, x1; +volatile __m128d x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_cvtsd_sh (x1, x2); + res = _mm_mask_cvtsd_sh (res, m8, x1, x2); + res = _mm_maskz_cvtsd_sh (m8, x1, x2); + res = _mm_cvt_roundsd_sh (x1, x2, 8); + res = _mm_mask_cvt_roundsd_sh (res, m8, x1, x2, 8); + res = _mm_maskz_cvt_roundsd_sh (m8, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsd2sh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsd2sh-1b.c new file mode 100644 index 0000000..5523620 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsd2sh-1b.c @@ -0,0 +1,60 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtsd2sh(V512 * dest, V512 op1, V512 op2, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = (float)op2.f64[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtsd2sh(&exp, src1, src2, 0x1, 0); + res.xmmh[0] = _mm_cvt_roundsd_sh(src1.xmmh[0], src2.xmmd[0], + _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundsd_sh"); + + init_dest(&res, &exp); + emulate_vcvtsd2sh(&exp, src1, src2, 0x1, 0); + res.xmmh[0] = _mm_mask_cvt_roundsd_sh(res.xmmh[0], 0x1, src1.xmmh[0], + src2.xmmd[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_mask_cvt_roundsd_sh"); + + emulate_vcvtsd2sh(&exp, src1, src2, 0x2, 1); + res.xmmh[0] = _mm_maskz_cvt_roundsd_sh(0x2, src1.xmmh[0], + src2.xmmd[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_maskz_cvt_roundsd_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2sd-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2sd-1a.c new file mode 100644 index 0000000..59719ed --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2sd-1a.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsh2sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2sd\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2sd\[ 
\\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +volatile __m128d res; +volatile __m128d x1; +volatile __m128h x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_cvtsh_sd (x1, x2); + res = _mm_mask_cvtsh_sd (res, m8, x1, x2); + res = _mm_maskz_cvtsh_sd (m8, x1, x2); + res = _mm_cvt_roundsh_sd (x1, x2, 8); + res = _mm_mask_cvt_roundsh_sd (res, m8, x1, x2, 8); + res = _mm_maskz_cvt_roundsh_sd (m8, x1, x2, 4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2sd-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2sd-1b.c new file mode 100644 index 0000000..e6bdc95 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2sd-1b.c @@ -0,0 +1,57 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtsh2sd(V512 * dest, V512 op1, V512 op2, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + + unpack_ph_2twops(op2, &v3, &v4); + + if ((k&1) || !k) + v5.f64[0] = v3.f32[0]; + else if (zero_mask) + v5.f64[0] = 0; + else + v5.f64[0] = dest->f64[0]; + + v5.f64[1] = op1.f64[1]; + + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtsh2sd(&exp, src1, src2, 0x1, 0); + res.xmmd[0] = _mm_cvt_roundsh_sd(src1.xmmd[0], src2.xmmh[0], + _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundsh_sd"); + + init_dest(&res, &exp); + emulate_vcvtsh2sd(&exp, src1, src2, 0x1, 0); + res.xmmd[0] = _mm_mask_cvt_roundsh_sd(res.xmmd[0], 0x1, src1.xmmd[0], + src2.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_mask_cvt_roundsh_sd"); + + emulate_vcvtsh2sd(&exp, src1, src2, 0x2, 1); + res.xmmd[0] = _mm_maskz_cvt_roundsh_sd(0x2, src1.xmmd[0], + src2.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_maskz_cvt_roundsh_sd"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si-1a.c new file mode 100644 index 0000000..f29c953 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si-1a.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2si\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ + + +#include <immintrin.h> + +volatile __m128h x1; +volatile int res1; + +void extern +avx512f_test (void) +{ + res1 = _mm_cvtsh_i32 (x1); + res1 = _mm_cvt_roundsh_i32 (x1, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si-1b.c new file mode 100644 index 0000000..89c492c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si-1b.c @@ -0,0 +1,54 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 2 + +void NOINLINE +emulate_cvtph2_d(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, 
m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_d(&exp, src1, NET_MASK, 0); + res.i32[0] = _mm_cvt_roundsh_i32(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundsh_i32"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si64-1a.c new file mode 100644 index 0000000..0289ebf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si64-1a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2si\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ + + +#include <immintrin.h> + +volatile __m128h x1; +volatile long long res2; + +void extern +avx512f_test (void) +{ + res2 = _mm_cvtsh_i64 (x1); + res2 = _mm_cvt_roundsh_i64 (x1, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si64-1b.c new file mode 100644 index 0000000..6a5e836 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2si64-1b.c @@ -0,0 +1,52 @@ +/* { dg-do run { target { { ! ia32 } && avx512fp16 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 4 + +void NOINLINE +emulate_cvtph2_q(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_q(&exp, src1, NET_MASK, 0); + res.s64[0] = _mm_cvt_roundsh_i64(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundsh_i64"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2ss-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2ss-1a.c new file mode 100644 index 0000000..e6c369c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2ss-1a.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsh2ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2ss\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2ss\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2ss\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ + +#include <immintrin.h> + +volatile __m128 res; +volatile __m128 x1; +volatile __m128h x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_cvtsh_ss (x1, x2); + res = _mm_mask_cvtsh_ss (res, m8, x1, x2); + res = _mm_maskz_cvtsh_ss (m8, x1, x2); + res = _mm_cvt_roundsh_ss (x1, x2, 8); + res = _mm_mask_cvt_roundsh_ss (res, m8, x1, x2, 8); + res = _mm_maskz_cvt_roundsh_ss (m8, x1, x2, 4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2ss-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2ss-1b.c new file mode 100644 index 0000000..3195983 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2ss-1b.c @@ -0,0 +1,59 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + + void NOINLINE +emulate_vcvtsh2ss(V512 * dest, V512 op1, V512 op2, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op2, &v3, &v4); + if ((k&1) || !k) + v5.f32[0] = v3.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = dest->f32[0]; + + for (i = 1; i < 4; i++) + v5.f32[i] = op1.f32[i]; + + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtsh2ss(&exp, src1, src2, 0x1, 0); + res.xmm[0] = _mm_cvt_roundsh_ss(src1.xmm[0], src2.xmmh[0], + _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundsh_ss"); + + init_dest(&res, &exp); + emulate_vcvtsh2ss(&exp, src1, src2, 0x1, 0); + res.xmm[0] = _mm_mask_cvt_roundsh_ss(res.xmm[0], 0x1, src1.xmm[0], + src2.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_mask_cvt_roundsh_ss"); + + emulate_vcvtsh2ss(&exp, src1, src2, 0x2, 1); + res.xmm[0] = _mm_maskz_cvt_roundsh_ss(0x2, src1.xmm[0], + src2.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_maskz_cvt_roundsh_ss"); + + if (n_errs != 0) { + abort (); + } +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi-1a.c new file mode 100644 index 0000000..7d00867 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi-1a.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2usi\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ + + +#include <immintrin.h> + +volatile __m128h x1; +volatile unsigned int res1; + +void extern +avx512f_test (void) +{ + res1 = _mm_cvtsh_u32 (x1); + res1 = _mm_cvt_roundsh_u32 (x1, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi-1b.c new file mode 100644 index 0000000..466ce6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi-1b.c @@ -0,0 +1,54 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 2 + +void NOINLINE +emulate_cvtph2_d(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + 
if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_d(&exp, src1, NET_MASK, 0); + res.u32[0] = _mm_cvt_roundsh_i32(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundsh_u32"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi64-1a.c new file mode 100644 index 0000000..363252d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi64-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2 " } */ +/* { dg-final { scan-assembler-times "vcvtsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsh2usi\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x1; +volatile unsigned long long res2; + +void extern +avx512f_test (void) +{ + res2 = _mm_cvtsh_u64 (x1); + res2 = _mm_cvt_roundsh_u64 (x1, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi64-1b.c new file mode 100644 index 0000000..74643ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsh2usi64-1b.c @@ -0,0 +1,53 @@ +/* { dg-do run { target { { ! ia32 } && avx512fp16 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 4 + +void NOINLINE +emulate_cvtph2_q(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_q(&exp, src1, NET_MASK, 0); + res.u64[0] = _mm_cvt_roundsh_i64(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, 4, "_mm_cvt_roundsh_u64"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh-1a.c new file mode 100644 index 0000000..19d1b96 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsi2shl\[ \\t\]+\[^%\n\]*%e\[^\{\n\]*\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsi2shl\[ \\t\]+\[^%\n\]*%e\[^\{\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x; +volatile int n; + +void extern +avx512f_test (void) +{ + x = _mm_cvti32_sh (x, n); + x = _mm_cvt_roundi32_sh (x, n, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh-1b.c new file mode 100644 index 0000000..d9c9a85 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh-1b.c @@ -0,0 +1,41 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define 
AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtsi2sh(V512 *dest, V512 op1, + int value_32, __int64_t value_64, int bits) +{ + V512 v1,v2,v5,v6; + unpack_ph_2twops(op1, &v1, &v2); + if (bits == 32) + v5.xmm[0] = _mm_cvt_roundi32_ss (v1.xmm[0], value_32, _ROUND_NINT); +#ifdef __x86_64__ + else + v5.xmm[0] = _mm_cvt_roundi64_ss (v1.xmm[0], value_64, _ROUND_NINT); +#endif + v5.xmm[1] = v1.xmm[1]; + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtsi2sh(&exp, src1, 99, 0, 32); + res.xmmh[0] = _mm_cvt_roundi32_sh(src1.xmmh[0], 99, _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundi32_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh64-1a.c new file mode 100644 index 0000000..7781e36 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh64-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtsi2shq\[ \\t\]+\[^%\n\]*%r\[^\{\n\]*\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtsi2shq\[ \\t\]+\[^%\n\]*%r\[^\{\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x; +volatile long long n; + +void extern +avx512f_test (void) +{ + x = _mm_cvti64_sh (x, n); + x = _mm_cvt_roundi64_sh (x, n, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh64-1b.c new file mode 100644 index 0000000..6f66a87 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtsi2sh64-1b.c @@ -0,0 +1,41 @@ +/* { dg-do run { target { { ! 
ia32 } && avx512fp16 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtsi2sh(V512 *dest, V512 op1, + int value_32, __int64_t value_64, int bits) +{ + V512 v1,v2,v5,v6; + unpack_ph_2twops(op1, &v1, &v2); + if (bits == 32) + v5.xmm[0] = _mm_cvt_roundi32_ss (v1.xmm[0], value_32, _ROUND_NINT); +#ifdef __x86_64__ + else + v5.xmm[0] = _mm_cvt_roundi64_ss (v1.xmm[0], value_64, _ROUND_NINT); +#endif + v5.xmm[1] = v1.xmm[1]; + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtsi2sh(&exp, src1, 0, 99, 64); + res.xmmh[0] = _mm_cvt_roundi64_sh(src1.xmmh[0], 99, _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundi64_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtss2sh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtss2sh-1a.c new file mode 100644 index 0000000..63ad090 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtss2sh-1a.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtss2sh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtss2sh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtss2sh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtss2sh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtss2sh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtss2sh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, x1; +volatile __m128 x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_cvtss_sh (x1, x2); + res = _mm_mask_cvtss_sh (res, m8, x1, x2); + res = _mm_maskz_cvtss_sh (m8, x1, x2); + res = _mm_cvt_roundss_sh (x1, x2, 8); + res = _mm_mask_cvt_roundss_sh (res, m8, x1, x2, 8); + res = _mm_maskz_cvt_roundss_sh (m8, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtss2sh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtss2sh-1b.c new file mode 100644 index 0000000..94981bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtss2sh-1b.c @@ -0,0 +1,60 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtss2sh(V512 * dest, V512 op1, V512 op2, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = op2.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtss2sh(&exp, 
src1, src2, 0x1, 0); + res.xmmh[0] = _mm_cvt_roundss_sh(src1.xmmh[0], src2.xmm[0], + _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundss_sh"); + + init_dest(&res, &exp); + emulate_vcvtss2sh(&exp, src1, src2, 0x1, 0); + res.xmmh[0] = _mm_mask_cvt_roundss_sh(res.xmmh[0], 0x1, src1.xmmh[0], + src2.xmm[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_mask_cvt_roundss_sh"); + + emulate_vcvtss2sh(&exp, src1, src2, 0x2, 1); + res.xmmh[0] = _mm_maskz_cvt_roundss_sh(0x2, src1.xmmh[0], + src2.xmm[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "mm_maskz_cvt_roundss_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2dq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2dq-1a.c new file mode 100644 index 0000000..0e44aaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2dq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m256h x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvttph_epi32 (x1); + res1 = _mm512_mask_cvttph_epi32 (res, m16, x2); + res2 = _mm512_maskz_cvttph_epi32 (m16, x3); + res = _mm512_cvtt_roundph_epi32 (x1, 4); + res1 = _mm512_mask_cvtt_roundph_epi32 (res, m16, x2, 8); + res2 = _mm512_maskz_cvtt_roundph_epi32 (m16, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2dq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2dq-1b.c new file mode 100644 index 0000000..c18fefb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2dq-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_d) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvttph_epi32) (H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvttph_epi32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvttph_epi32) (SI(res), HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvttph_epi32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = 
INTRINSIC (_maskz_cvttph_epi32) (HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvttph_epi32); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtt_roundph_epi32) (H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtt_roundph_epi32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvtt_roundph_epi32) (SI(res), HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtt_roundph_epi32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvtt_roundph_epi32) (HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtt_roundph_epi32); +#endif + + if (n_errs != 0) + abort (); +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2qq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2qq-1a.c new file mode 100644 index 0000000..1241694 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2qq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m128h x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvttph_epi64 (x1); + res1 = _mm512_mask_cvttph_epi64 (res, m8, x2); + res2 = _mm512_maskz_cvttph_epi64 (m8, x3); + res = _mm512_cvtt_roundph_epi64 (x1, 4); + res1 = _mm512_mask_cvtt_roundph_epi64 (res, m8, x2, 8); + res2 = _mm512_maskz_cvtt_roundph_epi64 (m8, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2qq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2qq-1b.c new file mode 100644 index 0000000..2a9a2ca --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2qq-1b.c @@ -0,0 +1,78 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_q) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvttph_epi64) (src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvttph_epi64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvttph_epi64) 
(SI(res), 0xcc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvttph_epi64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfa, 1); + SI(res) = INTRINSIC (_maskz_cvttph_epi64) (0xfa, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvttph_epi64); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtt_roundph_epi64) (src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtt_roundph_epi64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvtt_roundph_epi64) (SI(res), 0xcc, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtt_roundph_epi64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfa, 1); + SI(res) = INTRINSIC (_maskz_cvtt_roundph_epi64) (0xfa, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtt_roundph_epi64); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2udq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2udq-1a.c new file mode 100644 index 0000000..0fd60f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2udq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m256h x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = _mm512_cvttph_epu32 (x1); + res1 = _mm512_mask_cvttph_epu32 (res, m16, x2); + res2 = _mm512_maskz_cvttph_epu32 (m16, x3); + res = _mm512_cvtt_roundph_epu32 (x1, 4); + res1 = _mm512_mask_cvtt_roundph_epu32 (res, m16, x2, 8); + res2 = _mm512_maskz_cvtt_roundph_epu32 (m16, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2udq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2udq-1b.c new file mode 100644 index 0000000..98bce37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2udq-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_d) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvttph_epu32) (H_HF(src1)); + CHECK_RESULT (&res, 
&exp, N_ELEMS, _cvttph_epu32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvttph_epu32) (SI(res), HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvttph_epu32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvttph_epu32) (HALF_MASK, H_HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvttph_epu32); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_d)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtt_roundph_epu32) (H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtt_roundph_epu32); + + init_dest(&res, &exp); + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 0); + SI(res) = INTRINSIC (_mask_cvtt_roundph_epu32) (SI(res), HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtt_roundph_epu32); + + EMULATE(cvtph2_d)(&exp, src1, HALF_MASK, 1); + SI(res) = INTRINSIC (_maskz_cvtt_roundph_epu32) (HALF_MASK, H_HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtt_roundph_epu32); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uqq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uqq-1a.c new file mode 100644 index 0000000..04fee29 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uqq-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m128h x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvttph_epu64 (x1); + res1 = _mm512_mask_cvttph_epu64 (res, m8, x2); + res2 = _mm512_maskz_cvttph_epu64 (m8, x3); + res = _mm512_cvtt_roundph_epu64 (x1, 4); + res1 = _mm512_mask_cvtt_roundph_epu64 (res, m8, x2, 8); + res2 = _mm512_maskz_cvtt_roundph_epu64 (m8, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uqq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uqq-1b.c new file mode 100644 index 0000000..31879ef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uqq-1b.c @@ -0,0 +1,78 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_q) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +TEST (void) 
+{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvttph_epu64) (src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvttph_epu64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvttph_epu64) (SI(res), 0xcc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvttph_epu64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfc, 1); + SI(res) = INTRINSIC (_maskz_cvttph_epu64) (0xfc, src1.xmmh[0]); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvttph_epu64); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_q)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtt_roundph_epu64) (src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtt_roundph_epu64); + + init_dest(&res, &exp); + EMULATE(cvtph2_q)(&exp, src1, 0xcc, 0); + SI(res) = INTRINSIC (_mask_cvtt_roundph_epu64) (SI(res), 0xcc, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtt_roundph_epu64); + + EMULATE(cvtph2_q)(&exp, src1, 0xfc, 1); + SI(res) = INTRINSIC (_maskz_cvtt_roundph_epu64) (0xfc, src1.xmmh[0], _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtt_roundph_epu64); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uw-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uw-1a.c new file mode 100644 index 0000000..b31af84 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uw-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m512h x1, x2, x3; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_cvttph_epu16 (x1); + res1 = _mm512_mask_cvttph_epu16 (res, m32, x2); + res2 = _mm512_maskz_cvttph_epu16 (m32, x3); + res = _mm512_cvtt_roundph_epu16 (x1, 4); + res1 = _mm512_mask_cvtt_roundph_epu16 (res, m32, x2, 8); + res2 = _mm512_maskz_cvtt_roundph_epu16 (m32, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uw-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uw-1b.c new file mode 100644 index 0000000..34e94e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2uw-1b.c @@ -0,0 +1,84 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_w) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if 
(zero_mask) { + dest->u16[i] = 0; + } + } + else { + dest->u16[i] = v1.f32[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + dest->u16[i+16] = 0; + } + } + else { + dest->u16[i+16] = v2.f32[i]; + } + } +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvttph_epu16) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvttph_epu16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvttph_epu16) (SI(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvttph_epu16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvttph_epu16) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvttph_epu16); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtt_roundph_epu16) (HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtt_roundph_epu16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvtt_roundph_epu16) (SI(res), MASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtt_roundph_epu16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvtt_roundph_epu16) (ZMASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtt_roundph_epu16); +#endif + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2w-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2w-1a.c new file mode 100644 index 0000000..a918594 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2w-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512i res, res1, res2; +volatile __m512h x1, x2, x3; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_cvttph_epi16 (x1); + res1 = _mm512_mask_cvttph_epi16 (res, m32, x2); + res2 = _mm512_maskz_cvttph_epi16 (m32, x3); + res = _mm512_cvtt_roundph_epi16 (x1, 4); + res1 = _mm512_mask_cvtt_roundph_epi16 (res, m32, x2, 8); + res2 = _mm512_maskz_cvtt_roundph_epi16 (m32, x3, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2w-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2w-1b.c new file mode 100644 index 0000000..23bc8e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttph2w-1b.c @@ -0,0 +1,83 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtph2_w) (V512 * dest, V512 op1, + 
__mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + dest->u16[i] = 0; + } + } + else { + dest->u16[i] = v1.f32[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + dest->u16[i+16] = 0; + } + } + else { + dest->u16[i+16] = v2.f32[i]; + } + } +} + +void +TEST (void) +{ + V512 res, exp; + + init_src(); + + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvttph_epi16) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvttph_epi16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvttph_epi16) (SI(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvttph_epi16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvttph_epi16) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvttph_epi16); + +#if AVX512F_LEN == 512 + EMULATE(cvtph2_w)(&exp, src1, NET_MASK, 0); + SI(res) = INTRINSIC (_cvtt_roundph_epi16) (HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtt_roundph_epi16); + + init_dest(&res, &exp); + EMULATE(cvtph2_w)(&exp, src1, MASK_VALUE, 0); + SI(res) = INTRINSIC (_mask_cvtt_roundph_epi16) (SI(res), MASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtt_roundph_epi16); + + EMULATE(cvtph2_w)(&exp, src1, ZMASK_VALUE, 1); + SI(res) = INTRINSIC (_maskz_cvtt_roundph_epi16) (ZMASK_VALUE, HF(src1), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtt_roundph_epi16); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c new file mode 100644 index 0000000..80d84fc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x1; +volatile int res1; + +void extern +avx512f_test (void) +{ + res1 = _mm_cvttsh_i32 (x1); + res1 = _mm_cvtt_roundsh_i32 (x1, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c new file mode 100644 index 0000000..c5b0a64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c @@ -0,0 +1,54 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 2 + +void NOINLINE +emulate_cvtph2_d(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } + else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_d(&exp, src1, NET_MASK, 0); + res.i32[0] = _mm_cvtt_roundsh_i32(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, 
N_ELEMS, "_mm_cvtt_roundsh_i32"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c new file mode 100644 index 0000000..76a9053 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x1; +volatile long long res2; + +void extern +avx512f_test (void) +{ + res2 = _mm_cvttsh_i64 (x1); + res2 = _mm_cvtt_roundsh_i64 (x1, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c new file mode 100644 index 0000000..4e0fe5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c @@ -0,0 +1,52 @@ +/* { dg-do run { target { { ! ia32 } && avx512fp16 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 4 + +void NOINLINE +emulate_cvtph2_q(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_q(&exp, src1, NET_MASK, 0); + res.s64[0] = _mm_cvtt_roundsh_i64(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_i64"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c new file mode 100644 index 0000000..5956457 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x1; +volatile unsigned int res1; + +void extern +avx512f_test (void) +{ + res1 = _mm_cvttsh_u32 (x1); + res1 = _mm_cvtt_roundsh_u32 (x1, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c new file mode 100644 index 0000000..214e3e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c @@ -0,0 +1,54 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 2 + +void NOINLINE +emulate_cvtph2_d(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u32[i] = 0; + } + else { + v5.u32[i] = dest->u32[i]; + } + } 
+ else { + v5.u32[i] = v1.f32[i]; + + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_d(&exp, src1, NET_MASK, 0); + res.u32[0] = _mm_cvtt_roundsh_i32(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_u32"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c new file mode 100644 index 0000000..23e8e70 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2 " } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x1; +volatile unsigned long long res2; + +void extern +avx512f_test (void) +{ + res2 = _mm_cvttsh_u64 (x1); + res2 = _mm_cvtt_roundsh_u64 (x1, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c new file mode 100644 index 0000000..863fb6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c @@ -0,0 +1,53 @@ +/* { dg-do run { target { { ! ia32 } && avx512fp16 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 4 + +void NOINLINE +emulate_cvtph2_q(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + + for (i = 0; i < 8; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.u64[i] = 0; + } + else { + v5.u64[i] = dest->u64[i]; + } + } + else { + v5.u64[i] = v1.f32[i]; + } + } + *dest = v5; +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_cvtph2_q(&exp, src1, NET_MASK, 0); + res.u64[0] = _mm_cvtt_roundsh_i64(src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, 4, "_mm_cvtt_roundsh_u64"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtudq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtudq2ph-1a.c new file mode 100644 index 0000000..8d90ef6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtudq2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res, res1, res2; +volatile __m512i x1, x2, x3; +volatile __mmask16 m16; + +void extern +avx512f_test (void) +{ + res = 
_mm512_cvtepu32_ph (x1); + res1 = _mm512_mask_cvtepu32_ph (res, m16, x2); + res2 = _mm512_maskz_cvtepu32_ph (m16, x3); + res = _mm512_cvt_roundepu32_ph (x1, 4); + res1 = _mm512_mask_cvt_roundepu32_ph (res, m16, x2, 8); + res2 = _mm512_maskz_cvt_roundepu32_ph (m16, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtudq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtudq2ph-1b.c new file mode 100644 index 0000000..e9c1cd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtudq2ph-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 32) + +void NOINLINE +EMULATE(cvtd2_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = op1.u32[i]; + } + } + *dest = pack_twops_2ph(v5, v5); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtd2_ph)(&exp, src3, NET_MASK, 0); + H_HF(res)= INTRINSIC (_cvtepu32_ph) (SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtepu32_ph); + + init_dest(&res, &exp); + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 0); + H_HF(res) = INTRINSIC (_mask_cvtepu32_ph) (H_HF(res), HALF_MASK, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtepu32_ph); + + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 1); + H_HF(res) = INTRINSIC (_maskz_cvtepu32_ph) (HALF_MASK, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtepu32_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtd2_ph)(&exp, src3, NET_MASK, 0); + H_HF(res)= INTRINSIC (_cvt_roundepu32_ph) (SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundepu32_ph); + + init_dest(&res, &exp); + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 0); + H_HF(res) = INTRINSIC (_mask_cvt_roundepu32_ph) (H_HF(res), HALF_MASK, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundepu32_ph); + + EMULATE(cvtd2_ph)(&exp, src3, HALF_MASK, 1); + H_HF(res) = INTRINSIC (_maskz_cvt_roundepu32_ph) (HALF_MASK, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundepu32_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuqq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuqq2ph-1a.c new file mode 100644 index 0000000..a234bb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuqq2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2ph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 
1 } } */ + +#include <immintrin.h> + +volatile __m128h res, res1, res2; +volatile __m512i x1, x2, x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtepu64_ph (x1); + res1 = _mm512_mask_cvtepu64_ph (res, m8, x2); + res2 = _mm512_maskz_cvtepu64_ph (m8, x3); + res = _mm512_cvt_roundepu64_ph (x1, 4); + res1 = _mm512_mask_cvt_roundepu64_ph (res, m8, x2, 8); + res2 = _mm512_maskz_cvt_roundepu64_ph (m8, x3, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuqq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuqq2ph-1b.c new file mode 100644 index 0000000..873d910 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuqq2ph-1b.c @@ -0,0 +1,83 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 64) + +void NOINLINE +EMULATE(cvtq2_ph) (V512 * dest, V512 op1, int n_el, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < n_el; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = op1.u64[i]; + } + } + + // The left part should be zero + for (i = n_el; i < 16; i++) + v5.f32[i] = 0; + + *dest = pack_twops_2ph(v5, v5); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, NET_MASK, 0); + res.xmmh[0] = INTRINSIC (_cvtepu64_ph) (SI(src3)); + CHECK_RESULT (&res, &exp, 8, _cvtepu64_ph); + + init_dest(&res, &exp); + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xcc, 0); + res.xmmh[0] = INTRINSIC (_mask_cvtepu64_ph) (res.xmmh[0], 0xcc, SI(src3)); + CHECK_RESULT (&res, &exp, 8, _mask_cvtepu64_ph); + + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xc1, 1); + res.xmmh[0] = INTRINSIC (_maskz_cvtepu64_ph) (0xc1, SI(src3)); + CHECK_RESULT (&res, &exp, 8, _maskz_cvtepu64_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, NET_MASK, 0); + res.xmmh[0] = INTRINSIC (_cvt_roundepu64_ph) (SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _cvt_roundepu64_ph); + + init_dest(&res, &exp); + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xcc, 0); + res.xmmh[0] = INTRINSIC (_mask_cvt_roundepu64_ph) (res.xmmh[0], 0xcc, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _mask_cvt_roundepu64_ph); + + EMULATE(cvtq2_ph)(&exp, src3, N_ELEMS, 0xc1, 1); + res.xmmh[0] = INTRINSIC (_maskz_cvt_roundepu64_ph) (0xc1, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, 8, _maskz_cvt_roundepu64_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh-1a.c new file mode 100644 index 0000000..3b6d095 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtusi2shl\[ \\t\]+\[^%\n\]*%e\[^\{\n\]*\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtusi2shl\[ \\t\]+\[^%\n\]*%e\[^\{\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x; +volatile unsigned n; + +void extern +avx512f_test (void) +{ + x = _mm_cvtu32_sh (x, n); + x = _mm_cvt_roundu32_sh (x, n, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +} diff 
--git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh-1b.c new file mode 100644 index 0000000..d339f0a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh-1b.c @@ -0,0 +1,41 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtusi2sh(V512 *dest, V512 op1, + int value_32, __int64_t value_64, int bits) +{ + V512 v1,v2,v5,v6; + unpack_ph_2twops(op1, &v1, &v2); + if (bits == 32) + v5.xmm[0] = _mm_cvt_roundu32_ss (v1.xmm[0], value_32, _ROUND_NINT); +#ifdef __x86_64__ + else + v5.xmm[0] = _mm_cvt_roundu64_ss (v1.xmm[0], value_64, _ROUND_NINT); +#endif + v5.xmm[1] = v1.xmm[1]; + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtusi2sh(&exp, src1, 99, 0, 32); + res.xmmh[0] = _mm_cvt_roundu32_sh(src1.xmmh[0], 99, _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundu32_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh64-1a.c new file mode 100644 index 0000000..30fcdc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh64-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtusi2shq\[ \\t\]+\[^%\n\]*%r\[^\{\n\]*\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtusi2shq\[ \\t\]+\[^%\n\]*%r\[^\{\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x; +volatile unsigned long long n; + +void extern +avx512f_test (void) +{ + x = _mm_cvtu64_sh (x, n); + x = _mm_cvt_roundu64_sh (x, n, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh64-1b.c new file mode 100644 index 0000000..20e711e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtusi2sh64-1b.c @@ -0,0 +1,41 @@ +/* { dg-do run { target { { ! 
ia32 } && avx512fp16 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_vcvtusi2sh(V512 *dest, V512 op1, + int value_32, __int64_t value_64, int bits) +{ + V512 v1,v2,v5,v6; + unpack_ph_2twops(op1, &v1, &v2); + if (bits == 32) + v5.xmm[0] = _mm_cvt_roundu32_ss (v1.xmm[0], value_32, _ROUND_NINT); +#ifdef __x86_64__ + else + v5.xmm[0] = _mm_cvt_roundu64_ss (v1.xmm[0], value_64, _ROUND_NINT); +#endif + v5.xmm[1] = v1.xmm[1]; + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_vcvtusi2sh(&exp, src1, 0, 99, 64); + res.xmmh[0] = _mm_cvt_roundu64_sh(src1.xmmh[0], 99, _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_cvt_roundu64_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuw2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuw2ph-1a.c new file mode 100644 index 0000000..43c96a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuw2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h res; +volatile __m512i x1; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtepu16_ph (x1); + res = _mm512_mask_cvtepu16_ph (res, m32, x1); + res = _mm512_maskz_cvtepu16_ph (m32, x1); + res = _mm512_cvt_roundepu16_ph (x1, 4); + res = _mm512_mask_cvt_roundepu16_ph (res, m32, x1, 8); + res = _mm512_maskz_cvt_roundepu16_ph (m32, x1, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuw2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuw2ph-1b.c new file mode 100644 index 0000000..6d6b6da --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtuw2ph-1b.c @@ -0,0 +1,93 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtw2_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.f32[i] = v7.f32[i]; + } + } + else { + v5.f32[i] = op1.u16[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.f32[i] = v8.f32[i]; + } + } + else { + v6.f32[i] = op1.u16[i+16]; + } + } + + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtw2_ph)(&exp, src3, 
NET_MASK, 0); + HF(res) = INTRINSIC (_cvtepu16_ph) (SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtepu16_ph); + + init_dest(&res, &exp); + EMULATE(cvtw2_ph)(&exp, src3, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_cvtepu16_ph) (HF(res), MASK_VALUE, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtepu16_ph); + + EMULATE(cvtw2_ph)(&exp, src3, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_cvtepu16_ph) (ZMASK_VALUE, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtepu16_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtw2_ph)(&exp, src3, NET_MASK, 0); + HF(res) = INTRINSIC (_cvt_roundepu16_ph) (SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundepu16_ph); + + init_dest(&res, &exp); + EMULATE(cvtw2_ph)(&exp, src3, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_cvt_roundepu16_ph) (HF(res), MASK_VALUE, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundepu16_ph); + + EMULATE(cvtw2_ph)(&exp, src3, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_cvt_roundepu16_ph) (ZMASK_VALUE, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundepu16_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtw2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtw2ph-1a.c new file mode 100644 index 0000000..c6eaee1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtw2ph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h res; +volatile __m512i x1; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_cvtepi16_ph (x1); + res = _mm512_mask_cvtepi16_ph (res, m32, x1); + res = _mm512_maskz_cvtepi16_ph (m32, x1); + res = _mm512_cvt_roundepi16_ph (x1, 4); + res = _mm512_mask_cvt_roundepi16_ph (res, m32, x1, 8); + res = _mm512_maskz_cvt_roundepi16_ph (m32, x1, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtw2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtw2ph-1b.c new file mode 100644 index 0000000..e02b6fc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvtw2ph-1b.c @@ -0,0 +1,92 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(cvtw2_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.f32[i] = v7.f32[i]; + } + } + else { + v5.f32[i] = 
op1.u16[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.f32[i] = v8.f32[i]; + } + } + else { + v6.f32[i] = op1.u16[i+16]; + } + } + + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(cvtw2_ph)(&exp, src3, NET_MASK, 0); + HF(res) = INTRINSIC (_cvtepi16_ph) (SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvtepi16_ph); + + init_dest(&res, &exp); + EMULATE(cvtw2_ph)(&exp, src3, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_cvtepi16_ph) (HF(res), MASK_VALUE, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvtepi16_ph); + + EMULATE(cvtw2_ph)(&exp, src3, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_cvtepi16_ph) (ZMASK_VALUE, SI(src3)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvtepi16_ph); + +#if AVX512F_LEN == 512 + EMULATE(cvtw2_ph)(&exp, src3, NET_MASK, 0); + HF(res) = INTRINSIC (_cvt_roundepi16_ph) (SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _cvt_roundepi16_ph); + + init_dest(&res, &exp); + EMULATE(cvtw2_ph)(&exp, src3, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_cvt_roundepi16_ph) (HF(res), MASK_VALUE, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_cvt_roundepi16_ph); + + EMULATE(cvtw2_ph)(&exp, src3, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_cvt_roundepi16_ph) (ZMASK_VALUE, SI(src3), _ROUND_NINT); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_cvt_roundepi16_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1a.c new file mode 100644 index 0000000..a97dddf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vfpclassphz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclassphz\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h x512; +volatile __mmask16 m32; + +void extern +avx512dq_test (void) +{ + m32 = _mm512_fpclass_ph_mask (x512, 13); + m32 = _mm512_mask_fpclass_ph_mask (2, x512, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1b.c new file mode 100644 index 0000000..9ffb560 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1b.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "avx512f-helper.h" + +#include <math.h> +#include <limits.h> +#include <float.h> +#include "avx512f-mask-type.h" +#define SIZE (AVX512F_LEN / 16) + +#ifndef __FPCLASSPH__ +#define __FPCLASSPH__ +int check_fp_class_hp (_Float16 src, int imm) +{ + int qNaN_res = isnan (src); + int sNaN_res = isnan (src); + int Pzero_res = (src == 0.0); + int Nzero_res = (src == -0.0); + int PInf_res = (isinf (src) == 1); + int NInf_res = (isinf (src) == -1); + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); + int FinNeg_res = __builtin_finite (src) && (src < 0); + + int result = (((imm & 1) && qNaN_res) + || (((imm >> 1) & 1) && Pzero_res) + || (((imm >> 2) & 1) && Nzero_res) + || (((imm >> 3) & 1) && PInf_res) + || (((imm >> 4) & 1) && NInf_res) + || (((imm >> 5) & 1) && Denorm_res) + || (((imm >> 6) & 1) && FinNeg_res) + || 
(((imm >> 7) & 1) && sNaN_res)); + return result; +} +#endif + +MASK_TYPE +CALC (_Float16 *s1, int imm) +{ + int i; + MASK_TYPE res = 0; + + for (i = 0; i < SIZE; i++) + if (check_fp_class_hp(s1[i], imm)) + res = res | (1 << i); + + return res; +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE res1, res2, res_ref = 0; + MASK_TYPE mask = MASK_VALUE; + + src.a[0] = NAN; + src.a[1] = 1.0 / 0.0; + for (i = 1; i < SIZE; i++) + { + src.a[i] = -24.43 + 0.6 * i; + } + + res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); + res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF); + + res_ref = CALC (src.a, 0xFF); + + if (res_ref != res1) + abort (); + + if ((mask & res_ref) != res2) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclasssh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclasssh-1a.c new file mode 100644 index 0000000..7a31fd8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclasssh-1a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vfpclasssh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclasssh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x128; +volatile __mmask8 m8; + +void extern +avx512dq_test (void) +{ + m8 = _mm_fpclass_sh_mask (x128, 13); + m8 = _mm_mask_fpclass_sh_mask (m8, x128, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclasssh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclasssh-1b.c new file mode 100644 index 0000000..bdc6f9f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclasssh-1b.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "avx512f-helper.h" + +#include <math.h> +#include <limits.h> +#include <float.h> +#include "avx512f-mask-type.h" +#define SIZE (128 / 16) + +#ifndef __FPCLASSSH__ +#define __FPCLASSSH__ +int check_fp_class_hp (_Float16 src, int imm) +{ + int qNaN_res = isnan (src); + int sNaN_res = isnan (src); + int Pzero_res = (src == 0.0); + int Nzero_res = (src == -0.0); + int PInf_res = (isinf (src) == 1); + int NInf_res = (isinf (src) == -1); + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); + int FinNeg_res = __builtin_finite (src) && (src < 0); + + int result = (((imm & 1) && qNaN_res) + || (((imm >> 1) & 1) && Pzero_res) + || (((imm >> 2) & 1) && Nzero_res) + || (((imm >> 3) & 1) && PInf_res) + || (((imm >> 4) & 1) && NInf_res) + || (((imm >> 5) & 1) && Denorm_res) + || (((imm >> 6) & 1) && FinNeg_res) + || (((imm >> 7) & 1) && sNaN_res)); + return result; +} +#endif + +__mmask8 +CALC (_Float16 *s1, int imm) +{ + int i; + __mmask8 res = 0; + + if (check_fp_class_hp(s1[0], imm)) + res = res | 1; + + return res; +} + +void +TEST (void) +{ + int i; + union128h src; + __mmask8 res1, res2, res_ref = 0; + __mmask8 mask = MASK_VALUE; + + src.a[0] = 1.0 / 0.0; + for (i = 1; i < SIZE; i++) + { + src.a[i] = -24.43 + 0.6 * i; + } + + res1 = _mm_fpclass_sh_mask (src.x, 0xFF); + res2 = _mm_mask_fpclass_sh_mask (mask, src.x, 0xFF); + + + res_ref = CALC (src.a, 0xFF); + + if (res_ref != res1) + abort (); + + if ((mask & res_ref) != res2) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpph-1a.c new file mode 
100644 index 0000000..993cbd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1} } */ + +#include <immintrin.h> + +volatile __m512h x; +volatile __mmask32 m; + +void extern +avx512f_test (void) +{ + x = _mm512_getexp_ph (x); + x = _mm512_mask_getexp_ph (x, m, x); + x = _mm512_maskz_getexp_ph (m, x); + x = _mm512_getexp_round_ph (x, _MM_FROUND_NO_EXC); + x = _mm512_mask_getexp_round_ph (x, m, x, _MM_FROUND_NO_EXC); + x = _mm512_maskz_getexp_round_ph (m, x, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpph-1b.c new file mode 100644 index 0000000..3483c95 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpph-1b.c @@ -0,0 +1,99 @@ + /* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(getexp_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + float emu[32]; + __mmask16 m1, m2; + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + v3.zmm = _mm512_getexp_round_ps(v1.zmm, _ROUND_CUR); + v4.zmm = _mm512_getexp_round_ps(v2.zmm, _ROUND_CUR); + for (i=0; i<16; i++) + { + emu[i] = v3.f32[i]; + emu[i+16] = v4.f32[i]; + } + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = emu[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = emu[i+16]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(getexp_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_getexp_ph) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _getexp_ph); + + init_dest(&res, &exp); + EMULATE(getexp_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_getexp_ph) (HF(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_getexp_ph); + + EMULATE(getexp_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_getexp_ph) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_getexp_ph); +#if AVX512F_LEN == 512 + EMULATE(getexp_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_getexp_round_ph) (HF(src1), _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _getexp_round_ph); + + init_dest(&res, &exp); + EMULATE(getexp_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_getexp_round_ph) 
(HF(res), MASK_VALUE, HF(src1), + _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_getexp_round_ph); + + EMULATE(getexp_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_getexp_round_ph) (ZMASK_VALUE, HF(src1), _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_getexp_round_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpsh-1a.c new file mode 100644 index 0000000..397fd3e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpsh-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vgetexpsh\[ \\t\]+\[^\{\n\]\[^\n\]*%xmm\[0-9\]+\, %xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetexpsh\[ \\t\]+\[^\{\n\]\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetexpsh\[ \\t\]+\[^\{\n\]\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetexpsh\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\, %xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetexpsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetexpsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x; +volatile __mmask8 m; + +void extern +avx512f_test (void) +{ + x = _mm_getexp_sh (x, x); + x = _mm_mask_getexp_sh (x, m, x, x); + x = _mm_maskz_getexp_sh (m, x, x); + x = _mm_getexp_round_sh (x, x, _MM_FROUND_NO_EXC); + x = _mm_mask_getexp_round_sh (x, m, x, x, _MM_FROUND_NO_EXC); + x = _mm_maskz_getexp_round_sh (m, x, x, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpsh-1b.c new file mode 100644 index 0000000..ca9834d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetexpsh-1b.c @@ -0,0 +1,61 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_getexp_sh(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v0, v1, v2, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + v0.xmm[0] = _mm_getexp_round_ss (v1.xmm[0], v1.xmm[0], _ROUND_CUR); + + if ((k&1) || !k) + v5.f32[0] = v0.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + emulate_getexp_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_getexp_round_sh(exp.xmmh[0], src1.xmmh[0], _ROUND_CUR); + check_results(&res, &exp, N_ELEMS, "_mm_getexp_round_sh"); + + init_dest(&res, &exp); + emulate_getexp_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_getexp_round_sh(res.xmmh[0], 0x1, exp.xmmh[0], + src1.xmmh[0], _ROUND_CUR); + check_results(&res, &exp, N_ELEMS, "_mm_mask_getexp_round_sh"); + + emulate_getexp_sh(&exp, src1, 0x3, 1); + res.xmmh[0] = _mm_maskz_getexp_round_sh(0x3, exp.xmmh[0], src1.xmmh[0], + _ROUND_CUR); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_getexp_round_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git 
a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantph-1a.c new file mode 100644 index 0000000..69e0c72 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h x, y; +volatile __mmask32 m; + +void extern +avx512f_test (void) +{ + x = _mm512_getmant_ph (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm512_mask_getmant_ph (x, m, y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm512_maskz_getmant_ph (m, y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm512_getmant_round_ph (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC); + x = _mm512_mask_getmant_round_ph (x, m, y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC); + x = _mm512_maskz_getmant_round_ph (m, y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantph-1b.c new file mode 100644 index 0000000..c18d1aa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantph-1b.c @@ -0,0 +1,102 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(getmant_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + float emu[32]; + __mmask16 m1, m2; + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + v3.zmm = _mm512_getmant_round_ps(v1.zmm, 2, 0, _ROUND_CUR); + v4.zmm = _mm512_getmant_round_ps(v2.zmm, 2, 0, _ROUND_CUR); + for (i=0; i<16; i++) + { + emu[i] = v3.f32[i]; + emu[i+16] = v4.f32[i]; + } + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = emu[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = emu[i+16]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(getmant_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_getmant_ph) (HF(src1), 2, 0); + CHECK_RESULT (&res, &exp, N_ELEMS, _getmant_ph); + + init_dest(&res, &exp); + EMULATE(getmant_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_getmant_ph) (HF(res), MASK_VALUE, + HF(src1), 2, 0); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_getmant_ph); + + EMULATE(getmant_ph) (&exp, src1, ZMASK_VALUE, 1); 
+ HF(res) = INTRINSIC (_maskz_getmant_ph) (ZMASK_VALUE, HF(src1), + 2, 0); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_getmant_ph); +#if AVX512F_LEN == 512 + EMULATE(getmant_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_getmant_round_ph) (HF(src1), 2, 0, _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _getmant_round_ph); + + init_dest(&res, &exp); + EMULATE(getmant_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_getmant_round_ph) (HF(res), MASK_VALUE, + HF(src1), 2, 0, _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_getmant_round_ph); + + EMULATE(getmant_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_getmant_round_ph) (ZMASK_VALUE, HF(src1), + 2, 0, _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_getmant_round_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantsh-1a.c new file mode 100644 index 0000000..b533f20 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantsh-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vgetmantsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantsh\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantsh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h x, y, z; +volatile __mmask8 m; + +void extern +avx512f_test (void) +{ + x = _mm_getmant_sh (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm_mask_getmant_sh (x, m, y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm_maskz_getmant_sh (m, y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm_getmant_round_sh (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC); + x = _mm_mask_getmant_round_sh (x, m, y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC); + x = _mm_maskz_getmant_round_sh (m, y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantsh-1b.c new file mode 100644 index 0000000..bee8b04 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vgetmantsh-1b.c @@ -0,0 +1,62 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_getmant_sh(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v0, v1, v2, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + v0.xmm[0] = _mm_getmant_round_ss (v1.xmm[0], v1.xmm[0], 2, 0, _ROUND_CUR); + + if ((k&1) || !k) + v5.f32[0] = v0.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 
res; + V512 exp; + + init_src(); + + emulate_getmant_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_getmant_round_sh(src1.xmmh[0], exp.xmmh[0], + 2, 0, _ROUND_CUR); + check_results(&res, &exp, 1, "_mm_getmant_round_sh"); + + init_dest(&res, &exp); + emulate_getmant_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_getmant_round_sh(res.xmmh[0], 0x1, src1.xmmh[0], + exp.xmmh[0], 2, 0, _ROUND_CUR); + check_results(&res, &exp, 1, "_mm_mask_getmant_round_sh"); + + emulate_getmant_sh(&exp, src1, 0x3, 1); + res.xmmh[0] = _mm_maskz_getmant_round_sh(0x3, src1.xmmh[0], exp.xmmh[0], + 2, 0, _ROUND_CUR); + check_results(&res, &exp, 1, "_mm_maskz_getmant_round_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovsh-1a.c new file mode 100644 index 0000000..e35be10 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovsh-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r\]*%\[er\]ax+\[^\n\r]*\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+\[^\n\r\]*%\[er\]ax+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+\[^\n\r\]*%\[er\]ax+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^z\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +extern _Float16 const* p; +volatile __m128h x1, x2, res; +volatile __mmask8 m8; + +void +avx512f_test (void) +{ + x2 = _mm_mask_load_sh (x1, m8, p); + x2 = _mm_maskz_load_sh (m8, p); + _mm_mask_store_sh (p, m8, x1); + + res = _mm_move_sh (x1, x2); + res = _mm_mask_move_sh (res, m8, x1, x2); + res = _mm_maskz_move_sh (m8, x1, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovsh-1b.c new file mode 100644 index 0000000..cea224a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovsh-1b.c @@ -0,0 +1,115 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +void NOINLINE +emulate_mov2_load_sh(V512 * dest, V512 op1, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = v1.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; //remains unchanged + + for (i = 1; i < 8; i++) + v5.f32[i] = 0; + + *dest = pack_twops_2ph(v5, v6); +} + +void NOINLINE +emulate_mov3_load_sh(V512 * dest, V512 op1, V512 op2, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = v3.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; //remains unchanged + + for (i = 1; i < 8; i++) + v5.f32[i] = 
v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void NOINLINE +emulate_mov2_store_sh(V512 * dest, V512 op1, __mmask8 k) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = v1.f32[0]; + else + v5.f32[0] = v7.f32[0]; //remains unchanged + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + // no mask + emulate_mov2_load_sh (&exp, src1, 0x0, 0); + res.xmmh[0] = _mm_load_sh((const void *)&(src1.u16[0])); + check_results(&res, &exp, 8, "_mm_load_sh"); + + // with mask and mask bit is set + emulate_mov2_load_sh (&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_load_sh(res.xmmh[0], 0x1, (const void *)&(src1.u16[0])); + check_results(&res, &exp, 8, "_mm__mask_load_sh"); + + // with zero-mask + emulate_mov2_load_sh (&exp, src1, 0x0, 1); + res.xmmh[0] = _mm_maskz_load_sh(0x1, (const void *)&(src1.u16[0])); + check_results(&res, &exp, 8, "_mm_maskz_load_sh"); + + emulate_mov3_load_sh (&exp, src1, src2, 0x1, 0); + res.xmmh[0] = _mm_mask_move_sh(res.xmmh[0], 0x1, src1.xmmh[0], src2.xmmh[0]); + check_results(&res, &exp, 8, "_mm_mask_move_sh"); + + emulate_mov3_load_sh (&exp, src1, src2, 0x1, 1); + res.xmmh[0] = _mm_maskz_move_sh(0x1, src1.xmmh[0], src2.xmmh[0]); + check_results(&res, &exp, 8, "_mm_maskz_move_sh"); + + // no mask + emulate_mov2_store_sh (&exp, src1, 0x0); + _mm_store_sh((void *)&(res.u16[0]), src1.xmmh[0]); + check_results(&exp, &res, 1, "_mm_store_sh"); + + // with mask + emulate_mov2_store_sh (&exp, src1, 0x1); + _mm_mask_store_sh((void *)&(res.u16[0]), 0x1, src1.xmmh[0]); + check_results(&exp, &res, 1, "_mm_mask_store_sh"); + + if (n_errs != 0) { + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-1a.c new file mode 100644 index 0000000..177802c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-1a.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vmovw\[^-]" 1 } } */ +/* { dg-final { scan-assembler-times "vpextrw" 1 } } */ +#include <immintrin.h> + +volatile __m128i x1; +volatile short x2; + +void extern +avx512f_test (void) +{ + x1 = _mm_cvtsi16_si128 (x2); + x2 = _mm_cvtsi128_si16 (x1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-1b.c new file mode 100644 index 0000000..a96007d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-1b.c @@ -0,0 +1,27 @@ +/* { dg-do run {target avx512fp16} } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" + +static void +do_test (void) +{ + union128i_w u; + short b = 128; + short e[8] = {0,0,0,0,0,0,0,0}; + + u.x = _mm_cvtsi16_si128 (b); + + e[0] = b; + + if (check_union128i_w (u, e)) + abort (); + u.a[0] = 123; + b = _mm_cvtsi128_si16 (u.x); + if (u.a[0] != b) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-2a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-2a.c new file mode 100644 index 0000000..efa24e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-2a.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +typedef short __v8hi __attribute__ ((__vector_size__ (16))); +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); + 
+__m128i +__attribute__ ((noinline, noclone)) +foo1 (short x) +{ + return __extension__ (__m128i)(__v8hi) { x, 0, 0, 0, 0, 0, 0, 0 }; +} + +__m128i +__attribute__ ((noinline, noclone)) +foo2 (short *x) +{ + return __extension__ (__m128i)(__v8hi) { *x, 0, 0, 0, 0, 0, 0, 0 }; +} + +/* { dg-final { scan-assembler-times "vmovw\[^-\n\r]*xmm0" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-2b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-2b.c new file mode 100644 index 0000000..b680a16 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-2b.c @@ -0,0 +1,53 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +#include <string.h> + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" +#include "avx512fp16-vmovw-2a.c" + +__m128i +__attribute__ ((noinline,noclone)) +foo3 (__m128i x) +{ + return foo1 (((__v8hi) x)[0]); +} + +static void +do_test (void) +{ + short x; + union128i_w u = { -1, -1,}; + union128i_w exp = { 0, 0}; + __m128i v; + union128i_w a; + + x = 25; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo1 (x); + a.x = v; + if (check_union128i_w (a, exp.a)) + abort (); + + x = 33; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo2 (&x); + a.x = v; + if (check_union128i_w (a, exp.a)) + abort (); + + x = -33; + u.a[0] = x; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo3 (u.x); + a.x = v; + if (check_union128i_w (a, exp.a)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-3a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-3a.c new file mode 100644 index 0000000..c603107 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-3a.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef long long __m256i __attribute__ ((__vector_size__ (32), __may_alias__)); + +__m256i +__attribute__ ((noinline, noclone)) +foo1 (short x) +{ + return __extension__ (__m256i)(__v16hi) { x, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +__m256i +__attribute__ ((noinline, noclone)) +foo2 (short *x) +{ + return __extension__ (__m256i)(__v16hi) { *x, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +/* { dg-final { scan-assembler-times "vmovw\[^-\n\r]*xmm0" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-3b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-3b.c new file mode 100644 index 0000000..13c1f65 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-3b.c @@ -0,0 +1,52 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +#include <string.h> + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" +#include "avx512fp16-vmovw-3a.c" + +__m256i +__attribute__ ((noinline,noclone)) +foo3 (__m256i x) +{ + return foo1 (((__v16hi) x)[0]); +} + +static void +do_test (void) +{ + short x = 25; + union256i_w u = { -1, -1, -1, -1 }; + union256i_w exp = { 0, 0, 0, 0 }; + + __m256i v; + union256i_w a; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo1 (x); + a.x = v; + if (check_union256i_w (a, exp.a)) + abort (); + + x = 33; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo2 (&x); + a.x = v; + if (check_union256i_w (a, exp.a)) + abort (); + + x = -23; + u.a[0] = x; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo3 (u.x); + a.x = v; + if (check_union256i_w (a, exp.a)) + abort (); +} diff --git 
a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-4a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-4a.c new file mode 100644 index 0000000..2ba198d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-4a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); + +__m512i +__attribute__ ((noinline, noclone)) +foo1 (short x) +{ + return __extension__ (__m512i)(__v32hi) { x, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +__m512i +__attribute__ ((noinline, noclone)) +foo2 (short *x) +{ + return __extension__ (__m512i)(__v32hi) { *x, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +/* { dg-final { scan-assembler-times "vmovw\[^-\n\r]*xmm0" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-4b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-4b.c new file mode 100644 index 0000000..ec6477b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vmovw-4b.c @@ -0,0 +1,52 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16" } */ + +#include <string.h> + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512-check.h" +#include "avx512fp16-vmovw-4a.c" + +__m512i +__attribute__ ((noinline,noclone)) +foo3 (__m512i x) +{ + return foo1 (((__v32hi) x)[0]); +} + +static void +do_test (void) +{ + short x = 25; + union512i_w u = { -1, -1, -1, -1, -1, -1, -1, -1 }; + union512i_w exp = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + __m512i v; + union512i_w a; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo1 (x); + a.x = v; + if (check_union512i_w (a, exp.a)) + abort (); + + x = 55; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo2 (&x); + a.x = v; + if (check_union512i_w (a, exp.a)) + abort (); + + x = 33; + u.a[0] = x; + exp.a[0] = x; + memset (&v, -1, sizeof (v)); + v = foo3 (u.x); + a.x = v; + if (check_union512i_w (a, exp.a)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpph-1a.c new file mode 100644 index 0000000..6a5c642 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpph-1a.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h res; +volatile __m512h x1; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_rcp_ph (x1); + res = _mm512_mask_rcp_ph (res, m32, x1); + res = _mm512_maskz_rcp_ph (m32, x1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpph-1b.c new file mode 100644 index 0000000..4a65451 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpph-1b.c @@ -0,0 +1,79 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include 
"avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(rcp_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = 1. / v1.f32[i]; + + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = 1. / v2.f32[i]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(rcp_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_rcp_ph) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _rcp_ph); + + init_dest(&res, &exp); + EMULATE(rcp_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_rcp_ph) (HF(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_rcp_ph); + + EMULATE(rcp_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_rcp_ph) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_rcp_ph); + + if (n_errs != 0) + abort (); +} + + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpsh-1a.c new file mode 100644 index 0000000..0a5a18e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpsh-1a.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vrcpsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrcpsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrcpsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, x1, x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_rcp_sh (x1, x2); + res = _mm_mask_rcp_sh (res, m8, x1, x2); + res = _mm_maskz_rcp_sh (m8, x1, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpsh-1b.c new file mode 100644 index 0000000..5316895 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrcpsh-1b.c @@ -0,0 +1,57 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_rcp_sh(V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = 1. 
/ v1.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + emulate_rcp_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_rcp_sh(exp.xmmh[0], src1.xmmh[0]); + check_results(&res, &exp, N_ELEMS, "_mm_rcp_sh"); + + init_dest(&res, &exp); + emulate_rcp_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_rcp_sh(res.xmmh[0], 0x1, exp.xmmh[0], src1.xmmh[0]); + check_results(&res, &exp, N_ELEMS, "_mm_mask_rcp_sh"); + + emulate_rcp_sh(&exp, src1, 0x3, 1); + res.xmmh[0] = _mm_maskz_rcp_sh(0x3, exp.xmmh[0], src1.xmmh[0]); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_rcp_sh"); + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vreduceph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vreduceph-1a.c new file mode 100644 index 0000000..536c1ef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vreduceph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*\{sae\}\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 123 + +volatile __m512h x1; +volatile __mmask32 m; + +void extern +avx512fp16_test (void) +{ + x1 = _mm512_reduce_ph (x1, IMM); + x1 = _mm512_mask_reduce_ph (x1, m, x1, IMM); + x1 = _mm512_maskz_reduce_ph (m, x1, IMM); + x1 = _mm512_reduce_round_ph (x1, IMM, 8); + x1 = _mm512_mask_reduce_round_ph (x1, m, x1, IMM, 8); + x1 = _mm512_maskz_reduce_round_ph (m, x1, IMM, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vreduceph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vreduceph-1b.c new file mode 100644 index 0000000..20d1ba5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vreduceph-1b.c @@ -0,0 +1,116 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +#ifndef __REDUCEPH__ +#define __REDUCEPH__ +V512 borrow_reduce_ps(V512 v, int imm8) +{ + V512 temp; + switch (imm8) + { + case 1: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 1);break; + case 2: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 2);break; + case 3: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 3);break; + case 4: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 4);break; + case 5: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 5);break; + case 6: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 6);break; + case 7: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 7);break; + case 8: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 8);break; + } + return temp; +} +#endif + +void NOINLINE +EMULATE(reduce_ph) (V512 * 
dest, V512 op1, + __mmask32 k, int imm8, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + V512 t1,t2; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + t1 = borrow_reduce_ps(v1, imm8); + t2 = borrow_reduce_ps(v2, imm8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = t1.f32[i]; + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = t2.f32[i]; + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(reduce_ph) (&exp, src1, NET_MASK, 6, 0); + HF(res) = INTRINSIC (_reduce_ph) (HF(src1), 6); + CHECK_RESULT (&res, &exp, N_ELEMS, _reduce_ph); + + init_dest(&res, &exp); + EMULATE(reduce_ph) (&exp, src1, MASK_VALUE, 5, 0); + HF(res) = INTRINSIC (_mask_reduce_ph) (HF(res), MASK_VALUE, HF(src1), 5); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_reduce_ph); + + EMULATE(reduce_ph) (&exp, src1, ZMASK_VALUE, 4, 1); + HF(res) = INTRINSIC (_maskz_reduce_ph) (ZMASK_VALUE, HF(src1), 4); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_reduce_ph); + +#if AVX512F_LEN == 512 + EMULATE(reduce_ph) (&exp, src1, NET_MASK, 6, 0); + HF(res) = INTRINSIC (_reduce_round_ph) (HF(src1), 6, _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _reduce_round_ph); + + init_dest(&res, &exp); + EMULATE(reduce_ph) (&exp, src1, MASK_VALUE, 5, 0); + HF(res) = INTRINSIC (_mask_reduce_round_ph) (HF(res), MASK_VALUE, HF(src1), 5, _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_reduce_round_ph); + + EMULATE(reduce_ph) (&exp, src1, ZMASK_VALUE, 4, 1); + HF(res) = INTRINSIC (_maskz_reduce_round_ph) (ZMASK_VALUE, HF(src1), 4, _ROUND_CUR); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_reduce_round_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vreducesh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vreducesh-1a.c new file mode 100644 index 0000000..8036991 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vreducesh-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vreducesh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vreducesh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducesh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducesh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducesh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include <immintrin.h> + +#define IMM 123 + +volatile __m128h x1, x2; +volatile __mmask8 m; + +void extern +avx512fp16_test (void) +{ + x1 = _mm_reduce_sh (x1, x2, IMM); + x1 = _mm_mask_reduce_sh(x1, m, x1, x2, IMM); + x1 = _mm_maskz_reduce_sh(m, x1, x2, IMM); + x1 = _mm_reduce_round_sh (x1, x2, IMM, 4); + x1 = _mm_mask_reduce_round_sh(x1, m, x1, x2, IMM, 8); + x1 = 
_mm_maskz_reduce_round_sh(m, x1, x2, IMM, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vreducesh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vreducesh-1b.c new file mode 100644 index 0000000..4c5dfe7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vreducesh-1b.c @@ -0,0 +1,78 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +V512 borrow_reduce_ps(V512 v, int imm8) +{ + V512 temp; + switch (imm8) + { + case 1: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 1);break; + case 2: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 2);break; + case 3: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 3);break; + case 4: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 4);break; + case 5: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 5);break; + case 6: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 6);break; + case 7: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 7);break; + case 8: temp.zmm = _mm512_mask_reduce_ps (v.zmm, 0xffff, v.zmm, 8);break; + } + return temp; +} + +void NOINLINE +emulate_reduce_sh(V512 * dest, V512 op1, + __mmask32 k, int imm8, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + V512 t1; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + t1 = borrow_reduce_ps(v1, imm8); + + if ((k&1) || !k) + v5.f32[0] = t1.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + emulate_reduce_sh(&exp, src1, 0x1, 8, 0); + res.xmmh[0] = _mm_reduce_round_sh(src1.xmmh[0], exp.xmmh[0], 8, _ROUND_CUR); + check_results(&res, &exp, N_ELEMS, "_mm_reduce_round_sh"); + + init_dest(&res, &exp); + emulate_reduce_sh(&exp, src1, 0x1, 7, 0); + res.xmmh[0] = _mm_mask_reduce_round_sh(res.xmmh[0], 0x1, src1.xmmh[0], exp.xmmh[0], 7, _ROUND_CUR); + check_results(&res, &exp, N_ELEMS, "_mm_mask_reduce_round_sh"); + + emulate_reduce_sh(&exp, src1, 0x3, 6, 1); + res.xmmh[0] = _mm_maskz_reduce_round_sh(0x3, src1.xmmh[0], exp.xmmh[0], 6, _ROUND_CUR); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_reduce_round_sh"); + + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscaleph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscaleph-1a.c new file mode 100644 index 0000000..8a30727 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscaleph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 
123 + +volatile __m512h x1; +volatile __mmask32 m; + +void extern +avx512fp16_test (void) +{ + x1 = _mm512_roundscale_ph (x1, IMM); + x1 = _mm512_mask_roundscale_ph (x1, m, x1, IMM); + x1 = _mm512_maskz_roundscale_ph (m, x1, IMM); + x1 = _mm512_roundscale_round_ph (x1, IMM, 8); + x1 = _mm512_mask_roundscale_round_ph (x1, m, x1, IMM, 8); + x1 = _mm512_maskz_roundscale_round_ph (m, x1, IMM, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscaleph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscaleph-1b.c new file mode 100644 index 0000000..d50e755 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscaleph-1b.c @@ -0,0 +1,101 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(roundscale_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask, int round) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + V512 t1, t2; + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + if (round==0) + { + t1.zmm = _mm512_maskz_roundscale_ps (0xffff, v1.zmm, 0x11); + t2.zmm = _mm512_maskz_roundscale_ps (0xffff, v2.zmm, 0x11); + } + else + { + t1.zmm = _mm512_maskz_roundscale_ps (0xffff, v1.zmm, 0x14); + t2.zmm = _mm512_maskz_roundscale_ps (0xffff, v2.zmm, 0x14); + } + for (i = 0; i < 16; i++) + { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = t1.f32[i]; + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = t2.f32[i]; + } + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res, exp; + + init_src(); + + EMULATE(roundscale_ph) (&exp, src1, NET_MASK, 0, 1); + HF(res) = INTRINSIC (_roundscale_ph) (HF(src1), 0x13); + CHECK_RESULT (&res, &exp, N_ELEMS, _roundscale_ph); + + init_dest(&res, &exp); + EMULATE(roundscale_ph) (&exp, src1, MASK_VALUE, 0, 1); + HF(res) = INTRINSIC (_mask_roundscale_ph) (HF(res), MASK_VALUE, HF(src1), 0x14); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_roundscale_ph); + + EMULATE(roundscale_ph) (&exp, src1, ZMASK_VALUE, 1, 1); + HF(res) = INTRINSIC (_maskz_roundscale_ph) (ZMASK_VALUE, HF(src1), 0x14); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_roundscale_ph); + +#if AVX512F_LEN == 512 + EMULATE(roundscale_ph) (&exp, src1, NET_MASK, 0, 1); + HF(res) = INTRINSIC (_roundscale_round_ph) (HF(src1), 0x13, 0x08); + CHECK_RESULT (&res, &exp, N_ELEMS, _roundscale_round_ph); + + init_dest(&res, &exp); + EMULATE(roundscale_ph) (&exp, src1, MASK_VALUE, 0, 1); + HF(res) = INTRINSIC (_mask_roundscale_round_ph) (HF(res), MASK_VALUE, HF(src1), 0x14, 0x08); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_roundscale_round_ph); + + EMULATE(roundscale_ph) (&exp, src1, ZMASK_VALUE, 1, 1); + HF(res) = INTRINSIC (_maskz_roundscale_round_ph) (ZMASK_VALUE, HF(src1), 0x14, 0x08); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_roundscale_round_ph); +#endif + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscalesh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscalesh-1a.c new file mode 100644 index 0000000..bd41b63 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscalesh-1a.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { 
scan-assembler-times "vrndscalesh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 123 + +volatile __m128h x1, x2; +volatile __mmask8 m; + +void extern +avx512fp16_test (void) +{ + x1 = _mm_roundscale_sh (x1, x2, IMM); + x1 = _mm_mask_roundscale_sh(x1, m, x1, x2, IMM); + x1 = _mm_maskz_roundscale_sh(m, x1, x2, IMM); + x1 = _mm_roundscale_round_sh (x1, x2, IMM, 4); + x1 = _mm_mask_roundscale_round_sh(x1, m, x1, x2, IMM, 8); + x1 = _mm_maskz_roundscale_round_sh(m, x1, x2, IMM, 8); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscalesh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscalesh-1b.c new file mode 100644 index 0000000..c103389 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrndscalesh-1b.c @@ -0,0 +1,62 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_roundscale_sh(V512 * dest, V512 op1, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + V512 t1,t2; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + t1.zmm = _mm512_maskz_roundscale_ps (0xffff, v1.zmm, 0x14); + t2.zmm = _mm512_maskz_roundscale_ps (0xffff, v2.zmm, 0x14); + + if ((k&1) || !k) + v5.f32[0] = t1.f32[0]; + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + emulate_roundscale_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_roundscale_round_sh(src1.xmmh[0], src1.xmmh[0], 0x1, 0x08); + check_results(&res, &exp, N_ELEMS, "_mm_roundscale_round_sh"); + + init_dest(&res, &exp); + emulate_roundscale_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_roundscale_round_sh(res.xmmh[0], + 0x1, src1.xmmh[0], src1.xmmh[0], 0x1, 0x08); + check_results(&res, &exp, N_ELEMS, "_mm_mask_roundscale_round_sh"); + + emulate_roundscale_sh(&exp, src1, 0x3, 1); + res.xmmh[0] = _mm_maskz_roundscale_round_sh(0x3, src1.xmmh[0], src1.xmmh[0], 0x1, 0x08); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_roundscale_round_sh"); + + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtph-1a.c new file mode 100644 index 0000000..c9671e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtph-1a.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h res; +volatile __m512h x1; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_rsqrt_ph (x1); + res = _mm512_mask_rsqrt_ph (res, m32, x1); + res = _mm512_maskz_rsqrt_ph (m32, x1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtph-1b.c new file mode 100644 index 0000000..237971d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtph-1b.c @@ -0,0 +1,77 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(rsqrt_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = 1. / sqrtf(v1.f32[i]); + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = 1. / sqrtf(v2.f32[i]); + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(rsqrt_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_rsqrt_ph) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _rsqrt_ph); + + init_dest(&res, &exp); + EMULATE(rsqrt_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_rsqrt_ph) (HF(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_rsqrt_ph); + + EMULATE(rsqrt_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_rsqrt_ph) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_rsqrt_ph); + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtsh-1a.c new file mode 100644 index 0000000..060ce33 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtsh-1a.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vrsqrtsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrsqrtsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, x1, x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_rsqrt_sh (x1, x2); + res = _mm_mask_rsqrt_sh (res, m8, x1, x2); + res = _mm_maskz_rsqrt_sh (m8, x1, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtsh-1b.c new file mode 100644 index 0000000..5f20de7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vrsqrtsh-1b.c @@ -0,0 +1,59 @@ +/* { dg-do run { target 
avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_rsqrt_sh(V512 * dest, V512 op1, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = 1.0 / sqrtf(v1.f32[0]); + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + emulate_rsqrt_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_rsqrt_sh(exp.xmmh[0], src1.xmmh[0]); + check_results(&res, &exp, N_ELEMS, "_mm_rsqrt_sh"); + + init_dest(&res, &exp); + emulate_rsqrt_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_rsqrt_sh(res.xmmh[0], 0x1, exp.xmmh[0], src1.xmmh[0]); + check_results(&res, &exp, N_ELEMS, "_mm_mask_rsqrt_sh"); + + emulate_rsqrt_sh(&exp, src1, 0x1, 1); + res.xmmh[0] = _mm_maskz_rsqrt_sh(0x1, exp.xmmh[0], src1.xmmh[0]); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_rsqrt_sh"); + + if (n_errs != 0) { + abort (); + } + +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefph-1a.c new file mode 100644 index 0000000..f3d2789 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefph-1a.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h res, res1, res2; +volatile __m512h x1, x2; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_scalef_ph (x1, x2); + res1 = _mm512_mask_scalef_ph (res1, m32, x1, x2); + res2 = _mm512_maskz_scalef_ph (m32, x1, x2); + res = _mm512_scalef_round_ph (x1, x2, 8); + res1 = _mm512_mask_scalef_round_ph (res1, m32, x1, x2, 8); + res2 = _mm512_maskz_scalef_round_ph (m32, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefph-1b.c new file mode 100644 index 0000000..7c7288d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefph-1b.c @@ -0,0 +1,94 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define DEBUG + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(scalef_ph) (V512 * dest, V512 op1, V512 op2, + __mmask32 
k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = v1.f32[i] * powf(2.0f, floorf(v3.f32[i])); + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = v2.f32[i] * powf(2.0f, floorf(v4.f32[i])); + } + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(scalef_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_scalef_ph) (HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _scalef_ph); + + init_dest(&res, &exp); + EMULATE(scalef_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_scalef_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_scalef_ph); + + EMULATE(scalef_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_scalef_ph) (ZMASK_VALUE, HF(src1), HF(src2)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_scalef_ph); + +#if AVX512F_LEN == 512 + EMULATE(scalef_ph) (&exp, src1, src2, NET_MASK, 0); + HF(res) = INTRINSIC (_scalef_round_ph) (HF(src1), HF(src2), 0x04); + CHECK_RESULT (&res, &exp, N_ELEMS, _scalef_round_ph); + + init_dest(&res, &exp); + EMULATE(scalef_ph) (&exp, src1, src2, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_scalef_round_ph) (HF(res), MASK_VALUE, HF(src1), HF(src2), 0x04); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_scalef_round_ph); + + EMULATE(scalef_ph) (&exp, src1, src2, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_scalef_round_ph) (ZMASK_VALUE, HF(src1), HF(src2), 0x04); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_scalef_round_ph); +#endif + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefsh-1a.c new file mode 100644 index 0000000..999c048 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefsh-1a.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vscalefsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vscalefsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, x1, x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_scalef_sh (x1, x2); + res = _mm_mask_scalef_sh (res, m8, x1, x2); + res = _mm_maskz_scalef_sh (m8, x1, x2); + res = _mm_scalef_round_sh (x1, x2, 4); + res = _mm_mask_scalef_round_sh (res, m8, x1, x2, 8); + res = _mm_maskz_scalef_round_sh (m8, x1, x2, 11); +} diff --git 
a/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefsh-1b.c new file mode 100644 index 0000000..5db7be0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vscalefsh-1b.c @@ -0,0 +1,58 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_scalef_sh(V512 * dest, V512 op1, V512 op2, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(op2, &v3, &v4); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = v1.f32[0] * powf(2.0f, floorf(v3.f32[0])); + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + emulate_scalef_sh(&exp, src1, src2, 0x1, 0); + res.xmmh[0] = _mm_scalef_round_sh(src1.xmmh[0], src2.xmmh[0], (0x00 | 0x08)); + check_results(&res, &exp, N_ELEMS, "_mm_scalef_round_sh"); + + init_dest(&res, &exp); + emulate_scalef_sh(&exp, src1, src2, 0x1, 0); + res.xmmh[0] = _mm_mask_scalef_round_sh(res.xmmh[0], 0x1, src1.xmmh[0], src2.xmmh[0], (0x00 | 0x08)); + check_results(&res, &exp, N_ELEMS, "_mm_mask_scalef_round_sh"); + + emulate_scalef_sh(&exp, src1, src2, 0x3, 1); + res.xmmh[0] = _mm_maskz_scalef_round_sh(0x3, src1.xmmh[0], src2.xmmh[0], (0x00 | 0x08)); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_scalef_round_sh"); + + if (n_errs != 0) { + abort (); + } +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtph-1a.c new file mode 100644 index 0000000..497b5ba --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtph-1a.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512h res; +volatile __m512h x1; +volatile __mmask32 m32; + +void extern +avx512f_test (void) +{ + res = _mm512_sqrt_ph (x1); + res = _mm512_mask_sqrt_ph (res, m32, x1); + res = _mm512_maskz_sqrt_ph (m32, x1); + res = _mm512_sqrt_round_ph (x1, 4); + res = _mm512_mask_sqrt_round_ph (res, m32, x1, 8); + res = _mm512_maskz_sqrt_round_ph (m32, x1, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtph-1b.c new file mode 100644 index 0000000..d4d047b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtph-1b.c @@ -0,0 +1,92 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + 
+#define N_ELEMS (AVX512F_LEN / 16) + +void NOINLINE +EMULATE(sqrt_ph) (V512 * dest, V512 op1, + __mmask32 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + __mmask16 m1, m2; + + m1 = k & 0xffff; + m2 = (k >> 16) & 0xffff; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + for (i = 0; i < 16; i++) { + if (((1 << i) & m1) == 0) { + if (zero_mask) { + v5.f32[i] = 0; + } + else { + v5.u32[i] = v7.u32[i]; + } + } + else { + v5.f32[i] = sqrtf(v1.f32[i]); + } + + if (((1 << i) & m2) == 0) { + if (zero_mask) { + v6.f32[i] = 0; + } + else { + v6.u32[i] = v8.u32[i]; + } + } + else { + v6.f32[i] = sqrtf(v2.f32[i]); + } + + } + *dest = pack_twops_2ph(v5, v6); +} + +void +TEST (void) +{ + V512 res; + V512 exp; + + init_src(); + + EMULATE(sqrt_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_sqrt_ph) (HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _sqrt_ph); + + init_dest(&res, &exp); + EMULATE(sqrt_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_sqrt_ph) (HF(res), MASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_sqrt_ph); + + EMULATE(sqrt_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_sqrt_ph) (ZMASK_VALUE, HF(src1)); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_sqrt_ph); + +#if AVX512F_LEN == 512 + EMULATE(sqrt_ph) (&exp, src1, NET_MASK, 0); + HF(res) = INTRINSIC (_sqrt_round_ph) (HF(src1), 8); + CHECK_RESULT (&res, &exp, N_ELEMS, _sqrt_round_ph); + + init_dest(&res, &exp); + EMULATE(sqrt_ph) (&exp, src1, MASK_VALUE, 0); + HF(res) = INTRINSIC (_mask_sqrt_round_ph) (HF(res), MASK_VALUE, HF(src1), 8); + CHECK_RESULT (&res, &exp, N_ELEMS, _mask_sqrt_round_ph); + + EMULATE(sqrt_ph) (&exp, src1, ZMASK_VALUE, 1); + HF(res) = INTRINSIC (_maskz_sqrt_round_ph) (ZMASK_VALUE, HF(src1), 8); + CHECK_RESULT (&res, &exp, N_ELEMS, _maskz_sqrt_round_ph); +#endif + + if (n_errs != 0) + abort (); +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtsh-1a.c new file mode 100644 index 0000000..dd44534 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtsh-1a.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vsqrtsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vsqrtsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res, x1, x2; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res = _mm_sqrt_sh (x1, x2); + res = _mm_mask_sqrt_sh (res, m8, x1, x2); + res = _mm_maskz_sqrt_sh (m8, x1, x2); + res = _mm_sqrt_round_sh (x1, x2, 4); + res = _mm_mask_sqrt_round_sh (res, m8, x1, x2, 8); + res = _mm_maskz_sqrt_round_sh (m8, x1, x2, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtsh-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtsh-1b.c new file mode 100644 index 0000000..4744c6f --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512fp16-vsqrtsh-1b.c @@ -0,0 +1,60 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +#define N_ELEMS 8 + +void NOINLINE +emulate_sqrt_sh(V512 * dest, V512 op1, + __mmask8 k, int zero_mask) +{ + V512 v1, v2, v3, v4, v5, v6, v7, v8; + int i; + + unpack_ph_2twops(op1, &v1, &v2); + unpack_ph_2twops(*dest, &v7, &v8); + + if ((k&1) || !k) + v5.f32[0] = sqrtf(v1.f32[0]); + else if (zero_mask) + v5.f32[0] = 0; + else + v5.f32[0] = v7.f32[0]; + + for (i = 1; i < 8; i++) + v5.f32[i] = v1.f32[i]; + + *dest = pack_twops_2ph(v5, v6); +} + +void +test_512 (void) +{ + V512 res; + V512 exp; + + init_src(); + + emulate_sqrt_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_sqrt_round_sh(exp.xmmh[0], src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_sqrt_round_sh"); + + init_dest(&res, &exp); + emulate_sqrt_sh(&exp, src1, 0x1, 0); + res.xmmh[0] = _mm_mask_sqrt_round_sh(res.xmmh[0], 0x1, exp.xmmh[0], + src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_mask_sqrt_round_sh"); + + emulate_sqrt_sh(&exp, src1, 0x1, 1); + res.xmmh[0] = _mm_maskz_sqrt_round_sh(0x1, exp.xmmh[0], src1.xmmh[0], _ROUND_NINT); + check_results(&res, &exp, N_ELEMS, "_mm_maskz_sqrt_round_sh"); + + if (n_errs != 0) { + abort (); + } + +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c new file mode 100644 index 0000000..3621bb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c @@ -0,0 +1,55 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +void +test_512 (void) +{ + V512 res; + res.xmm[0] = _mm_castph_ps (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm_castph_ps"); + + res.xmmd[0] = _mm_castph_pd (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm_castph_pd"); + + res.xmmi[0] = _mm_castph_si128 (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm_castph_si128"); + + res.xmmh[0] = _mm_castps_ph (src1.xmm[0]); + check_results (&res, &src1, 8, "_mm_castps_ph"); + + res.xmmh[0] = _mm_castpd_ph (src1.xmmd[0]); + check_results (&res, &src1, 8, "_mm_castpd_ph"); + + res.xmmh[0] = _mm_castsi128_ph (src1.xmmi[0]); + check_results (&res, &src1, 8, "_mm_castsi128_ph"); + + res.ymm[0] = _mm256_castph_ps (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm256_castph_ps"); + + res.ymmd[0] = _mm256_castph_pd (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm256_castph_pd"); + + res.ymmi[0] = _mm256_castph_si256 (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm256_castph_si256"); + + res.ymmh[0] = _mm256_castps_ph (src1.ymm[0]); + check_results (&res, &src1, 16, "_mm256_castps_ph"); + + res.ymmh[0] = _mm256_castpd_ph (src1.ymmd[0]); + check_results (&res, &src1, 16, "_mm256_castpd_ph"); + + res.ymmh[0] = _mm256_castsi256_ph (src1.ymmi[0]); + check_results (&res, &src1, 16, "_mm256_castsi256_ph"); + + res.xmmh[0] = _mm256_castph256_ph128 (src1.ymmh[0]); + check_results (&res, &src1, 8, "_mm256_castph256_ph128"); + + res.ymmh[0] = _mm256_castph128_ph256 (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm256_castph128_ph256"); + + if (n_errs != 0) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c new file mode 100644 index 0000000..dce387f --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c @@ -0,0 +1,37 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512f-check.h" + +extern int memcmp (const void *, const void *, __SIZE_TYPE__); + +void +do_test (void) +{ + union512i_d zero; + union512h ad; + union256h b,bd; + union128h c; + + int i; + + for (i = 0; i < 16; i++) + { + b.a[i] = 65.43f + i; + zero.a[i] = 0; + } + + for (i = 0; i < 8; i++) + { + c.a[i] = 32.01f + i; + } + + bd.x = _mm256_zextph128_ph256 (c.x); + if (memcmp (bd.a, c.a, 16) + || memcmp (&bd.a[8], &zero.a, 16)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtdq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtdq2ph-1a.c new file mode 100644 index 0000000..ab0541d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtdq2ph-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtdq2phy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res3; +volatile __m256i x2; +volatile __m128i x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res3 = _mm256_cvtepi32_ph (x2); + res3 = _mm256_mask_cvtepi32_ph (res3, m8, x2); + res3 = _mm256_maskz_cvtepi32_ph (m8, x2); + + res3 = _mm_cvtepi32_ph (x3); + res3 = _mm_mask_cvtepi32_ph (res3, m8, x3); + res3 = _mm_maskz_cvtepi32_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtdq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtdq2ph-1b.c new file mode 100644 index 0000000..033587a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtdq2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtdq2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtdq2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtpd2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtpd2ph-1a.c new file mode 100644 index 0000000..57604a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtpd2ph-1a.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vcvtpd2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res3; +volatile __m256d x2; +volatile __m128d x3; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res3 = _mm256_cvtpd_ph (x2); + res3 = _mm256_mask_cvtpd_ph (res3, m16, x2); + res3 = _mm256_maskz_cvtpd_ph (m16, x2); + + res3 = _mm_cvtpd_ph (x3); + res3 = _mm_mask_cvtpd_ph (res3, m8, x3); + res3 = _mm_maskz_cvtpd_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtpd2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtpd2ph-1b.c new file mode 100644 index 0000000..ea4b200 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtpd2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtpd2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtpd2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2dq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2dq-1a.c new file mode 100644 index 0000000..df653b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2dq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_epi32 (x3); + res1 = _mm256_mask_cvtph_epi32 (res1, m8, x3); + res1 = _mm256_maskz_cvtph_epi32 (m8, x3); + + res2 = _mm_cvtph_epi32 (x3); + res2 = _mm_mask_cvtph_epi32 (res2, m8, x3); + res2 = _mm_maskz_cvtph_epi32 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2dq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2dq-1b.c new file mode 100644 index 0000000..93a3e90 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2dq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 
+#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2dq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2dq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2pd-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2pd-1a.c new file mode 100644 index 0000000..80010c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2pd-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256d res1; +volatile __m128d res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_pd (x3); + res1 = _mm256_mask_cvtph_pd (res1, m8, x3); + res1 = _mm256_maskz_cvtph_pd (m8, x3); + + res2 = _mm_cvtph_pd (x3); + res2 = _mm_mask_cvtph_pd (res2, m8, x3); + res2 = _mm_maskz_cvtph_pd (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2pd-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2pd-1b.c new file mode 100644 index 0000000..a384905 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2pd-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2pd-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2pd-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2psx-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2psx-1a.c new file mode 100644 index 0000000..e8c4c8c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2psx-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include 
<immintrin.h> + +volatile __m256 res1; +volatile __m128 res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtxph_ps (x3); + res1 = _mm256_mask_cvtxph_ps (res1, m8, x3); + res1 = _mm256_maskz_cvtxph_ps (m8, x3); + + res2 = _mm_cvtxph_ps (x3); + res2 = _mm_mask_cvtxph_ps (res2, m8, x3); + res2 = _mm_maskz_cvtxph_ps (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2psx-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2psx-1b.c new file mode 100644 index 0000000..ad91de8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2psx-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2psx-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2psx-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2qq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2qq-1a.c new file mode 100644 index 0000000..ddc6f2a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2qq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_epi64 (x3); + res1 = _mm256_mask_cvtph_epi64 (res1, m8, x3); + res1 = _mm256_maskz_cvtph_epi64 (m8, x3); + + res2 = _mm_cvtph_epi64 (x3); + res2 = _mm_mask_cvtph_epi64 (res2, m8, x3); + res2 = _mm_maskz_cvtph_epi64 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2qq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2qq-1b.c new file mode 100644 index 0000000..5afc5a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2qq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2qq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2qq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2udq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2udq-1a.c new file mode 100644 index 0000000..d07d766 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2udq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" 
} */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_epu32 (x3); + res1 = _mm256_mask_cvtph_epu32 (res1, m8, x3); + res1 = _mm256_maskz_cvtph_epu32 (m8, x3); + + res2 = _mm_cvtph_epu32 (x3); + res2 = _mm_mask_cvtph_epu32 (res2, m8, x3); + res2 = _mm_maskz_cvtph_epu32 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2udq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2udq-1b.c new file mode 100644 index 0000000..d869a0c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2udq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2udq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2udq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uqq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uqq-1a.c new file mode 100644 index 0000000..26dbf22 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uqq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_epu64 (x3); + res1 = _mm256_mask_cvtph_epu64 (res1, m8, x3); + res1 = _mm256_maskz_cvtph_epu64 (m8, x3); + + res2 = _mm_cvtph_epu64 (x3); + res2 = _mm_mask_cvtph_epu64 (res2, m8, x3); + res2 = _mm_maskz_cvtph_epu64 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uqq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uqq-1b.c 
new file mode 100644 index 0000000..d9b10a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uqq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2uqq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2uqq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uw-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uw-1a.c new file mode 100644 index 0000000..0f9fd27 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uw-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2uw\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m256h x3; +volatile __m128h x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_epu16 (x3); + res1 = _mm256_mask_cvtph_epu16 (res1, m16, x3); + res1 = _mm256_maskz_cvtph_epu16 (m16, x3); + + res2 = _mm_cvtph_epu16 (x4); + res2 = _mm_mask_cvtph_epu16 (res2, m8, x4); + res2 = _mm_maskz_cvtph_epu16 (m8, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uw-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uw-1b.c new file mode 100644 index 0000000..280dcd7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2uw-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2uw-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2uw-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2w-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2w-1a.c new file mode 100644 index 0000000..8dee4ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2w-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ 
\\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2w\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m256h x3; +volatile __m128h x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvtph_epi16 (x3); + res1 = _mm256_mask_cvtph_epi16 (res1, m16, x3); + res1 = _mm256_maskz_cvtph_epi16 (m16, x3); + + res2 = _mm_cvtph_epi16 (x4); + res2 = _mm_mask_cvtph_epi16 (res2, m8, x4); + res2 = _mm_maskz_cvtph_epi16 (m8, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2w-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2w-1b.c new file mode 100644 index 0000000..739ba64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtph2w-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2w-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtph2w-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtps2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtps2ph-1a.c new file mode 100644 index 0000000..a89f8c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtps2ph-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtps2phxy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phxy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phxy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res3; +volatile __m256 x2; +volatile __m128 x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res3 = _mm256_cvtxps_ph (x2); + res3 = _mm256_mask_cvtxps_ph (res3, m8, x2); + res3 = _mm256_maskz_cvtxps_ph (m8, x2); + + res3 = _mm_cvtxps_ph (x3); + res3 = _mm_mask_cvtxps_ph (res3, m8, x3); + res3 = _mm_maskz_cvtxps_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtps2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtps2ph-1b.c new file mode 100644 index 0000000..a339d0c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtps2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtps2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define 
AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtps2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtqq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtqq2ph-1a.c new file mode 100644 index 0000000..8e42a4b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtqq2ph-1a.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res3; +volatile __m256i x2; +volatile __m128i x3; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res3 = _mm256_cvtepi64_ph (x2); + res3 = _mm256_mask_cvtepi64_ph (res3, m16, x2); + res3 = _mm256_maskz_cvtepi64_ph (m16, x2); + + res3 = _mm_cvtepi64_ph (x3); + res3 = _mm_mask_cvtepi64_ph (res3, m8, x3); + res3 = _mm_maskz_cvtepi64_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtqq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtqq2ph-1b.c new file mode 100644 index 0000000..6a4a329 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtqq2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtqq2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtqq2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2dq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2dq-1a.c new file mode 100644 index 0000000..b4c0840 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2dq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; 
+ +void extern +avx512f_test (void) +{ + res1 = _mm256_cvttph_epi32 (x3); + res1 = _mm256_mask_cvttph_epi32 (res1, m8, x3); + res1 = _mm256_maskz_cvttph_epi32 (m8, x3); + + res2 = _mm_cvttph_epi32 (x3); + res2 = _mm_mask_cvttph_epi32 (res2, m8, x3); + res2 = _mm_maskz_cvttph_epi32 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2dq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2dq-1b.c new file mode 100644 index 0000000..f9d82f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2dq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2dq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2dq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2qq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2qq-1a.c new file mode 100644 index 0000000..421c688 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2qq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvttph_epi64 (x3); + res1 = _mm256_mask_cvttph_epi64 (res1, m8, x3); + res1 = _mm256_maskz_cvttph_epi64 (m8, x3); + + res2 = _mm_cvttph_epi64 (x3); + res2 = _mm_mask_cvttph_epi64 (res2, m8, x3); + res2 = _mm_maskz_cvttph_epi64 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2qq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2qq-1b.c new file mode 100644 index 0000000..323ab74 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2qq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2qq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2qq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2udq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2udq-1a.c new file mode 100644 index 0000000..60f4318 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2udq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvttph_epu32 (x3); + res1 = _mm256_mask_cvttph_epu32 (res1, m8, x3); + res1 = _mm256_maskz_cvttph_epu32 (m8, x3); + + res2 = _mm_cvttph_epu32 (x3); + res2 = _mm_mask_cvttph_epu32 (res2, m8, x3); + res2 = _mm_maskz_cvttph_epu32 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2udq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2udq-1b.c new file mode 100644 index 0000000..61365d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2udq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2udq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2udq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uqq-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uqq-1a.c new file mode 100644 index 0000000..37008f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uqq-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m128h x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvttph_epu64 (x3); + res1 = _mm256_mask_cvttph_epu64 (res1, m8, x3); + res1 = _mm256_maskz_cvttph_epu64 (m8, x3); + + res2 = _mm_cvttph_epu64 (x3); + res2 = _mm_mask_cvttph_epu64 (res2, m8, x3); + res2 = _mm_maskz_cvttph_epu64 (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uqq-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uqq-1b.c new file mode 100644 
index 0000000..6360402 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uqq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2uqq-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2uqq-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uw-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uw-1a.c new file mode 100644 index 0000000..eafa31a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uw-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m256h x3; +volatile __m128h x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvttph_epu16 (x3); + res1 = _mm256_mask_cvttph_epu16 (res1, m16, x3); + res1 = _mm256_maskz_cvttph_epu16 (m16, x3); + + res2 = _mm_cvttph_epu16 (x4); + res2 = _mm_mask_cvttph_epu16 (res2, m8, x4); + res2 = _mm_maskz_cvttph_epu16 (m8, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uw-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uw-1b.c new file mode 100644 index 0000000..dd5ed9d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2uw-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2uw-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2uw-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2w-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2w-1a.c new file mode 100644 index 0000000..7476d3c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2w-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ 
\\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i res1; +volatile __m128i res2; +volatile __m256h x3; +volatile __m128h x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_cvttph_epi16 (x3); + res1 = _mm256_mask_cvttph_epi16 (res1, m16, x3); + res1 = _mm256_maskz_cvttph_epi16 (m16, x3); + + res2 = _mm_cvttph_epi16 (x4); + res2 = _mm_mask_cvttph_epi16 (res2, m8, x4); + res2 = _mm_maskz_cvttph_epi16 (m8, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2w-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2w-1b.c new file mode 100644 index 0000000..7a04a6a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvttph2w-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2w-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvttph2w-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtudq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtudq2ph-1a.c new file mode 100644 index 0000000..4fa2ab9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtudq2ph-1a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtudq2phy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res3; +volatile __m256i x2; +volatile __m128i x3; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res3 = _mm256_cvtepu32_ph (x2); + res3 = _mm256_mask_cvtepu32_ph (res3, m8, x2); + res3 = _mm256_maskz_cvtepu32_ph (m8, x2); + + res3 = _mm_cvtepu32_ph (x3); + res3 = _mm_mask_cvtepu32_ph (res3, m8, x3); + res3 = _mm_maskz_cvtepu32_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtudq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtudq2ph-1b.c new file mode 100644 index 0000000..4ea2c26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtudq2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtudq2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + 
+#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtudq2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1a.c new file mode 100644 index 0000000..a3ee951 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1a.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128h res3; +volatile __m256i x2; +volatile __m128i x3; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res3 = _mm256_cvtepu64_ph (x2); + res3 = _mm256_mask_cvtepu64_ph (res3, m16, x2); + res3 = _mm256_maskz_cvtepu64_ph (m16, x2); + + res3 = _mm_cvtepu64_ph (x3); + res3 = _mm_mask_cvtepu64_ph (res3, m8, x3); + res3 = _mm_maskz_cvtepu64_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1b.c new file mode 100644 index 0000000..c747e8d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuqq2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtuqq2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtuqq2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuw2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuw2ph-1a.c new file mode 100644 index 0000000..59393dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuw2ph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res2; +volatile __m128h res3; 
+volatile __m256i x2; +volatile __m128i x3; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res2 = _mm256_cvtepu16_ph (x2); + res2 = _mm256_mask_cvtepu16_ph (res2, m16, x2); + res2 = _mm256_maskz_cvtepu16_ph (m16, x2); + + res3 = _mm_cvtepu16_ph (x3); + res3 = _mm_mask_cvtepu16_ph (res3, m8, x3); + res3 = _mm_maskz_cvtepu16_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuw2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuw2ph-1b.c new file mode 100644 index 0000000..89d94df --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtuw2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtuw2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtuw2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtw2ph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtw2ph-1a.c new file mode 100644 index 0000000..ff5530f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtw2ph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res2; +volatile __m128h res3; +volatile __m256i x2; +volatile __m128i x3; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res2 = _mm256_cvtepi16_ph (x2); + res2 = _mm256_mask_cvtepi16_ph (res2, m16, x2); + res2 = _mm256_maskz_cvtepi16_ph (m16, x2); + + res3 = _mm_cvtepi16_ph (x3); + res3 = _mm_mask_cvtepi16_ph (res3, m8, x3); + res3 = _mm_maskz_cvtepi16_ph (m8, x3); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtw2ph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtw2ph-1b.c new file mode 100644 index 0000000..243e45b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vcvtw2ph-1b.c @@ -0,0 +1,15 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtw2ph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vcvtw2ph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vfpclassph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vfpclassph-1a.c new file mode 100644 index 0000000..897a3c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vfpclassph-1a.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options 
"-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfpclassphy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclassphx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclassphy\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclassphx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h x256; +volatile __m128h x128; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512dq_test (void) +{ + m16 = _mm256_fpclass_ph_mask (x256, 13); + m8 = _mm_fpclass_ph_mask (x128, 13); + m16 = _mm256_mask_fpclass_ph_mask (2, x256, 13); + m8 = _mm_mask_fpclass_ph_mask (2, x128, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vfpclassph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vfpclassph-1b.c new file mode 100644 index 0000000..6745f13 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vfpclassph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vfpclassph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vfpclassph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetexpph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetexpph-1a.c new file mode 100644 index 0000000..82c23b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetexpph-1a.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512fp16 -O2" } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1} } */ +/* { dg-final { scan-assembler-times "vgetexpph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1} } */ + +#include <immintrin.h> + +volatile __m256h xx; +volatile __m128h x2; +volatile __mmask8 m8; +volatile __mmask16 m16; + +void extern +avx512vl_test (void) +{ + xx = _mm256_getexp_ph (xx); + xx = _mm256_mask_getexp_ph (xx, m16, xx); + xx = _mm256_maskz_getexp_ph (m16, xx); + x2 = _mm_getexp_ph (x2); + x2 = _mm_mask_getexp_ph (x2, m8, x2); + x2 = _mm_maskz_getexp_ph (m8, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetexpph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetexpph-1b.c new file mode 100644 index 0000000..7eb4fa4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetexpph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vgetexpph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + 
+#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vgetexpph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetmantph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetmantph-1a.c new file mode 100644 index 0000000..4ce6ed5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetmantph-1a.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512fp16 " } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h x, y; +volatile __m128h a, b; +volatile __mmask8 m8; +volatile __mmask16 m16; + +void extern +avx512vl_test (void) +{ + x = _mm256_getmant_ph (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + x = _mm256_mask_getmant_ph (x, m16, y, _MM_MANT_NORM_p75_1p5, + _MM_MANT_SIGN_src); + x = _mm256_maskz_getmant_ph (m16, y, _MM_MANT_NORM_p75_1p5, + _MM_MANT_SIGN_src); + a = _mm_getmant_ph (b, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); + a = _mm_mask_getmant_ph (a, m8, b, _MM_MANT_NORM_p75_1p5, + _MM_MANT_SIGN_src); + a = _mm_maskz_getmant_ph (m8, b, _MM_MANT_NORM_p75_1p5, + _MM_MANT_SIGN_src); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetmantph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetmantph-1b.c new file mode 100644 index 0000000..e5f8740 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vgetmantph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vgetmantph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vgetmantph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrcpph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrcpph-1a.c new file mode 100644 index 0000000..5894dbc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrcpph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrcpph\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1; +volatile __m128h x2; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_rcp_ph (x1); + res1 = _mm256_mask_rcp_ph (res1, m16, x1); + res1 = _mm256_maskz_rcp_ph (m16, x1); + + res2 = _mm_rcp_ph (x2); + res2 = _mm_mask_rcp_ph (res2, m8, x2); + res2 = _mm_maskz_rcp_ph (m8, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrcpph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrcpph-1b.c new file mode 100644 index 0000000..a6b1e37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrcpph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vrcpph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vrcpph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vreduceph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vreduceph-1a.c new file mode 100644 index 0000000..4f43abd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vreduceph-1a.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreduceph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 123 + +volatile __m256h x2; +volatile __m128h x3; +volatile __mmask8 m8; +volatile __mmask16 m16; + +void extern +avx512fp16_test (void) +{ + x2 = _mm256_reduce_ph (x2, IMM); + x3 = _mm_reduce_ph (x3, IMM); + + x2 = _mm256_mask_reduce_ph (x2, m16, x2, IMM); + x3 = _mm_mask_reduce_ph (x3, m8, x3, IMM); + + x2 = _mm256_maskz_reduce_ph (m8, x2, IMM); + x3 = _mm_maskz_reduce_ph (m16, x3, IMM); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vreduceph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vreduceph-1b.c new file mode 100644 index 0000000..3851597 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vreduceph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vreduceph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vreduceph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrndscaleph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrndscaleph-1a.c new file mode 100644 index 0000000..9fcf7e9 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrndscaleph-1a.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 123 + +volatile __m256h x2; +volatile __m128h x3; +volatile __mmask8 m8; +volatile __mmask16 m16; + +void extern +avx512fp16_test (void) +{ + x2 = _mm256_roundscale_ph (x2, IMM); + x3 = _mm_roundscale_ph (x3, IMM); + + x2 = _mm256_mask_roundscale_ph (x2, m16, x2, IMM); + x3 = _mm_mask_roundscale_ph (x3, m8, x3, IMM); + + x2 = _mm256_maskz_roundscale_ph (m8, x2, IMM); + x3 = _mm_maskz_roundscale_ph (m16, x3, IMM); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrndscaleph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrndscaleph-1b.c new file mode 100644 index 0000000..04b00e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrndscaleph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vrndscaleph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vrndscaleph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrsqrtph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrsqrtph-1a.c new file mode 100644 index 0000000..a5edc17 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrsqrtph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vrsqrtph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1; +volatile __m128h x2; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_rsqrt_ph (x1); + res1 = _mm256_mask_rsqrt_ph (res1, m16, x1); + res1 = _mm256_maskz_rsqrt_ph (m16, x1); + + res2 = _mm_rsqrt_ph (x2); + 
res2 = _mm_mask_rsqrt_ph (res2, m8, x2); + res2 = _mm_maskz_rsqrt_ph (m8, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrsqrtph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrsqrtph-1b.c new file mode 100644 index 0000000..a5e796b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vrsqrtph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vrsqrtph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vrsqrtph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vscalefph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vscalefph-1a.c new file mode 100644 index 0000000..22231d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vscalefph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefph\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1,x2; +volatile __m128h x3, x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_scalef_ph (x1, x2); + res1 = _mm256_mask_scalef_ph (res1, m16, x1, x2); + res1 = _mm256_maskz_scalef_ph (m16, x1, x2); + + res2 = _mm_scalef_ph (x3, x4); + res2 = _mm_mask_scalef_ph (res2, m8, x3, x4); + res2 = _mm_maskz_scalef_ph (m8, x3, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vscalefph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vscalefph-1b.c new file mode 100644 index 0000000..5c12d08 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vscalefph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vscalefph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vscalefph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsqrtph-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsqrtph-1a.c new file mode 100644 index 0000000..4acb137 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsqrtph-1a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ 
\\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vsqrtph\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256h res1; +volatile __m128h res2; +volatile __m256h x1; +volatile __m128h x2; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx512f_test (void) +{ + res1 = _mm256_sqrt_ph (x1); + res1 = _mm256_mask_sqrt_ph (res1, m16, x1); + res1 = _mm256_maskz_sqrt_ph (m16, x1); + + res2 = _mm_sqrt_ph (x2); + res2 = _mm_mask_sqrt_ph (res2, m8, x2); + res2 = _mm_maskz_sqrt_ph (m8, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsqrtph-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsqrtph-1b.c new file mode 100644 index 0000000..9b0a91d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-vsqrtph-1b.c @@ -0,0 +1,16 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define DEBUG +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vsqrtph-1b.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512fp16-vsqrtph-1b.c" + diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c new file mode 100644 index 0000000..0a45f8e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtps2pd" } } */ +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c new file mode 100644 index 0000000..c8b2d1d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c new file mode 100644 index 0000000..6ee565b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ + +extern float f; +extern 
double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtps2pd" } } */ +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr102080.c b/gcc/testsuite/gcc.target/i386/pr102080.c new file mode 100644 index 0000000..4c5ee32 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102080.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include<immintrin.h> +typedef float __m256 __attribute__((__vector_size__(32))); +__m256 _mm256_blendv_ps___Y, _mm256_blendv_ps___M, _mm256_mul_ps___A, + _mm256_mul_ps___B, IfThenElse___trans_tmp_9; + +void +__attribute__ ((target("avx"))) +IfThenElse (__m256 no) { + IfThenElse___trans_tmp_9 = _mm256_blendv_ps (no, _mm256_blendv_ps___Y, _mm256_blendv_ps___M); +} +void +__attribute__ ((target("avx512vl"))) +EncodedFromDisplay() { + __m256 __trans_tmp_11 = _mm256_mul_ps___A * _mm256_mul_ps___B; + IfThenElse(__trans_tmp_11); +} diff --git a/gcc/testsuite/gcc.target/i386/pr102327-1.c b/gcc/testsuite/gcc.target/i386/pr102327-1.c new file mode 100644 index 0000000..4743926 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102327-1.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ + +typedef _Float16 v8hf __attribute__((vector_size (16))); +typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef _Float16 v32hf __attribute__((vector_size (64))); + +#define VEC_EXTRACT(V,S,IDX) \ + S \ + __attribute__((noipa)) \ + vec_extract_##V##_##IDX (V v) \ + { \ + return v[IDX]; \ + } + +#define VEC_SET(V,S,IDX) \ + V \ + __attribute__((noipa)) \ + vec_set_##V##_##IDX (V v, S s) \ + { \ + v[IDX] = s; \ + return v; \ + } + +v8hf +vec_init_v8hf (_Float16 a1, _Float16 a2, _Float16 a3, _Float16 a4, _Float16 a5, +_Float16 a6, _Float16 a7, _Float16 a8) +{ + return __extension__ (v8hf) {a1, a2, a3, a4, a5, a6, a7, a8}; +} + +/* { dg-final { scan-assembler-times "vpunpcklwd" 4 } } */ +/* { dg-final { scan-assembler-times "vpunpckldq" 2 } } */ +/* { dg-final { scan-assembler-times "vpunpcklqdq" 1 } } */ + +VEC_EXTRACT (v8hf, _Float16, 4); +VEC_EXTRACT (v16hf, _Float16, 3); +VEC_EXTRACT (v16hf, _Float16, 8); +VEC_EXTRACT (v16hf, _Float16, 15); +VEC_EXTRACT (v32hf, _Float16, 5); +VEC_EXTRACT (v32hf, _Float16, 8); +VEC_EXTRACT (v32hf, _Float16, 14); +VEC_EXTRACT (v32hf, _Float16, 16); +VEC_EXTRACT (v32hf, _Float16, 24); +VEC_EXTRACT (v32hf, _Float16, 28); +/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$8" 2 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$6" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$14" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$10" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$12" 1 } } */ +/* { dg-final { scan-assembler-times "vextract" 9 } } */ + +VEC_SET (v8hf, _Float16, 4); +VEC_SET (v16hf, _Float16, 3); +VEC_SET (v16hf, _Float16, 8); +VEC_SET (v16hf, _Float16, 15); +VEC_SET (v32hf, _Float16, 5); +VEC_SET (v32hf, _Float16, 8); +VEC_SET (v32hf, _Float16, 14); +VEC_SET (v32hf, _Float16, 16); +VEC_SET (v32hf, _Float16, 24); +VEC_SET (v32hf, _Float16, 28); +/* { dg-final { scan-assembler-times "vpbroadcastw" 10 } } */ +/* { dg-final { scan-assembler-times "vpblendw" 4 } } */ +/* { dg-final { scan-assembler-times "vpblendd" 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/pr102327-2.c b/gcc/testsuite/gcc.target/i386/pr102327-2.c new file mode 100644 index 0000000..363e4b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102327-2.c @@ -0,0 +1,95 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512fp16" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512VL +#define AVX512FP16 + +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK +#include "pr102327-1.c" + +#define RUNCHECK_VEC_EXTRACT(U,V,S,IDX) \ + do \ + { \ + S tmp = vec_extract_##V##_##IDX ((V)U.x); \ + if (tmp != U.a[IDX]) \ + abort(); \ + } \ + while (0) + +#define RUNCHECK_VEC_SET(UTYPE,U,V,S,IDX,NUM) \ + do \ + { \ + S tmp = 3.0f; \ + UTYPE res; \ + res.x = vec_set_##V##_##IDX ((V)U.x, tmp); \ + for (int i = 0; i != NUM; i++) \ + if (i == IDX) \ + { \ + if (res.a[i] != tmp) \ + abort (); \ + } \ + else if (res.a[i] != U.a[i]) \ + abort(); \ + } \ + while (0) + +void +test_256 (void) +{ + union512h g1; + union256h t1; + union128h x1; + int sign = 1; + + int i = 0; + for (i = 0; i < 32; i++) + { + g1.a[i] = 56.78 * (i - 30) * sign; + sign = -sign; + } + + for (i = 0; i != 16; i++) + { + t1.a[i] = 90.12 * (i + 40) * sign; + sign = -sign; + } + + for (i = 0; i != 8; i++) + { + x1.a[i] = 90.12 * (i + 40) * sign; + sign = -sign; + } + + RUNCHECK_VEC_EXTRACT (x1, v8hf, _Float16, 4); + RUNCHECK_VEC_EXTRACT (t1, v16hf, _Float16, 3); + RUNCHECK_VEC_EXTRACT (t1, v16hf, _Float16, 8); + RUNCHECK_VEC_EXTRACT (t1, v16hf, _Float16, 15); + RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 5); + RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 8); + RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 14); + RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 16); + RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 24); + RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 28); + + RUNCHECK_VEC_SET (union128h, x1, v8hf, _Float16, 4, 8); + RUNCHECK_VEC_SET (union256h, t1, v16hf, _Float16, 3, 16); + RUNCHECK_VEC_SET (union256h, t1, v16hf, _Float16, 8, 16); + RUNCHECK_VEC_SET (union256h, t1, v16hf, _Float16, 15, 16); + RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 5, 32); + RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 8, 32); + RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 14, 32); + RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 16, 32); + RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 24, 32); + RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 28, 32); +} + +void +test_128() +{ +} diff --git a/gcc/testsuite/gcc.target/i386/pr91103-1.c b/gcc/testsuite/gcc.target/i386/pr91103-1.c index 11caaa8..2d78a6d 100644 --- a/gcc/testsuite/gcc.target/i386/pr91103-1.c +++ b/gcc/testsuite/gcc.target/i386/pr91103-1.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "valign\[dq\]" 16 } } */ +/* { dg-final { scan-assembler-times "valign\[dq\]" 8 } } */ +/* { dg-final { scan-assembler-times "vextract" 12 } } */ typedef float v8sf __attribute__((vector_size(32))); typedef float v16sf __attribute__((vector_size(64))); @@ -23,9 +24,13 @@ EXTRACT (v8sf, float, 4); EXTRACT (v8sf, float, 7); EXTRACT (v8si, int, 4); EXTRACT (v8si, int, 7); +EXTRACT (v16sf, float, 4); EXTRACT (v16sf, float, 8); +EXTRACT (v16sf, float, 12); EXTRACT (v16sf, float, 15); +EXTRACT (v16si, int, 4); EXTRACT (v16si, int, 8); +EXTRACT (v16si, int, 12); EXTRACT (v16si, int, 15); EXTRACT (v4df, double, 2); EXTRACT (v4df, double, 3); diff --git a/gcc/testsuite/gcc.target/i386/pr91103-2.c 
b/gcc/testsuite/gcc.target/i386/pr91103-2.c index 010e477..a928d87 100644 --- a/gcc/testsuite/gcc.target/i386/pr91103-2.c +++ b/gcc/testsuite/gcc.target/i386/pr91103-2.c @@ -61,9 +61,13 @@ RUNCHECK (f2, v8sf, float, 4); RUNCHECK (f2, v8sf, float, 7); RUNCHECK (di2, v8si, int, 4); RUNCHECK (di2, v8si, int, 7); +RUNCHECK (f1, v16sf, float, 4); RUNCHECK (f1, v16sf, float, 8); +RUNCHECK (f1, v16sf, float, 12); RUNCHECK (f1, v16sf, float, 15); +RUNCHECK (di1, v16si, int, 4); RUNCHECK (di1, v16si, int, 8); +RUNCHECK (di1, v16si, int, 12); RUNCHECK (di1, v16si, int, 15); RUNCHECK (d2, v4df, double, 2); RUNCHECK (d2, v4df, double, 3); diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 9a2833d..dd33993 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -703,25 +703,85 @@ #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) /* avx512fp16intrin.h */ -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D) -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8) -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) 
__builtin_ia32_divsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D) +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8) +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8) +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8) +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D) +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D) +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8) +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8) +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D) +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D) +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8) +#define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C) +#define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U) +#define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8) +#define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4) +#define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, D, 8) +#define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4) +#define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, D) 
__builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) +#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8) +#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8) +#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8) +#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8) +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) /* avx512fp16vlintrin.h */ -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D) +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D) 
+#define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C) +#define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C) +#define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D) +#define __builtin_ia32_getmantph128_mask(A, E, C, D) __builtin_ia32_getmantph128_mask(A, 1, C, D) /* vpclmulqdqintrin.h */ #define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index ce0ad71..e64321d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -670,6 +670,52 @@ test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 8) test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 8) /* avx512fp16intrin.h */ +test_1 (_mm512_sqrt_round_ph, __m512h, __m512h, 8) +test_1 (_mm_reduce_ph, __m128h, __m128h, 123) +test_1 (_mm256_reduce_ph, __m256h, __m256h, 123) +test_1 (_mm512_reduce_ph, __m512h, __m512h, 123) +test_1 (_mm_roundscale_ph, __m128h, __m128h, 123) +test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123) +test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123) +test_1 (_mm512_getexp_round_ph, __m512h, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_pd, __m512d, __m128h, 8) +test_1 (_mm512_cvtx_roundph_ps, __m512, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epu64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8) +test_1 (_mm512_cvt_roundepu16_ph, __m512h, __m512i, 8) +test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) +test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) +test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) +test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) +test_1 (_mm512_cvtx_roundps_ph, __m256h, __m512, 8) +test_1 (_mm512_cvt_roundpd_ph, __m128h, __m512d, 8) +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) +test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8) +test_1 (_mm_cvtt_roundsh_u32, unsigned, __m128h, 8) +#ifdef __x86_64__ +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8) +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8) +test_1 (_mm_cvtt_roundsh_i64, long long, __m128h, 8) +test_1 (_mm_cvtt_roundsh_u64, unsigned long long, __m128h, 8) +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8) +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8) +#endif +test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) +test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) +test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) +test_1y (_mm512_getmant_round_ph, __m512h, __m512h, 1, 1, 8) test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8) test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8) test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 
8) @@ -684,9 +730,59 @@ test_2 (_mm_max_round_sh, __m128h, __m128h, __m128h, 8) test_2 (_mm_min_round_sh, __m128h, __m128h, __m128h, 8) test_2 (_mm512_cmp_ph_mask, __mmask32, __m512h, __m512h, 1) test_2 (_mm_comi_sh, int, __m128h, __m128h, 1) +test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8) +test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8) +test_2 (_mm_scalef_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm_maskz_reduce_ph, __m128h, __mmask8, __m128h, 123) +test_2 (_mm256_maskz_reduce_ph, __m256h, __mmask16, __m256h, 123) +test_2 (_mm512_maskz_reduce_ph, __m512h, __mmask32, __m512h, 123) +test_2 (_mm_reduce_sh, __m128h, __m128h, __m128h, 123) +test_2 (_mm_maskz_roundscale_ph, __m128h, __mmask8, __m128h, 123) +test_2 (_mm256_maskz_roundscale_ph, __m256h, __mmask16, __m256h, 123) +test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123) +test_2 (_mm_roundscale_sh, __m128h, __m128h, __m128h, 123) +test_2 (_mm512_maskz_getexp_round_ph, __m512h, __mmask32, __m512h, 8) +test_2 (_mm_getexp_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_pd, __m512d, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtx_roundph_ps, __m512, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8) +test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8) +test_2 (_mm_cvt_roundsh_ss, __m128, __m128, __m128h, 8) +test_2 (_mm_cvt_roundsh_sd, __m128d, __m128d, __m128h, 8) +test_2 (_mm_cvt_roundss_sh, __m128h, __m128h, __m128, 8) +test_2 (_mm_cvt_roundsd_sh, __m128h, __m128h, __m128d, 8) +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) +test_2x (_mm512_maskz_reduce_round_ph, __m512h, __mmask32, __m512h, 123, 8) +test_2x (_mm512_maskz_roundscale_round_ph, __m512h, __mmask32, __m512h, 123, 8) +test_2x (_mm_reduce_round_sh, __m128h, __m128h, __m128h, 123, 8) +test_2x (_mm_roundscale_round_sh, __m128h, 
__m128h, __m128h, 123, 8) +test_2x (_mm512_maskz_getmant_ph, __m512h, __mmask32, __m512h, 1, 1) +test_2x (_mm_getmant_sh, __m128h, __m128h, __m128h, 1, 1) +test_2y (_mm512_maskz_getmant_round_ph, __m512h, __mmask32, __m512h, 1, 1, 8) +test_2y (_mm_getmant_round_sh, __m128h, __m128h, __m128h, 1, 1, 8) test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) @@ -700,8 +796,56 @@ test_3 (_mm512_maskz_min_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) test_3 (_mm_maskz_max_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm_maskz_min_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1) +test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) +test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) +test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) +test_3 (_mm_maskz_scalef_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) +test_3 (_mm_mask_reduce_ph, __m128h, __m128h, __mmask8, __m128h, 123) +test_3 (_mm256_mask_reduce_ph, __m256h, __m256h, __mmask16, __m256h, 123) +test_3 (_mm512_mask_reduce_ph, __m512h, __m512h, __mmask32, __m512h, 123) +test_3 (_mm_maskz_reduce_sh, __m128h, __mmask8, __m128h, __m128h, 123) +test_3 (_mm_mask_roundscale_ph, __m128h, __m128h, __mmask8, __m128h, 123) +test_3 (_mm256_mask_roundscale_ph, __m256h, __m256h, __mmask16, __m256h, 123) +test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123) +test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123) +test_3 (_mm_maskz_getexp_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) +test_3 (_mm512_mask_getexp_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_pd, __m512d, __m512d, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtx_roundph_ps, __m512, __m512, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepu32_ph, __m256h, __m256h, __mmask16, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8) +test_3 
(_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8) +test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8) +test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8) +test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8) +test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) +test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) +test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) +test_3x (_mm512_mask_roundscale_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) +test_3x (_mm_maskz_reduce_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_3x (_mm_maskz_roundscale_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_3x (_mm512_mask_getmant_ph, __m512h, __m512h, __mmask32, __m512h, 1, 1) +test_3x (_mm_maskz_getmant_sh, __m128h, __mmask8, __m128h, __m128h, 1, 1) +test_3y (_mm_maskz_getmant_round_sh, __m128h, __mmask8, __m128h, __m128h, 1, 1, 8) +test_3y (_mm512_mask_getmant_round_ph, __m512h, __m512h, __mmask32, __m512h, 1, 1, 8) test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) @@ -714,6 +858,20 @@ test_4 (_mm512_mask_max_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, test_4 (_mm512_mask_min_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) +test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123) +test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123) +test_4 (_mm_mask_getexp_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8) +test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8) +test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8) +test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8) +test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) +test_4y (_mm_mask_getmant_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1, 8) /* avx512fp16vlintrin.h */ test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 4393464..d92898f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -775,6 +775,52 @@ test_2 (_mm_rsqrt28_round_sd, __m128d, __m128d, __m128d, 8) test_2 (_mm_rsqrt28_round_ss, __m128, __m128, __m128, 8) /* avx512fp16intrin.h */ +test_1 (_mm512_sqrt_round_ph, 
__m512h, __m512h, 8) +test_1 (_mm_reduce_ph, __m128h, __m128h, 123) +test_1 (_mm256_reduce_ph, __m256h, __m256h, 123) +test_1 (_mm512_reduce_ph, __m512h, __m512h, 123) +test_1 (_mm_roundscale_ph, __m128h, __m128h, 123) +test_1 (_mm256_roundscale_ph, __m256h, __m256h, 123) +test_1 (_mm512_roundscale_ph, __m512h, __m512h, 123) +test_1 (_mm512_getexp_round_ph, __m512h, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_cvtt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) +test_1 (_mm512_cvtt_roundph_epi32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epu32, __m512i, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_pd, __m512d, __m128h, 8) +test_1 (_mm512_cvtx_roundph_ps, __m512, __m256h, 8) +test_1 (_mm512_cvtt_roundph_epu64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8) +test_1 (_mm512_cvt_roundepu16_ph, __m512h, __m512i, 8) +test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) +test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) +test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) +test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) +test_1 (_mm512_cvtx_roundps_ph, __m256h, __m512, 8) +test_1 (_mm512_cvt_roundpd_ph, __m128h, __m512d, 8) +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) +test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8) +test_1 (_mm_cvtt_roundsh_u32, unsigned, __m128h, 8) +#ifdef __x86_64__ +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8) +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8) +test_1 (_mm_cvtt_roundsh_i64, long long, __m128h, 8) +test_1 (_mm_cvtt_roundsh_u64, unsigned long long, __m128h, 8) +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8) +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8) +#endif +test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) +test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) +test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) +test_1y (_mm512_getmant_round_ph, __m512h, __m512h, 1, 1, 8) test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8) test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8) test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8) @@ -789,9 +835,58 @@ test_2 (_mm_max_round_sh, __m128h, __m128h, __m128h, 8) test_2 (_mm_min_round_sh, __m128h, __m128h, __m128h, 8) test_2 (_mm512_cmp_ph_mask, __mmask32, __m512h, __m512h, 1) test_2 (_mm_comi_sh, int, __m128h, __m128h, 1) +test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8) +test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8) +test_2 (_mm_maskz_reduce_ph, __m128h, __mmask8, __m128h, 123) +test_2 (_mm256_maskz_reduce_ph, __m256h, __mmask16, __m256h, 123) +test_2 (_mm512_maskz_reduce_ph, __m512h, __mmask32, __m512h, 123) +test_2 (_mm_reduce_sh, __m128h, __m128h, __m128h, 123) +test_2 (_mm_maskz_roundscale_ph, __m128h, __mmask8, __m128h, 123) +test_2 (_mm256_maskz_roundscale_ph, __m256h, __mmask16, __m256h, 123) +test_2 (_mm512_maskz_roundscale_ph, __m512h, __mmask32, __m512h, 123) +test_2 (_mm_roundscale_sh, __m128h, __m128h, 
__m128h, 123) +test_2 (_mm512_maskz_getexp_round_ph, __m512h, __mmask32, __m512h, 8) +test_2 (_mm_getexp_round_sh, __m128h, __m128h, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu32, __m512i, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_pd, __m512d, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtx_roundph_ps, __m512, __mmask16, __m256h, 8) +test_2 (_mm512_maskz_cvtt_roundph_epu64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8) +test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8) +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) +test_2 (_mm_cvt_roundsh_ss, __m128, __m128, __m128h, 8) +test_2 (_mm_cvt_roundsh_sd, __m128d, __m128d, __m128h, 8) +test_2 (_mm_cvt_roundss_sh, __m128h, __m128h, __m128, 8) +test_2 (_mm_cvt_roundsd_sh, __m128h, __m128h, __m128d, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) +test_2x (_mm512_maskz_reduce_round_ph, __m512h, __mmask32, __m512h, 123, 8) +test_2x (_mm512_maskz_roundscale_round_ph, __m512h, __mmask32, __m512h, 123, 8) +test_2x (_mm_reduce_round_sh, __m128h, __m128h, __m128h, 123, 8) +test_2x (_mm_roundscale_round_sh, __m128h, __m128h, __m128h, 123, 8) +test_2x (_mm512_maskz_getmant_ph, __m512h, __mmask32, __m512h, 1, 1) +test_2x (_mm_getmant_sh, __m128h, __m128h, __m128h, 1, 1) +test_2y (_mm512_maskz_getmant_round_ph, __m512h, __mmask32, __m512h, 1, 1, 8) +test_2y (_mm_getmant_round_sh, __m128h, __m128h, __m128h, 1, 1, 8) test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) @@ -805,8 +900,55 @@ test_3 (_mm512_maskz_min_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) test_3 (_mm_maskz_max_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm_maskz_min_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1) +test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) +test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) +test_3 
(_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8) +test_3 (_mm_mask_reduce_ph, __m128h, __m128h, __mmask8, __m128h, 123) +test_3 (_mm256_mask_reduce_ph, __m256h, __m256h, __mmask16, __m256h, 123) +test_3 (_mm512_mask_reduce_ph, __m512h, __m512h, __mmask32, __m512h, 123) +test_3 (_mm_maskz_reduce_sh, __m128h, __mmask8, __m128h, __m128h, 123) +test_3 (_mm_mask_roundscale_ph, __m128h, __m128h, __mmask8, __m128h, 123) +test_3 (_mm256_mask_roundscale_ph, __m256h, __m256h, __mmask16, __m256h, 123) +test_3 (_mm512_mask_roundscale_ph, __m512h, __m512h, __mmask32, __m512h, 123) +test_3 (_mm_maskz_roundscale_sh, __m128h, __mmask8, __m128h, __m128h, 123) +test_3 (_mm_maskz_getexp_round_sh, __m128h, __mmask8, __m128h, __m128h, 8) +test_3 (_mm512_mask_getexp_round_ph, __m512h, __m512h, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_pd, __m512d, __m512d, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtx_roundph_ps, __m512, __m512, __mmask16, __m256h, 8) +test_3 (_mm512_mask_cvtt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepu32_ph, __m256h, __m256h, __mmask16, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8) +test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8) +test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8) +test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8) +test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8) +test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8) +test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) +test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) +test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) +test_3x (_mm512_mask_roundscale_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) +test_3x (_mm_maskz_reduce_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_3x (_mm_maskz_roundscale_round_sh, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_3x (_mm512_mask_getmant_ph, __m512h, __m512h, __mmask32, __m512h, 1, 1) +test_3x (_mm_maskz_getmant_sh, __m128h, __mmask8, __m128h, __m128h, 1, 1) 
+test_3y (_mm_maskz_getmant_round_sh, __m128h, __mmask8, __m128h, __m128h, 1, 1, 8) +test_3y (_mm512_mask_getmant_round_ph, __m512h, __m512h, __mmask32, __m512h, 1, 1, 8) test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) @@ -819,6 +961,19 @@ test_4 (_mm512_mask_max_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, test_4 (_mm512_mask_min_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8) +test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123) +test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123) +test_4 (_mm_mask_getexp_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8) +test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8) +test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8) +test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8) +test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8) +test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) +test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) +test_4y (_mm_mask_getmant_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1, 8) /* avx512fp16vlintrin.h */ test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f6768ba..9c32b7b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -704,25 +704,85 @@ #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) /* avx512fp16intrin.h */ -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) 
__builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8) -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D) -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8) -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D) +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8) +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8) +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8) +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8) +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8) +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D) +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D) +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8) +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8) +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D) +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D) +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8) +#define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C) +#define __builtin_ia32_fpclasssh_mask(A, D, U) 
__builtin_ia32_fpclasssh_mask(A, 1, U) +#define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8) +#define __builtin_ia32_getexpsh_mask_round(A, B, C, D, E) __builtin_ia32_getexpsh_mask_round(A, B, C, D, 4) +#define __builtin_ia32_getmantph512_mask(A, F, C, D, E) __builtin_ia32_getmantph512_mask(A, 1, C, D, 8) +#define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4) +#define __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2dq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2udq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2qq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uqq512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2w512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2w512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, D) __builtin_ia32_vcvttph2uw512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) +#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8) +#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8) +#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8) +#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8) +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) 
__builtin_ia32_vcvtusi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) /* avx512fp16vlintrin.h */ -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D) +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D) +#define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C) +#define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C) +#define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D) +#define __builtin_ia32_getmantph128_mask(A, E, C, D) __builtin_ia32_getmantph128_mask(A, 1, C, D) /* vpclmulqdqintrin.h */ #define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-covert-1.c new file mode 100644 index 0000000..c30af69 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-covert-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "cvtss2sd" } } */ +/* { dg-final { scan-assembler "cvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "cvtps2pd" } } */ +/* { dg-final { scan-assembler-not "cvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c new file mode 100644 index 0000000..b6567e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */ + +extern float f; +extern double d; + +void +foo (void) +{ + d = f; +} + +/* { dg-final { scan-assembler "cvtss2sd" } } */ +/* { dg-final { scan-assembler-not "cvtps2pd" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c new file mode 100644 index 0000000..107f724 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mfpmath=sse 
-mtune-ctrl=^sse_partial_reg_converts_dependency" } */ + +extern float f; +extern int i; + +void +foo (void) +{ + f = i; +} + +/* { dg-final { scan-assembler "cvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "pxor" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c index 0c65172..715b281 100644 --- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c @@ -5,14 +5,16 @@ void foo (__vector_quad *dst) { - __vector_quad acc; - __builtin_mma_xxsetaccz (&acc); - *dst = acc; + __vector_quad acc0, acc1; + __builtin_mma_xxsetaccz (&acc0); + __builtin_mma_xxsetaccz (&acc1); + dst[0] = acc0; + dst[1] = acc1; } /* { dg-final { scan-assembler-not {\mlxv\M} } } */ /* { dg-final { scan-assembler-not {\mlxvp\M} } } */ /* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */ -/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ diff --git a/gcc/testsuite/gcc.target/sparc/20210917-1.c b/gcc/testsuite/gcc.target/sparc/20210917-1.c new file mode 100644 index 0000000..03e8bc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/sparc/20210917-1.c @@ -0,0 +1,19 @@ +/* PR rtl-optimization/102306 */ +/* Reported by Daniel Cederman <cederman@gaisler.com> */ + +/* { dg-do compile } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O -mcpu=v8" } */ + +extern void foo (void); + +void test (volatile unsigned char *a) +{ + char b = *a; + if (!b) + return; + if (b & 2) + foo (); +} + +/* { dg-final { scan-assembler-times "ldub" 1 } } */ diff --git a/gcc/testsuite/gfortran.dg/PR100914.c b/gcc/testsuite/gfortran.dg/PR100914.c index c6bd973..ea339e7 100644 --- a/gcc/testsuite/gfortran.dg/PR100914.c +++ b/gcc/testsuite/gfortran.dg/PR100914.c @@ -5,7 +5,6 @@ #include <stdbool.h> #include <stdio.h> #include <math.h> -#include <quadmath.h> #include <ISO_Fortran_binding.h> @@ -29,7 +28,7 @@ #define CMPLXL(x, y) ((long double complex)((long double)(x) + (long double complex)I * (long double)(y))) #undef CMPLX -#define CMPLX(x, y) ((__complex128 )((double)(x) + (double complex)I * (double)(y))) +#define CMPLX(x, y) ((_Float128 _Complex )((double)(x) + (double complex)I * (double)(y))) #define N 11 #define M 7 @@ -37,7 +36,7 @@ typedef float _Complex c_float_complex; typedef double _Complex c_double_complex; typedef long double _Complex c_long_double_complex; -typedef __complex128 c_float128_complex; +typedef _Float128 _Complex c_float128_complex; bool c_vrfy_c_float_complex (const CFI_cdesc_t *restrict); diff --git a/gcc/testsuite/gfortran.dg/PR100914.f90 b/gcc/testsuite/gfortran.dg/PR100914.f90 index 64b3335..d8057fd 100644 --- a/gcc/testsuite/gfortran.dg/PR100914.f90 +++ b/gcc/testsuite/gfortran.dg/PR100914.f90 @@ -2,6 +2,7 @@ ! { dg-do run { xfail { { x86_64*-*-* i?86*-*-* } && longdouble128 } } } ! { dg-additional-sources PR100914.c } ! { dg-require-effective-target fortran_real_c_float128 } +! { dg-additional-options "-Wno-pedantic" } ! ! Test the fix for PR100914 ! 
diff --git a/gcc/testsuite/gfortran.dg/c-interop/typecodes-array-float128-c.c b/gcc/testsuite/gfortran.dg/c-interop/typecodes-array-float128-c.c index d081feb..4fcb6e2 100644 --- a/gcc/testsuite/gfortran.dg/c-interop/typecodes-array-float128-c.c +++ b/gcc/testsuite/gfortran.dg/c-interop/typecodes-array-float128-c.c @@ -32,7 +32,7 @@ void ctest (CFI_cdesc_t *arg_float128, CFI_cdesc_t *arg_complex128) { - check (arg_float128, sizeof (__float128), CFI_type_float128); - check (arg_complex128, sizeof (__float128) * 2, + check (arg_float128, sizeof (_Float128), CFI_type_float128); + check (arg_complex128, sizeof (_Float128) * 2, CFI_type_float128_Complex); } diff --git a/gcc/testsuite/gfortran.dg/c-interop/typecodes-sanity-c.c b/gcc/testsuite/gfortran.dg/c-interop/typecodes-sanity-c.c index a1d044b..90f0b20 100644 --- a/gcc/testsuite/gfortran.dg/c-interop/typecodes-sanity-c.c +++ b/gcc/testsuite/gfortran.dg/c-interop/typecodes-sanity-c.c @@ -23,8 +23,7 @@ static struct tc_info tc_table[] = { /* Extension types. Note there is no portable C equivalent type for CFI_type_ucs4_char type - (4-byte Unicode characters), and GCC rejects "__float128 _Complex", - so this is kind of hacky... */ + (4-byte Unicode characters), so this is kind of hacky... */ #if CFI_type_int128_t > 0 { CFI_type_int128_t, "CFI_type_int128_t", sizeof (__int128), 1 }, @@ -38,9 +37,9 @@ static struct tc_info tc_table[] = #endif #if CFI_type_float128 > 0 { CFI_type_float128, "CFI_type_float128", - sizeof (__float128), 1 }, + sizeof (_Float128), 1 }, { CFI_type_float128_Complex, "CFI_type_float128_Complex", - sizeof (__float128) * 2, 1 }, + sizeof (_Float128 _Complex), 1 }, #endif #if CFI_type_cfunptr > 0 { CFI_type_cfunptr, "CFI_type_cfunptr", diff --git a/gcc/testsuite/gfortran.dg/c-interop/typecodes-scalar-float128-c.c b/gcc/testsuite/gfortran.dg/c-interop/typecodes-scalar-float128-c.c index f1833aa..7eafa93 100644 --- a/gcc/testsuite/gfortran.dg/c-interop/typecodes-scalar-float128-c.c +++ b/gcc/testsuite/gfortran.dg/c-interop/typecodes-scalar-float128-c.c @@ -31,8 +31,8 @@ void ctest (CFI_cdesc_t *arg_float128, CFI_cdesc_t *arg_complex128) { - check (arg_float128, sizeof (__float128), CFI_type_float128); - check (arg_complex128, sizeof (__float128) * 2, + check (arg_float128, sizeof (_Float128), CFI_type_float128); + check (arg_complex128, sizeof (_Float128) * 2, CFI_type_float128_Complex); } diff --git a/gcc/testsuite/gfortran.dg/entry_25.f90 b/gcc/testsuite/gfortran.dg/entry_25.f90 new file mode 100644 index 0000000..518560a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/entry_25.f90 @@ -0,0 +1,13 @@ +! { dg-do compile } +! PR fortran/102311 - ICE during error recovery checking entry characteristics + +module m +contains + function f() ! { dg-error "mismatched characteristics" } + character(:), allocatable :: f + character(1) :: g + f = 'f' + entry g() + g = 'g' + end +end diff --git a/gcc/testsuite/gfortran.dg/goacc/unexpected-end.f90 b/gcc/testsuite/gfortran.dg/goacc/unexpected-end.f90 new file mode 100644 index 0000000..e9db47b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/unexpected-end.f90 @@ -0,0 +1,25 @@ +! PR fortran/102313 + +!$acc end ATOMIC ! { dg-error "Unexpected !.ACC END ATOMIC" } + +!$acc end DATA ! { dg-error "Unexpected !.ACC END DATA" } + +!$acc end HOST_DATA ! { dg-error "Unexpected !.ACC END HOST_DATA" } + +!$acc end KERNELS ! { dg-error "Unexpected !.ACC END KERNELS" } + +!$acc end KERNELS LOOP ! { dg-error "Unexpected !.ACC END KERNELS LOOP" } + +!$acc end LOOP ! 
{ dg-error "Unexpected !.ACC END LOOP" } + +!$acc end PARALLEL ! { dg-error "Unexpected !.ACC END PARALLEL" } + +!$acc end PARALLEL LOOP ! { dg-error "Unexpected !.ACC END PARALLEL LOOP" } + +!$acc end SERIAL ! { dg-error "Unexpected !.ACC END SERIAL" } + +!$acc end SERIAL LOOP ! { dg-error "Unexpected !.ACC END SERIAL LOOP" } + +!$acc end EUPHORBIA LATHYRIS ! { dg-error "Unclassifiable OpenACC directive" } + +end diff --git a/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90 b/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90 new file mode 100644 index 0000000..d2e8daa --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90 @@ -0,0 +1,123 @@ +! PR fortran/102313 + +!$omp end ATOMIC ! { dg-error "Unexpected !.OMP END ATOMIC" } + +!$omp end CRITICAL ! { dg-error "Unexpected !.OMP END CRITICAL" } + +!$omp end DISTRIBUTE ! { dg-error "Unexpected !.OMP END DISTRIBUTE" } + +!$omp end DISTRIBUTE PARALLEL DO ! { dg-error "Unexpected !.OMP END DISTRIBUTE PARALLEL DO" } + +!$omp end DISTRIBUTE PARALLEL DO SIMD ! { dg-error "Unexpected !.OMP END DISTRIBUTE PARALLEL DO SIMD" } + +!$omp end DISTRIBUTE SIMD ! { dg-error "Unexpected !.OMP END DISTRIBUTE SIMD" } + +!$omp end DO ! { dg-error "Unexpected !.OMP END DO" } + +!$omp end DO SIMD ! { dg-error "Unexpected !.OMP END DO SIMD" } + +!$omp end LOOP ! { dg-error "Unclassifiable OpenMP directive" } + +!$omp parallel loop +do i = 1, 5 +end do +!$omp end LOOP ! { dg-error "Unclassifiable OpenMP directive" } + +!$omp end MASKED ! { dg-error "Unexpected !.OMP END MASKED" } + +!$omp end MASKED TASKLOOP ! { dg-error "Unexpected !.OMP END MASKED TASKLOOP" } + +!$omp end MASKED TASKLOOP SIMD ! { dg-error "Unexpected !.OMP END MASKED TASKLOOP SIMD" } + +!$omp end MASTER ! { dg-error "Unexpected !.OMP END MASTER" } + +!$omp end MASTER TASKLOOP ! { dg-error "Unexpected !.OMP END MASTER TASKLOOP" } + +!$omp end MASTER TASKLOOP SIMD ! { dg-error "Unexpected !.OMP END MASTER TASKLOOP SIMD" } + +!$omp end ORDERED ! { dg-error "Unexpected !.OMP END ORDERED" } + +!$omp end PARALLEL ! { dg-error "Unexpected !.OMP END PARALLEL" } + +!$omp end PARALLEL DO ! { dg-error "Unexpected !.OMP END PARALLEL DO" } + +!$omp end PARALLEL DO SIMD ! { dg-error "Unexpected !.OMP END PARALLEL DO SIMD" } + +!$omp loop +!$omp end PARALLEL LOOP ! { dg-error "Unexpected junk" } + +!$omp end PARALLEL MASKED ! { dg-error "Unexpected !.OMP END PARALLEL MASKED" } + +!$omp end PARALLEL MASKED TASKLOOP ! { dg-error "Unexpected !.OMP END PARALLEL MASKED TASKLOOP" } + +!$omp end PARALLEL MASKED TASKLOOP SIMD ! { dg-error "Unexpected !.OMP END PARALLEL MASKED TASKLOOP SIMD" } + +!$omp end PARALLEL MASTER ! { dg-error "Unexpected !.OMP END PARALLEL MASTER" } + +!$omp end PARALLEL MASTER TASKLOOP ! { dg-error "Unexpected !.OMP END PARALLEL MASTER TASKLOOP" } + +!$omp end PARALLEL MASTER TASKLOOP SIMD ! { dg-error "Unexpected !.OMP END PARALLEL MASTER TASKLOOP SIMD" } + +!$omp end PARALLEL SECTIONS ! { dg-error "Unexpected !.OMP END PARALLEL SECTIONS" } + +!$omp end PARALLEL WORKSHARE ! { dg-error "Unexpected !.OMP END PARALLEL WORKSHARE" } + +!$omp end SCOPE ! { dg-error "Unexpected !.OMP END SCOPE" } + +!$omp end SECTIONS ! { dg-error "Unexpected !.OMP END SECTIONS" } + +!$omp end SIMD ! { dg-error "Unexpected !.OMP END SIMD" } + +!$omp end SINGLE ! { dg-error "Unexpected !.OMP END SINGLE" } + +!$omp end TARGET ! { dg-error "Unexpected !.OMP END TARGET" } + +!$omp end TARGET DATA ! { dg-error "Unexpected !.OMP END TARGET DATA" } + +!$omp end TARGET PARALLEL ! 
{ dg-error "Unexpected !.OMP END TARGET PARALLEL" } + +!$omp end TARGET PARALLEL DO ! { dg-error "Unexpected !.OMP END TARGET PARALLEL DO" } + +!$omp end TARGET PARALLEL DO SIMD ! { dg-error "Unexpected !.OMP END TARGET PARALLEL DO SIMD" } + +!$omp end TARGET PARALLEL LOOP ! { dg-error "Unexpected junk" } + +!$omp end TARGET SIMD ! { dg-error "Unexpected !.OMP END TARGET SIMD" } + +!$omp end TARGET TEAMS ! { dg-error "Unexpected !.OMP END TARGET TEAMS" } + +!$omp end TARGET TEAMS DISTRIBUTE ! { dg-error "Unexpected !.OMP END TARGET TEAMS DISTRIBUTE" } + +!$omp end TARGET TEAMS DISTRIBUTE PARALLEL DO ! { dg-error "Unexpected !.OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO" } + +!$omp end TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD ! { dg-error "Unexpected !.OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD" } + +!$omp end TARGET TEAMS DISTRIBUTE SIMD ! { dg-error "Unexpected !.OMP END TARGET TEAMS DISTRIBUTE SIMD" } + +!$omp end TARGET TEAMS LOOP ! { dg-error "Unexpected junk" } + +!$omp end TASK ! { dg-error "Unexpected !.OMP END TASK" } + +!$omp end TASKGROUP ! { dg-error "Unexpected !.OMP END TASKGROUP" } + +!$omp end TASKLOOP ! { dg-error "Unexpected !.OMP END TASKLOOP" } + +!$omp end TASKLOOP SIMD ! { dg-error "Unexpected !.OMP END TASKLOOP SIMD" } + +!$omp end TEAMS ! { dg-error "Unexpected !.OMP END TEAMS" } + +!$omp end TEAMS DISTRIBUTE ! { dg-error "Unexpected !.OMP END TEAMS DISTRIBUTE" } + +!$omp end TEAMS DISTRIBUTE PARALLEL DO ! { dg-error "Unexpected !.OMP END TEAMS DISTRIBUTE PARALLEL DO" } + +!$omp end TEAMS DISTRIBUTE PARALLEL DO SIMD ! { dg-error "Unexpected !.OMP END TEAMS DISTRIBUTE PARALLEL DO SIMD" } + +!$omp end TEAMS DISTRIBUTE SIMD ! { dg-error "Unexpected !.OMP END TEAMS DISTRIBUTE SIMD" } + +!$omp end TEAMS LOOP ! { dg-error "Unexpected junk" } + +!$omp end WORKSHARE ! { dg-error "Unexpected !.OMP END WORKSHARE" } + +end ! { dg-error "Unexpected END statement" } + +! { dg-excess-errors "Unexpected end of file" } diff --git a/gcc/testsuite/gfortran.dg/intent_out_14.f90 b/gcc/testsuite/gfortran.dg/intent_out_14.f90 new file mode 100644 index 0000000..e599463 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/intent_out_14.f90 @@ -0,0 +1,24 @@ +! { dg-do run } +! 
PR fortran/102287 - optional allocatable DT array arguments (intent out) + +module m + type t + integer, allocatable :: a + end type t +contains + subroutine a (x, v) + type(t), optional, allocatable, intent(out) :: x(:) + type(t), optional, intent(out) :: v(:) + call b (x, v) + end subroutine a + + subroutine b (y, w) + type(t), optional, allocatable, intent(out) :: y(:) + type(t), optional, intent(out) :: w(:) + end subroutine b +end module m + +program p + use m + call a () +end diff --git a/gcc/testsuite/gnat.dg/enum_rep2.adb b/gcc/testsuite/gnat.dg/enum_rep2.adb new file mode 100644 index 0000000..6554ad4 --- /dev/null +++ b/gcc/testsuite/gnat.dg/enum_rep2.adb @@ -0,0 +1,117 @@ +-- { dg-do compile } + +with Ada.Integer_Text_IO; +with Ada.Text_IO; + +procedure Enum_Rep2 is + + type T is + (E80, E81, E82, E83, E84, E85, E86, E87, E88, E89, E8A, E8B, E8C, E8D, E8E, E8F, + E90, E91, E92, E93, E94, E95, E96, E97, E98, E99, E9A, E9B, E9C, E9D, E9E, E9F, + EA0, EA1, EA2, EA3, EA4, EA5, EA6, EA7, EA8, EA9, EAA, EAB, EAC, EAD, EAE, EAF, + EB0, EB1, EB2, EB3, EB4, EB5, EB6, EB7, EB8, EB9, EBA, EBB, EBC, EBD, EBE, EBF, + EC0, EC1, EC2, EC3, EC4, EC5, EC6, EC7, EC8, EC9, ECA, ECB, ECC, ECD, ECE, ECF, + ED0, ED1, ED2, ED3, ED4, ED5, ED6, ED7, ED8, ED9, EDA, EDB, EDC, EDD, EDE, EDF, + EE0, EE1, EE2, EE3, EE4, EE5, EE6, EE7, EE8, EE9, EEA, EEB, EEC, EED, EEE, EEF, + EF0, EF1, EF2, EF3, EF4, EF5, EF6, EF7, EF8, EF9, EFA, EFB, EFC, EFD, EFE, EFF, + E00, E01, E02, E03, E04, E05, E06, E07, E08, E09, E0A, E0B, E0C, E0D, E0E, E0F, + E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E1A, E1B, E1C, E1D, E1E, E1F, + E20, E21, E22, E23, E24, E25, E26, E27, E28, E29, E2A, E2B, E2C, E2D, E2E, E2F, + E30, E31, E32, E33, E34, E35, E36, E37, E38, E39, E3A, E3B, E3C, E3D, E3E, E3F, + E40, E41, E42, E43, E44, E45, E46, E47, E48, E49, E4A, E4B, E4C, E4D, E4E, E4F, + E50, E51, E52, E53, E54, E55, E56, E57, E58, E59, E5A, E5B, E5C, E5D, E5E, E5F, + E60, E61, E62, E63, E64, E65, E66, E67, E68, E69, E6A, E6B, E6C, E6D, E6E, E6F, + E70, E71, E72, E73, E74, E75, E76, E77, E78, E79, E7A, E7B, E7C, E7D, E7E, E7F); + for T use + (E80 => -16#80#, E81 => -16#7F#, E82 => -16#7E#, E83 => -16#7D#, + E84 => -16#7C#, E85 => -16#7B#, E86 => -16#7A#, E87 => -16#79#, + E88 => -16#78#, E89 => -16#77#, E8A => -16#76#, E8B => -16#75#, + E8C => -16#74#, E8D => -16#73#, E8E => -16#72#, E8F => -16#71#, + + E90 => -16#70#, E91 => -16#6F#, E92 => -16#6E#, E93 => -16#6D#, + E94 => -16#6C#, E95 => -16#6B#, E96 => -16#6A#, E97 => -16#69#, + E98 => -16#68#, E99 => -16#67#, E9A => -16#66#, E9B => -16#65#, + E9C => -16#64#, E9D => -16#63#, E9E => -16#62#, E9F => -16#61#, + + EA0 => -16#60#, EA1 => -16#5F#, EA2 => -16#5E#, EA3 => -16#5D#, + EA4 => -16#5C#, EA5 => -16#5B#, EA6 => -16#5A#, EA7 => -16#59#, + EA8 => -16#58#, EA9 => -16#57#, EAA => -16#56#, EAB => -16#55#, + EAC => -16#54#, EAD => -16#53#, EAE => -16#52#, EAF => -16#51#, + + EB0 => -16#50#, EB1 => -16#4F#, EB2 => -16#4E#, EB3 => -16#4D#, + EB4 => -16#4C#, EB5 => -16#4B#, EB6 => -16#4A#, EB7 => -16#49#, + EB8 => -16#48#, EB9 => -16#47#, EBA => -16#46#, EBB => -16#45#, + EBC => -16#44#, EBD => -16#43#, EBE => -16#42#, EBF => -16#41#, + + EC0 => -16#40#, EC1 => -16#3F#, EC2 => -16#3E#, EC3 => -16#3D#, + EC4 => -16#3C#, EC5 => -16#3B#, EC6 => -16#3A#, EC7 => -16#39#, + EC8 => -16#38#, EC9 => -16#37#, ECA => -16#36#, ECB => -16#35#, + ECC => -16#34#, ECD => -16#33#, ECE => -16#32#, ECF => -16#31#, + + ED0 => -16#30#, ED1 => -16#2F#, ED2 => -16#2E#, ED3 => -16#2D#, + ED4 => -16#2C#, ED5 => 
-16#2B#, ED6 => -16#2A#, ED7 => -16#29#, + ED8 => -16#28#, ED9 => -16#27#, EDA => -16#26#, EDB => -16#25#, + EDC => -16#24#, EDD => -16#23#, EDE => -16#22#, EDF => -16#21#, + + EE0 => -16#20#, EE1 => -16#1F#, EE2 => -16#1E#, EE3 => -16#1D#, + EE4 => -16#1C#, EE5 => -16#1B#, EE6 => -16#1A#, EE7 => -16#19#, + EE8 => -16#18#, EE9 => -16#17#, EEA => -16#16#, EEB => -16#15#, + EEC => -16#14#, EED => -16#13#, EEE => -16#12#, EEF => -16#11#, + + EF0 => -16#10#, EF1 => -16#0F#, EF2 => -16#0E#, EF3 => -16#0D#, + EF4 => -16#0C#, EF5 => -16#0B#, EF6 => -16#0A#, EF7 => -16#09#, + EF8 => -16#08#, EF9 => -16#07#, EFA => -16#06#, EFB => -16#05#, + EFC => -16#04#, EFD => -16#03#, EFE => -16#02#, EFF => -16#01#, + + E00 => 16#00#, E01 => 16#01#, E02 => 16#02#, E03 => 16#03#, + E04 => 16#04#, E05 => 16#05#, E06 => 16#06#, E07 => 16#07#, + E08 => 16#08#, E09 => 16#09#, E0A => 16#0A#, E0B => 16#0B#, + E0C => 16#0C#, E0D => 16#0D#, E0E => 16#0E#, E0F => 16#0F#, + + E10 => 16#10#, E11 => 16#11#, E12 => 16#12#, E13 => 16#13#, + E14 => 16#14#, E15 => 16#15#, E16 => 16#16#, E17 => 16#17#, + E18 => 16#18#, E19 => 16#19#, E1A => 16#1A#, E1B => 16#1B#, + E1C => 16#1C#, E1D => 16#1D#, E1E => 16#1E#, E1F => 16#1F#, + + E20 => 16#20#, E21 => 16#21#, E22 => 16#22#, E23 => 16#23#, + E24 => 16#24#, E25 => 16#25#, E26 => 16#26#, E27 => 16#27#, + E28 => 16#28#, E29 => 16#29#, E2A => 16#2A#, E2B => 16#2B#, + E2C => 16#2C#, E2D => 16#2D#, E2E => 16#2E#, E2F => 16#2F#, + + E30 => 16#30#, E31 => 16#31#, E32 => 16#32#, E33 => 16#33#, + E34 => 16#34#, E35 => 16#35#, E36 => 16#36#, E37 => 16#37#, + E38 => 16#38#, E39 => 16#39#, E3A => 16#3A#, E3B => 16#3B#, + E3C => 16#3C#, E3D => 16#3D#, E3E => 16#3E#, E3F => 16#3F#, + + E40 => 16#40#, E41 => 16#41#, E42 => 16#42#, E43 => 16#43#, + E44 => 16#44#, E45 => 16#45#, E46 => 16#46#, E47 => 16#47#, + E48 => 16#48#, E49 => 16#49#, E4A => 16#4A#, E4B => 16#4B#, + E4C => 16#4C#, E4D => 16#4D#, E4E => 16#4E#, E4F => 16#4F#, + + E50 => 16#50#, E51 => 16#51#, E52 => 16#52#, E53 => 16#53#, + E54 => 16#54#, E55 => 16#55#, E56 => 16#56#, E57 => 16#57#, + E58 => 16#58#, E59 => 16#59#, E5A => 16#5A#, E5B => 16#5B#, + E5C => 16#5C#, E5D => 16#5D#, E5E => 16#5E#, E5F => 16#5F#, + + E60 => 16#60#, E61 => 16#61#, E62 => 16#62#, E63 => 16#63#, + E64 => 16#64#, E65 => 16#65#, E66 => 16#66#, E67 => 16#67#, + E68 => 16#68#, E69 => 16#69#, E6A => 16#6A#, E6B => 16#6B#, + E6C => 16#6C#, E6D => 16#6D#, E6E => 16#6E#, E6F => 16#6F#, + + E70 => 16#70#, E71 => 16#71#, E72 => 16#72#, E73 => 16#73#, + E74 => 16#74#, E75 => 16#75#, E76 => 16#76#, E77 => 16#77#, + E78 => 16#78#, E79 => 16#79#, E7A => 16#7A#, E7B => 16#7B#, + E7C => 16#7C#, E7D => 16#7D#, E7E => 16#7E#, E7F => 16#7F#); + for T'Size use 8; + + procedure Print (X : T) is + begin + Ada.Integer_Text_IO.Put (T'Pos (X)); + Ada.Integer_Text_IO.Put (T'Enum_Rep (X)); + Ada.Text_IO.New_Line; + end; + +begin + Print (T'First); + Print (T'Last); +end; diff --git a/gcc/testsuite/gnat.dg/zcur_attr.adb b/gcc/testsuite/gnat.dg/zcur_attr.adb new file mode 100644 index 0000000..5d15f5e --- /dev/null +++ b/gcc/testsuite/gnat.dg/zcur_attr.adb @@ -0,0 +1,8 @@ +-- { dg-do compile } +-- { dg-options "-fdump-tree-optimized" } + +package body ZCUR_Attr is + function F return Integer is (0); +end ZCUR_Attr; + +-- { dg-final { scan-tree-dump "zero_call_used_regs \[(\]\"all\"\[)\]" "optimized" } } diff --git a/gcc/testsuite/gnat.dg/zcur_attr.ads b/gcc/testsuite/gnat.dg/zcur_attr.ads new file mode 100644 index 0000000..b756cc8 --- /dev/null +++ 
diff --git a/gcc/testsuite/gnat.dg/zcur_attr.adb b/gcc/testsuite/gnat.dg/zcur_attr.adb
new file mode 100644
index 0000000..5d15f5e
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/zcur_attr.adb
@@ -0,0 +1,8 @@
+-- { dg-do compile }
+-- { dg-options "-fdump-tree-optimized" }
+
+package body ZCUR_Attr is
+   function F return Integer is (0);
+end ZCUR_Attr;
+
+-- { dg-final { scan-tree-dump "zero_call_used_regs \[(\]\"all\"\[)\]" "optimized" } }
diff --git a/gcc/testsuite/gnat.dg/zcur_attr.ads b/gcc/testsuite/gnat.dg/zcur_attr.ads
new file mode 100644
index 0000000..b756cc8
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/zcur_attr.ads
@@ -0,0 +1,4 @@
+package ZCUR_Attr is
+   function F return Integer;
+   pragma Machine_Attribute (F, "zero_call_used_regs", "all");
+end ZCUR_Attr;
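The zcur_attr test checks that pragma Machine_Attribute forwards GCC's zero_call_used_regs function attribute to the middle end (the dg-final scan looks for it in the optimized tree dump). Roughly the same request from C looks like the sketch below; the attribute has existed since GCC 11, and -fzero-call-used-regs= is the command-line form.

    /* Ask GCC to clear all call-used registers in this function's
       epilogue, mirroring the Machine_Attribute pragma above.  */
    __attribute__ ((zero_call_used_regs ("all")))
    static int f (void)
    {
      return 0;
    }

    int main (void)
    {
      return f ();
    }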
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 82dc131..f11c4e6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1578,8 +1578,8 @@ proc check_effective_target_fortran_real_10 { } {
 
 # Return 1 if the target supports Fortran real kind C_FLOAT128,
 # 0 otherwise. This differs from check_effective_target_fortran_real_16
-# because __float128 has the additional requirement that it be the
-# 128-bit IEEE encoding; even if __float128 is available in C, it may not
+# because _Float128 has the additional requirement that it be the
+# 128-bit IEEE encoding; even if _Float128 is available in C, it may not
 # have a corresponding Fortran kind on targets (PowerPC) that use some
 # other encoding for long double/TFmode/real(16).
 proc check_effective_target_fortran_real_c_float128 { } {
@@ -8074,7 +8074,7 @@ proc check_effective_target_sync_int_128_runtime { } {
 # Note: 32bit s390 targets require -mzarch in dg-options.
 
 proc check_effective_target_sync_long_long { } {
-    if { [istarget i?86-*-*] || [istarget x86_64-*-*])
+    if { [istarget i?86-*-*] || [istarget x86_64-*-*]
         || [istarget aarch64*-*-*]
         || [istarget arm*-*-*]
         || [istarget alpha*-*-*]
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index f43d0f4..967d437 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -887,6 +887,39 @@ vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
   return res;
 }
 
+/* Return the misalignment of DR_INFO.  */
+
+int
+dr_misalignment (dr_vec_info *dr_info)
+{
+  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
+    {
+      dr_vec_info *first_dr
+        = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
+      int misalign = first_dr->misalignment;
+      gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
+      if (misalign == DR_MISALIGNMENT_UNKNOWN)
+        return misalign;
+      /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
+         INTEGER_CSTs and the first element in the group has the lowest
+         address.  Likewise vect_compute_data_ref_alignment will
+         have ensured that target_alignment is constant and otherwise
+         set misalign to DR_MISALIGNMENT_UNKNOWN.  */
+      HOST_WIDE_INT diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
+                            - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
+      gcc_assert (diff >= 0);
+      unsigned HOST_WIDE_INT target_alignment_c
+        = first_dr->target_alignment.to_constant ();
+      return (misalign + diff) % target_alignment_c;
+    }
+  else
+    {
+      int misalign = dr_info->misalignment;
+      gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
+      return misalign;
+    }
+}
+
 /* Record the base alignment guarantee given by DRB, which occurs
    in STMT_INFO.  */
 
@@ -992,7 +1025,7 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info)
 
   poly_uint64 vector_alignment
     = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
-  DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
+  SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
 
   /* If the main loop has peeled for alignment we have no way of knowing
      whether the data accesses in the epilogues are aligned.  We can't at
@@ -2408,7 +2441,12 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
     {
       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
-        vect_compute_data_ref_alignment (loop_vinfo, dr_info);
+        {
+          if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
+              && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
+            continue;
+          vect_compute_data_ref_alignment (loop_vinfo, dr_info);
+        }
     }
 
   return opt_result::success ();
@@ -2420,13 +2458,9 @@
 static bool
 vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
 {
-  /* We vectorize from the first scalar stmt in the node unless
-     the node is permuted in which case we start from the first
-     element in the group.  */
+  /* Alignment is maintained in the first element of the group.  */
   stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
-  dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
-  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
-    first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
+  first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
 
   /* We need to commit to a vector type for the group now.  */
   if (is_a <bb_vec_info> (vinfo)
@@ -2440,22 +2474,8 @@ vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
     }
 
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
-  vect_compute_data_ref_alignment (vinfo, dr_info);
-  /* In several places we need alignment of the first element anyway.  */
-  if (dr_info != first_dr_info)
-    vect_compute_data_ref_alignment (vinfo, first_dr_info);
-
-  /* For creating the data-ref pointer we need alignment of the
-     first element as well.  */
-  first_stmt_info
-    = vect_stmt_to_vectorize (vect_find_first_scalar_stmt_in_slp (node));
-  if (first_stmt_info != SLP_TREE_SCALAR_STMTS (node)[0])
-    {
-      first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
-      if (dr_info != first_dr_info)
-        vect_compute_data_ref_alignment (vinfo, first_dr_info);
-    }
-
+  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
+    vect_compute_data_ref_alignment (vinfo, dr_info);
   return true;
 }
 
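With the tree-vect-data-refs.c change, dr_misalignment answers a query for any member of an interleaved group from the group leader alone: take the leader's misalignment, add the byte distance between the member's and the leader's DR_INIT, and reduce modulo the (constant) target alignment. A standalone C sketch of just that arithmetic, with made-up numbers, is:

    #include <stdio.h>

    /* Hedged sketch of the arithmetic in dr_misalignment for grouped
       accesses; all values below are invented for illustration.  */
    int main (void)
    {
      long target_alignment = 16;   /* DR_TARGET_ALIGNMENT of the leader  */
      long leader_misalign  = 4;    /* misalignment of the first element  */
      long leader_init      = 8;    /* DR_INIT of the group leader        */
      long element_init     = 24;   /* DR_INIT of the queried element     */

      long diff = element_init - leader_init;            /* 16 bytes on   */
      long misalign = (leader_misalign + diff) % target_alignment;

      printf ("misalignment = %ld\n", misalign);         /* prints 4 */
      return 0;
    }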
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c9dcc64..5a5b8da 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7755,11 +7755,20 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
                                                       (reduc_info), &stmts);
         }
 
-      if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
-        def = gimple_convert (&stmts, vectype_out, def);
+      /* The epilogue loop might use a different vector mode, like
+         VNx2DI vs. V2DI.  */
+      if (TYPE_MODE (vectype_out) != TYPE_MODE (TREE_TYPE (def)))
+        {
+          tree reduc_type = build_vector_type_for_mode
+            (TREE_TYPE (TREE_TYPE (def)), TYPE_MODE (vectype_out));
+          def = gimple_convert (&stmts, reduc_type, def);
+        }
       /* Adjust the input so we pick up the partially reduced value
          for the skip edge in vect_create_epilog_for_reduction.  */
       accumulator->reduc_input = def;
+      /* And the reduction could be carried out using a different sign.  */
+      if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
+        def = gimple_convert (&stmts, vectype_out, def);
       if (loop_vinfo->main_loop_edge)
         {
           /* While we'd like to insert on the edge this will split
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 4e0b2ad..ce79d88 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -8515,17 +8515,6 @@ vectorizable_load (vec_info *vinfo,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
-     for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
-     which can be different when reduction chains were re-ordered.
-     Now that we figured we're a dataref reset stmt_info back to
-     SLP_TREE_SCALAR_STMTS[0].  When we're SLP only things should be
-     refactored in a way to maintain the dr_vec_info pointer for the
-     relevant access explicitely.  */
-  stmt_vec_info orig_stmt_info = stmt_info;
-  if (slp_node)
-    stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
-
   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
     {
@@ -8768,7 +8757,7 @@ vectorizable_load (vec_info *vinfo,
         dump_printf_loc (MSG_NOTE, vect_location,
                          "Vectorizing an unaligned access.\n");
 
-  STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
+  STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
   vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
                         &gs_info, slp_node, cost_vec);
   return true;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7453d2a..c4c5678 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1606,13 +1606,7 @@ set_dr_misalignment (dr_vec_info *dr_info, int val)
   dr_info->misalignment = val;
 }
 
-inline int
-dr_misalignment (dr_vec_info *dr_info)
-{
-  int misalign = dr_info->misalignment;
-  gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
-  return misalign;
-}
+extern int dr_misalignment (dr_vec_info *dr_info);
 
 /* Reflects actual alignment of first access in the vectorized loop,
    taking into account peeling/versioning if applied.  */
@@ -1620,7 +1614,21 @@ dr_misalignment (dr_vec_info *dr_info)
 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
 
 /* Only defined once DR_MISALIGNMENT is defined.  */
-#define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment)
+static inline const poly_uint64
+dr_target_alignment (dr_vec_info *dr_info)
+{
+  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
+    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
+  return dr_info->target_alignment;
+}
+#define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR)
+
+static inline void
+set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
+{
+  dr_info->target_alignment = val;
+}
+#define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
 
 /* Return true if data access DR_INFO is aligned to its target alignment
    (which may be less than a full vector).  */
@@ -10737,6 +10737,35 @@ signed_type_for (tree type)
 {
   return signed_or_unsigned_type_for (0, type);
 }
+
+/* - For VECTOR_TYPEs:
+     - The truth type must be a VECTOR_BOOLEAN_TYPE.
+     - The number of elements must match (known_eq).
+     - targetm.vectorize.get_mask_mode exists, and exactly
+       the same mode as the truth type.
+   - Otherwise, the truth type must be a BOOLEAN_TYPE
+     or useless_type_conversion_p to BOOLEAN_TYPE.  */
+bool
+is_truth_type_for (tree type, tree truth_type)
+{
+  machine_mode mask_mode = TYPE_MODE (truth_type);
+  machine_mode vmode = TYPE_MODE (type);
+  machine_mode tmask_mode;
+
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    {
+      if (VECTOR_BOOLEAN_TYPE_P (truth_type)
+          && known_eq (TYPE_VECTOR_SUBPARTS (type),
+                       TYPE_VECTOR_SUBPARTS (truth_type))
+          && targetm.vectorize.get_mask_mode (vmode).exists (&tmask_mode)
+          && tmask_mode == mask_mode)
+        return true;
+
+      return false;
+    }
+
+  return useless_type_conversion_p (boolean_type_node, truth_type);
+}
 
 /* If TYPE is a vector type, return a signed integer vector type with
    the same width and number of subparts. Otherwise return
   boolean_type_node.  */
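is_truth_type_for accepts a mask (truth) type for a vector type only when the element counts agree and the target's mask mode for the data vector is exactly the mask's mode. The property being guarded shows up in GCC's generic vector extension too, where a vector comparison yields a mask with one lane per data element; a hedged, self-contained illustration:

    #include <stdio.h>

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Element-wise max: the comparison a > b yields a 4-lane mask
       (0 or -1 per lane), i.e. a truth type with the same number of
       subparts as the data vectors it controls.  */
    static v4si select_max (v4si a, v4si b)
    {
      v4si mask = a > b;
      return (a & mask) | (b & ~mask);
    }

    int main (void)
    {
      v4si a = { 1, 7, 3, 9 }, b = { 4, 2, 8, 5 };
      v4si m = select_max (a, b);
      printf ("%d %d %d %d\n", m[0], m[1], m[2], m[3]);  /* 4 7 8 9 */
      return 0;
    }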
@@ -4591,6 +4591,7 @@ extern tree build_string_literal (unsigned, const char * = NULL,
 extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
+extern bool is_truth_type_for (tree, tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 53cf6de..2d261b3 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -5531,14 +5531,20 @@ output_constructor_regular_field (oc_local_state *local)
           && (!TYPE_DOMAIN (TREE_TYPE (local->field))
               || !TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (local->field)))))
         {
-          fieldsize = array_size_for_constructor (local->val);
+          unsigned HOST_WIDE_INT fldsize
+            = array_size_for_constructor (local->val);
+          fieldsize = int_size_in_bytes (TREE_TYPE (local->val));
+          /* In most cases fieldsize == fldsize as the size of the initializer
+             determines how many elements the flexible array member has.  For
+             C++ fldsize can be smaller though, if the last or several last or
+             all initializers of the flexible array member have side-effects
+             and the FE splits them into dynamic initialization.  */
+          gcc_checking_assert (fieldsize >= fldsize);
           /* Given a non-empty initialization, this field had better
              be last.  Given a flexible array member, the next field
              on the chain is a TYPE_DECL of the enclosing struct.  */
           const_tree next = DECL_CHAIN (local->field);
           gcc_assert (!fieldsize || !next || TREE_CODE (next) != FIELD_DECL);
-          tree size = TYPE_SIZE_UNIT (TREE_TYPE (local->val));
-          gcc_checking_assert (compare_tree_int (size, fieldsize) == 0);
         }
       else
         fieldsize = tree_to_uhwi (DECL_SIZE_UNIT (local->field));
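The varasm.c change sizes a flexible array member field from the type of its initializer (int_size_in_bytes) instead of from array_size_for_constructor alone, asserting the former is never smaller. A minimal C example of the underlying GCC extension, a statically initialized flexible array member whose emitted size follows its initializer:

    #include <stdio.h>

    /* GCC extension: a static initializer may supply elements for a
       flexible array member; the emitted object is sized accordingly.  */
    struct vec { int len; int data[]; };

    static struct vec v = { 3, { 10, 20, 30 } };

    int main (void)
    {
      /* sizeof (struct vec) does not count the flexible member ...  */
      printf ("sizeof (struct vec) = %zu\n", sizeof (struct vec));
      /* ... but the emitted initializer reserves room for v.data[0..2].  */
      printf ("v.data[2] = %d\n", v.data[2]);
      return 0;
    }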