diff options
author | Martin Liska <mliska@suse.cz> | 2021-06-10 08:22:39 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2021-06-10 08:22:39 +0200 |
commit | 93e01322371f89c49ff0c1d2046de2654fdb797d (patch) | |
tree | fd2d9d22d11ee9aaeae540b89341ae1bf6d3aa4c | |
parent | c5ed58925a1e02ddbc27ede08bbf1740f84b08b9 (diff) | |
parent | f8b067056ba5dd53f7bc883a1f59833efc26bd3e (diff) | |
download | gcc-93e01322371f89c49ff0c1d2046de2654fdb797d.zip gcc-93e01322371f89c49ff0c1d2046de2654fdb797d.tar.gz gcc-93e01322371f89c49ff0c1d2046de2654fdb797d.tar.bz2 |
Merge branch 'master' into devel/sphinx
88 files changed, 6319 insertions, 325 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aeec6b4..fe95b63 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,302 @@ +2021-06-09 Andrew Pinski <apinski@marvell.com> + + PR tree-optimization/100925 + * match.pd (a ? CST1 : CST2): Limit transformations + that would produce a negative to integeral types only. + Change !POINTER_TYPE_P to INTEGRAL_TYPE_P also. + +2021-06-09 Jeff Law <jeffreyalaw@gmail.com> + + Revert: + 2021-06-09 Jeff Law <jeffreyalaw@gmail.com> + + * doc/tm.texi: Correctly update. + +2021-06-09 Jeff Law <jeffreyalaw@gmail.com> + + * doc/tm.texi: Correctly update. + +2021-06-09 H.J. Lu <hjl.tools@gmail.com> + + PR other/100735 + * doc/tm.texi.in (Trampolines): Add a missing blank line. + +2021-06-09 Paul Eggert <eggert@cs.ucla.edu> + + PR other/100735 + * doc/invoke.texi (Code Gen Options); Document that -fno-trampolines + and -ftrampolines work only with Ada. + * doc/tm.texi.in (Trampolines): Likewise. + * doc/tm.texi: Regenerated. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * config/rs6000/altivec.h (vec_signextll, vec_signexti, vec_signextq): + Add define for new builtins. + * config/rs6000/altivec.md(altivec_vreveti2): Add define_expand. + * config/rs6000/rs6000-builtin.def (VSIGNEXTI, VSIGNEXTLL): Add + overloaded builtin definitions. + (VSIGNEXTSB2W, VSIGNEXTSH2W, VSIGNEXTSB2D, VSIGNEXTSH2D,VSIGNEXTSW2D, + VSIGNEXTSD2Q): Add builtin expansions. + (SIGNEXT): Add P10 overload definition. + * config/rs6000/rs6000-call.c (P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VEC_VSIGNEXTLL, + P10_BUILTIN_VEC_SIGNEXT): Add overloaded argument definitions. + * config/rs6000/vsx.md (vsx_sign_extend_v2di_v1ti): Add define_insn. + (vsignextend_v2di_v1ti, vsignextend_qi_<mode>, vsignextend_hi_<mode>, + vsignextend_si_v2di)[VIlong]: Add define_expand. + Make define_insn vsx_sign_extend_si_v2di visible. + * doc/extend.texi: Add documentation for the vec_signexti, + vec_signextll builtins and vec_signextq. 
+ +2021-06-09 Carl Love <cel@us.ibm.com> + + * config/rs6000/rs6000.c (__fixkfti, __fixunskfti, __floattikf, + __floatuntikf): Names changed to __fixkfti_sw, __fixunskfti_sw, + __floattikf_sw, __floatuntikf_sw respectively. + * config/rs6000/rs6000.md (floatti<mode>2, floatunsti<mode>2, + fix_trunc<mode>ti2, fixuns_trunc<mode>ti2): Add + define_insn for mode IEEE 128. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * config/rs6000/altivec.md (altivec_vslq, altivec_vsrq): + Rename to altivec_vslq_<mode>, altivec_vsrq_<mode>, mode VEC_TI. + * config/rs6000/vector.md (VEC_TI): Was named VSX_TI in vsx.md. + (vashlv1ti3): Change to vashl<mode>3, mode VEC_TI. + (vlshrv1ti3): Change to vlshr<mode>3, mode VEC_TI. + * config/rs6000/vsx.md (VSX_TI): Remove define_mode_iterator. Update + uses of VSX_TI to VEC_TI. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * config/rs6000/dfp.md (floattitd2, fixtdti2): New define_insns. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * config/rs6000/altivec.h (vec_dive, vec_mod): Add define for new + builtins. + * config/rs6000/altivec.md (UNSPEC_VMULEUD, UNSPEC_VMULESD, + UNSPEC_VMULOUD, UNSPEC_VMULOSD): New unspecs. + (altivec_eqv1ti, altivec_gtv1ti, altivec_gtuv1ti, altivec_vmuleud, + altivec_vmuloud, altivec_vmulesd, altivec_vmulosd, altivec_vrlq, + altivec_vrlqmi, altivec_vrlqmi_inst, altivec_vrlqnm, + altivec_vrlqnm_inst, altivec_vslq, altivec_vsrq, altivec_vsraq, + altivec_vcmpequt_p, altivec_vcmpgtst_p, altivec_vcmpgtut_p): New + define_insn. + (vec_widen_umult_even_v2di, vec_widen_smult_even_v2di, + vec_widen_umult_odd_v2di, vec_widen_smult_odd_v2di, altivec_vrlqmi, + altivec_vrlqnm): New define_expands. + * config/rs6000/rs6000-builtin.def (VCMPEQUT_P, VCMPGTST_P, + VCMPGTUT_P): Add macro expansions. + (BU_P10V_AV_P): Add builtin predicate definition. 
+ (VCMPGTUT, VCMPGTST, VCMPEQUT, CMPNET, CMPGE_1TI, + CMPGE_U1TI, CMPLE_1TI, CMPLE_U1TI, VNOR_V1TI_UNS, VNOR_V1TI, VCMPNET_P, + VCMPAET_P, VMULEUD, VMULESD, VMULOUD, VMULOSD, VRLQ, + VSLQ, VSRQ, VSRAQ, VRLQNM, DIV_V1TI, UDIV_V1TI, DIVES_V1TI, DIVEU_V1TI, + MODS_V1TI, MODU_V1TI, VRLQMI): New macro expansions. + (VRLQ, VSLQ, VSRQ, VSRAQ, DIVE, MOD): New overload expansions. + * config/rs6000/rs6000-call.c (P10_BUILTIN_VCMPEQUT, + P10V_BUILTIN_CMPGE_1TI, P10V_BUILTIN_CMPGE_U1TI, + P10V_BUILTIN_VCMPGTUT, P10V_BUILTIN_VCMPGTST, + P10V_BUILTIN_CMPLE_1TI, P10V_BUILTIN_VCMPLE_U1TI, + P10V_BUILTIN_DIV_V1TI, P10V_BUILTIN_UDIV_V1TI, + P10V_BUILTIN_VMULESD, P10V_BUILTIN_VMULEUD, + P10V_BUILTIN_VMULOSD, P10V_BUILTIN_VMULOUD, + P10V_BUILTIN_VNOR_V1TI, P10V_BUILTIN_VNOR_V1TI_UNS, + P10V_BUILTIN_VRLQ, P10V_BUILTIN_VRLQMI, + P10V_BUILTIN_VRLQNM, P10V_BUILTIN_VSLQ, + P10V_BUILTIN_VSRQ, P10V_BUILTIN_VSRAQ, + P10V_BUILTIN_VCMPGTUT_P, P10V_BUILTIN_VCMPGTST_P, + P10V_BUILTIN_VCMPEQUT_P, P10V_BUILTIN_VCMPGTUT_P, + P10V_BUILTIN_VCMPGTST_P, P10V_BUILTIN_CMPNET, + P10V_BUILTIN_VCMPNET_P, P10V_BUILTIN_VCMPAET_P, + P10V_BUILTIN_DIVES_V1TI, P10V_BUILTIN_MODS_V1TI, + P10V_BUILTIN_MODU_V1TI): + New overloaded definitions. + (rs6000_gimple_fold_builtin) [P10V_BUILTIN_VCMPEQUT, + P10V_BUILTIN_CMPNET, P10V_BUILTIN_CMPGE_1TI, + P10V_BUILTIN_CMPGE_U1TI, P10V_BUILTIN_VCMPGTUT, + P10V_BUILTIN_VCMPGTST, P10V_BUILTIN_CMPLE_1TI, + P10V_BUILTIN_CMPLE_U1TI]: New case statements. + (rs6000_init_builtins) [bool_V1TI_type_node, int_ftype_int_v1ti_v1ti]: + New assignments. + (altivec_init_builtins): New E_V1TImode case statement. + (builtin_function_type)[P10_BUILTIN_128BIT_VMULEUD, + P10_BUILTIN_128BIT_VMULOUD, P10_BUILTIN_128BIT_DIVEU_V1TI, + P10_BUILTIN_128BIT_MODU_V1TI, P10_BUILTIN_CMPGE_U1TI, + P10_BUILTIN_VCMPGTUT, P10_BUILTIN_VCMPEQUT]: New case statements. + * config/rs6000/rs6000.c (rs6000_handle_altivec_attribute) [E_TImode, + E_V1TImode]: New case statements. 
+ * config/rs6000/rs6000.h (rs6000_builtin_type_index): New enum + value RS6000_BTI_bool_V1TI. + * config/rs6000/vector.md (vector_gtv1ti,vector_nltv1ti, + vector_gtuv1ti, vector_nltuv1ti, vector_ngtv1ti, vector_ngtuv1ti, + vector_eq_v1ti_p, vector_ne_v1ti_p, vector_ae_v1ti_p, + vector_gt_v1ti_p, vector_gtu_v1ti_p, vrotlv1ti3, vashlv1ti3, + vlshrv1ti3, vashrv1ti3): New define_expands. + * config/rs6000/vsx.md (UNSPEC_VSX_DIVSQ, UNSPEC_VSX_DIVUQ, + UNSPEC_VSX_DIVESQ, UNSPEC_VSX_DIVEUQ, UNSPEC_VSX_MODSQ, + UNSPEC_VSX_MODUQ): New unspecs. + (mulv2di3, vsx_div_v1ti, vsx_udiv_v1ti, vsx_dives_v1ti, + vsx_diveu_v1ti, vsx_mods_v1ti, vsx_modu_v1ti, xxswapd_v1ti): New + define_insns. + (vcmpnet): New define_expand. + * doc/extend.texi: Add documentation for the new builtins vec_rl, + vec_rlmi, vec_rlnm, vec_sl, vec_sr, vec_sra, vec_mule, vec_mulo, + vec_div, vec_dive, vec_mod, vec_cmpeq, vec_cmpne, vec_cmpgt, vec_cmplt, + vec_cmpge, vec_cmple, vec_all_eq, vec_all_ne, vec_all_gt, vec_all_lt, + vec_all_ge, vec_all_le, vec_any_eq, vec_any_ne, vec_any_gt, vec_any_lt, + vec_any_ge, vec_any_le. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * config/rs6000/altivec.md (altivec_vrl<VI_char>mi): Fix + bug in argument generation. + +2021-06-09 Christophe Lyon <christophe.lyon@linaro.org> + + * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S. + (VCLZQ): Remove. + * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix, + remove <supf> iterator. + (mve_vclzq_u<mode>): New. + * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>. + (neon_vclz<mode): Move to ... + * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove. + * config/arm/vec-common.md: ... here. Add support for MVE. + +2021-06-09 Christophe Lyon <christophe.lyon@linaro.org> + + * config/arm/mve.md (mve_vhaddq_<supf><mode>): Prefix with '@'. + (@mve_vrhaddq_<supf><mode): Likewise. + * config/arm/neon.md (neon_v<r>hadd<sup><mode>): Likewise. 
+ * config/arm/vec-common.md (avg<mode>3_floor, uavg<mode>3_floor) + (avg<mode>3_ceil", uavg<mode>3_ceil): New patterns. + +2021-06-09 imba-tjd <109224573@qq.com> + + * doc/invoke.texi: Fix typo. + +2021-06-09 Roger Sayle <roger@nextmovesoftware.com> + + PR middle-end/53267 + * fold-const-call.c (fold_const_call_sss) [CASE_CFN_FMOD]: + Support evaluation of fmod/fmodf/fmodl at compile-time. + +2021-06-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/100981 + * tree-vect-loop.c (vect_create_epilog_for_reduction): Use + gimple_get_lhs to also handle calls. + * tree-vect-slp-patterns.c (complex_pattern::build): Transfer + reduction info. + +2021-06-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/97832 + * tree-vectorizer.h (_slp_tree::failed): New. + * tree-vect-slp.c (_slp_tree::_slp_tree): Initialize + failed member. + (_slp_tree::~_slp_tree): Free failed. + (vect_build_slp_tree): Retain failed nodes and record + matches in them, copying that back out when running + into a cached fail. Dump start and end of discovery. + (dt_sort_cmp): New. + (vect_build_slp_tree_2): Handle associatable chains + together doing more aggressive operand swapping. + +2021-06-09 H.J. Lu <hjl.tools@gmail.com> + + PR target/100896 + * config.gcc (gcc_cv_initfini_array): Set to yes for Linux and + GNU targets. + * doc/install.texi: Require glibc 2.1 and binutils 2.12 for + Linux and GNU targets. + +2021-06-09 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.c (vect_is_simple_use): Always get dt + from the stmt. + +2021-06-09 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc.md (loop_end): Change it to + define_insn_and_split. + +2021-06-09 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc.md (maddhisi4): Use VMAC2H instruction. + (machi): New pattern. + (umaddhisi4): Use VMAC2HU instruction. + (umachi): New pattern. 
+ +2021-06-09 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc-protos.h (arc_split_move_p): New prototype. + * config/arc/arc.c (arc_split_move_p): New function. + (arc_split_move): Clean up. + * config/arc/arc.md (movdi_insn): Clean up, use arc_split_move_p. + (movdf_insn): Likewise. + * config/arc/simdext.md (mov<VWH>_insn): Likewise. + +2021-06-09 Uroš Bizjak <ubizjak@gmail.com> + + PR target/100936 + * config/i386/i386.c (print_operand_address_as): Rename "no_rip" + argument to "raw". Do not emit segment overrides when "raw" is true. + +2021-06-09 Martin Liska <mliska@suse.cz> + + * doc/gcov.texi: Create a proper JSON files. + * doc/invoke.texi: Remove dots in order to make it a valid + JSON object. + +2021-06-09 Xionghu Luo <luoxhu@linux.ibm.com> + + * config/rs6000/rs6000-p8swap.c (pattern_is_rotate64): New. + (insn_is_load_p): Use pattern_is_rotate64. + (insn_is_swap_p): Likewise. + (quad_aligned_load_p): Likewise. + (const_load_sequence_p): Likewise. + (replace_swapped_aligned_load): Likewise. + (recombine_lvx_pattern): Likewise. + (recombine_stvx_pattern): Likewise. + +2021-06-09 Andrew MacLeod <amacleod@redhat.com> + + * gimple-range-gori.cc (gori_compute::outgoing_edge_range_p): Use a + fur_stmt source record. + * gimple-range.cc (fur_source::get_operand): Generic range query. + (fur_source::get_phi_operand): New. + (fur_source::register_dependency): New. + (fur_source::query): New. + (class fur_edge): New. Edge source for operands. + (fur_edge::fur_edge): New. + (fur_edge::get_operand): New. + (fur_edge::get_phi_operand): New. + (fur_edge::query): New. + (fur_stmt::fur_stmt): New. + (fur_stmt::get_operand): New. + (fur_stmt::get_phi_operand): New. + (fur_stmt::query): New. + (class fur_depend): New. Statement source and process dependencies. + (fur_depend::fur_depend): New. + (fur_depend::register_dependency): New. + (class fur_list): New. List source for operands. + (fur_list::fur_list): New. + (fur_list::get_operand): New. 
+ (fur_list::get_phi_operand): New. + (fold_range): New. Instantiate appropriate fur_source class and fold. + (fold_using_range::range_of_range_op): Use new API. + (fold_using_range::range_of_address): Ditto. + (fold_using_range::range_of_phi): Ditto. + (imple_ranger::fold_range_internal): Use fur_depend class. + (fold_using_range::range_of_ssa_name_with_loop_info): Use new API. + * gimple-range.h (class fur_source): Now a base class. + (class fur_stmt): New. + (fold_range): New prototypes. + (fur_source::fur_source): Delete. + 2021-06-08 Andrew Pinski <apinski@marvell.com> PR tree-optimization/25290 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 217a880..04de83c 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210609 +20210610 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index c3a3d39..f2061ac 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,18 @@ +2021-06-09 David Malcolm <dmalcolm@redhat.com> + + * region-model.cc (region_model::get_lvalue_1): Make const. + (region_model::get_lvalue): Likewise. + (region_model::get_rvalue_1): Likewise. + (region_model::get_rvalue): Likewise. + (region_model::deref_rvalue): Likewise. + (region_model::get_rvalue_for_bits): Likewise. + * region-model.h (region_model::get_lvalue): Likewise. + (region_model::get_rvalue): Likewise. + (region_model::deref_rvalue): Likewise. + (region_model::get_rvalue_for_bits): Likewise. + (region_model::get_lvalue_1): Likewise. + (region_model::get_rvalue_1): Likewise. + 2021-06-08 David Malcolm <dmalcolm@redhat.com> PR analyzer/99212 diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 0d363fb..551ee79 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1213,7 +1213,7 @@ region_model::handle_phi (const gphi *phi, emitting any diagnostics to CTXT. 
*/ const region * -region_model::get_lvalue_1 (path_var pv, region_model_context *ctxt) +region_model::get_lvalue_1 (path_var pv, region_model_context *ctxt) const { tree expr = pv.m_tree; @@ -1312,7 +1312,7 @@ assert_compat_types (tree src_type, tree dst_type) emitting any diagnostics to CTXT. */ const region * -region_model::get_lvalue (path_var pv, region_model_context *ctxt) +region_model::get_lvalue (path_var pv, region_model_context *ctxt) const { if (pv.m_tree == NULL_TREE) return NULL; @@ -1326,7 +1326,7 @@ region_model::get_lvalue (path_var pv, region_model_context *ctxt) recent stack frame if it's a local). */ const region * -region_model::get_lvalue (tree expr, region_model_context *ctxt) +region_model::get_lvalue (tree expr, region_model_context *ctxt) const { return get_lvalue (path_var (expr, get_stack_depth () - 1), ctxt); } @@ -1337,7 +1337,7 @@ region_model::get_lvalue (tree expr, region_model_context *ctxt) emitting any diagnostics to CTXT. */ const svalue * -region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) +region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) const { gcc_assert (pv.m_tree); @@ -1441,7 +1441,7 @@ region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) emitting any diagnostics to CTXT. */ const svalue * -region_model::get_rvalue (path_var pv, region_model_context *ctxt) +region_model::get_rvalue (path_var pv, region_model_context *ctxt) const { if (pv.m_tree == NULL_TREE) return NULL; @@ -1457,7 +1457,7 @@ region_model::get_rvalue (path_var pv, region_model_context *ctxt) recent stack frame if it's a local). 
*/ const svalue * -region_model::get_rvalue (tree expr, region_model_context *ctxt) +region_model::get_rvalue (tree expr, region_model_context *ctxt) const { return get_rvalue (path_var (expr, get_stack_depth () - 1), ctxt); } @@ -1624,7 +1624,7 @@ region_model::region_exists_p (const region *reg) const const region * region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree, - region_model_context *ctxt) + region_model_context *ctxt) const { gcc_assert (ptr_sval); gcc_assert (POINTER_TYPE_P (ptr_sval->get_type ())); @@ -1705,7 +1705,7 @@ region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree, const svalue * region_model::get_rvalue_for_bits (tree type, const region *reg, - const bit_range &bits) + const bit_range &bits) const { const svalue *sval = get_store_value (reg); if (const compound_svalue *compound_sval = sval->dyn_cast_compound_svalue ()) diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 5e43e54..e251a5b 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -501,17 +501,17 @@ class region_model int get_stack_depth () const; const frame_region *get_frame_at_index (int index) const; - const region *get_lvalue (path_var pv, region_model_context *ctxt); - const region *get_lvalue (tree expr, region_model_context *ctxt); - const svalue *get_rvalue (path_var pv, region_model_context *ctxt); - const svalue *get_rvalue (tree expr, region_model_context *ctxt); + const region *get_lvalue (path_var pv, region_model_context *ctxt) const; + const region *get_lvalue (tree expr, region_model_context *ctxt) const; + const svalue *get_rvalue (path_var pv, region_model_context *ctxt) const; + const svalue *get_rvalue (tree expr, region_model_context *ctxt) const; const region *deref_rvalue (const svalue *ptr_sval, tree ptr_tree, - region_model_context *ctxt); + region_model_context *ctxt) const; const svalue *get_rvalue_for_bits (tree type, const region *reg, - const bit_range &bits); + const bit_range 
&bits) const; void set_value (const region *lhs_reg, const svalue *rhs_sval, region_model_context *ctxt); @@ -585,8 +585,8 @@ class region_model void loop_replay_fixup (const region_model *dst_state); private: - const region *get_lvalue_1 (path_var pv, region_model_context *ctxt); - const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt); + const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const; + const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt) const; path_var get_representative_path_var_1 (const svalue *sval, diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 8c9b355..460ced3 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2021-06-09 Jason Merrill <jason@redhat.com> + + PR c++/100879 + * c-warn.c (warn_for_sign_compare): Remove C++ enum mismatch + warning. + 2021-06-07 Martin Liska <mliska@suse.cz> * c-target.def: Split long lines and replace them diff --git a/gcc/config.gcc b/gcc/config.gcc index 6833a6c..4dc4fe0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -848,6 +848,8 @@ case ${target} in tmake_file="${tmake_file} t-glibc" target_has_targetcm=yes target_has_targetdm=yes + # Linux targets always support .init_array. 
+ gcc_cv_initfini_array=yes ;; *-*-netbsd*) tm_p_file="${tm_p_file} netbsd-protos.h" diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 3042baf..5c4fe89 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1288,7 +1288,7 @@ (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") + (VREV32Q_U "u") (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") @@ -1538,7 +1538,6 @@ (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 0bfa6a9..99e46d0 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -435,16 +435,22 @@ ;; ;; [vclzq_u, vclzq_s]) ;; -(define_insn "mve_vclzq_<supf><mode>" +(define_insn "@mve_vclzq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] - VCLZQ)) + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" "vclz.i%#<V_sz_elem> %q0, %q1" [(set_attr "type" "mve_move") ]) +(define_expand "mve_vclzq_u<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand") + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) + ] + "TARGET_HAVE_MVE" +) ;; ;; [vclsq_s]) @@ -1030,7 +1036,7 @@ ;; ;; [vhaddq_s, vhaddq_u]) ;; -(define_insn "mve_vhaddq_<supf><mode>" +(define_insn "@mve_vhaddq_<supf><mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 
"s_register_operand" "w") @@ -1652,7 +1658,7 @@ ;; ;; [vrhaddq_s, vrhaddq_u]) ;; -(define_insn "mve_vrhaddq_<supf><mode>" +(define_insn "@mve_vrhaddq_<supf><mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 077c62f..0fdffaf 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1488,7 +1488,7 @@ ; vhadd and vrhadd. -(define_insn "neon_v<r>hadd<sup><mode>" +(define_insn "@neon_v<r>hadd<sup><mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") (match_operand:VDQIW 2 "s_register_operand" "w")] @@ -3018,7 +3018,7 @@ [(set_attr "type" "neon_cls<q>")] ) -(define_insn "clz<mode>2" +(define_insn "neon_vclz<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" @@ -3026,15 +3026,6 @@ [(set_attr "type" "neon_cnt<q>")] ) -(define_expand "neon_vclz<mode>" - [(match_operand:VDQIW 0 "s_register_operand") - (match_operand:VDQIW 1 "s_register_operand")] - "TARGET_NEON" -{ - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); - DONE; -}) - (define_insn "popcount<mode>2" [(set (match_operand:VE 0 "s_register_operand" "=w") (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ed1bc29..ad1c6ed 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -556,8 +556,6 @@ VQABSQ_S VDUPQ_N_U VDUPQ_N_S - VCLZQ_U - VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 80b2732..430a92c 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -565,3 +565,70 @@ DONE; }) + +(define_expand "avg<mode>3_floor" + [(match_operand:MVE_2 0 "s_register_operand") + (match_operand:MVE_2 1 "s_register_operand") + 
(match_operand:MVE_2 2 "s_register_operand")] + "ARM_HAVE_<MODE>_ARITH" +{ + if (TARGET_HAVE_MVE) + emit_insn (gen_mve_vhaddq (VHADDQ_S, <MODE>mode, + operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vhadd (UNSPEC_VHADD_S, UNSPEC_VHADD_S, <MODE>mode, + operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "uavg<mode>3_floor" + [(match_operand:MVE_2 0 "s_register_operand") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand")] + "ARM_HAVE_<MODE>_ARITH" +{ + if (TARGET_HAVE_MVE) + emit_insn (gen_mve_vhaddq (VHADDQ_U, <MODE>mode, + operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vhadd (UNSPEC_VHADD_U, UNSPEC_VHADD_U, <MODE>mode, + operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "avg<mode>3_ceil" + [(match_operand:MVE_2 0 "s_register_operand") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand")] + "ARM_HAVE_<MODE>_ARITH" +{ + if (TARGET_HAVE_MVE) + emit_insn (gen_mve_vrhaddq (VRHADDQ_S, <MODE>mode, + operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vhadd (UNSPEC_VRHADD_S, UNSPEC_VRHADD_S, <MODE>mode, + operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "uavg<mode>3_ceil" + [(match_operand:MVE_2 0 "s_register_operand") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand")] + "ARM_HAVE_<MODE>_ARITH" +{ + if (TARGET_HAVE_MVE) + emit_insn (gen_mve_vrhaddq (VRHADDQ_U, <MODE>mode, + operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vhadd (UNSPEC_VRHADD_U, UNSPEC_VRHADD_U, <MODE>mode, + operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "clz<mode>2" + [(set (match_operand:VDQIW 0 "s_register_operand") + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +) diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h index 
807e260..38cfa9e 100644 --- a/gcc/config/rs6000/aix71.h +++ b/gcc/config/rs6000/aix71.h @@ -78,6 +78,7 @@ do { \ #undef ASM_CPU_SPEC #define ASM_CPU_SPEC \ "%{mcpu=native: %(asm_cpu_native); \ + mcpu=power10: -mpwr10; \ mcpu=power9: -mpwr9; \ mcpu=power8: -mpwr8; \ mcpu=power7: -mpwr7; \ diff --git a/gcc/config/rs6000/aix72.h b/gcc/config/rs6000/aix72.h index 36c5d99..4cd27e3 100644 --- a/gcc/config/rs6000/aix72.h +++ b/gcc/config/rs6000/aix72.h @@ -78,6 +78,7 @@ do { \ #undef ASM_CPU_SPEC #define ASM_CPU_SPEC \ "%{mcpu=native: %(asm_cpu_native); \ + mcpu=power10: -mpwr10; \ mcpu=power9: -mpwr9; \ mcpu=power8: -mpwr8; \ mcpu=power7: -mpwr7; \ diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 961621a..5b631c7 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -497,6 +497,8 @@ #define vec_xlx __builtin_vec_vextulx #define vec_xrx __builtin_vec_vexturx +#define vec_signexti __builtin_vec_vsignexti +#define vec_signextll __builtin_vec_vsignextll #endif @@ -715,6 +717,10 @@ __altivec_scalar_pred(vec_any_nle, #define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0) #ifdef _ARCH_PWR10 +#define vec_signextq __builtin_vec_vsignextq +#define vec_dive __builtin_vec_dive +#define vec_mod __builtin_vec_mod + /* May modify these macro definitions if future capabilities overload with support for different vector argument and result types. 
*/ #define vec_cntlzm(a, b) __builtin_altivec_vclzdm (a, b) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1351daf..dad3a07 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -39,12 +39,16 @@ UNSPEC_VMULESH UNSPEC_VMULEUW UNSPEC_VMULESW + UNSPEC_VMULEUD + UNSPEC_VMULESD UNSPEC_VMULOUB UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH UNSPEC_VMULOUW UNSPEC_VMULOSW + UNSPEC_VMULOUD + UNSPEC_VMULOSD UNSPEC_VPKPX UNSPEC_VPACK_SIGN_SIGN_SAT UNSPEC_VPACK_SIGN_UNS_SAT @@ -619,6 +623,14 @@ "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) +(define_insn "altivec_eqv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v")))] + "TARGET_POWER10" + "vcmpequq %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "altivec_eq<mode>" [(set (match_operand:VI2 0 "altivec_register_operand" "=v") (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") @@ -635,6 +647,14 @@ "vcmpgts<VI_char> %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_gtv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (gt:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v")))] + "TARGET_POWER10" + "vcmpgtsq %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_gtu<mode>" [(set (match_operand:VI2 0 "altivec_register_operand" "=v") (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") @@ -643,6 +663,14 @@ "vcmpgtu<VI_char> %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_gtuv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v")))] + "TARGET_POWER10" + "vcmpgtuq %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_eqv4sf" [(set 
(match_operand:V4SF 0 "altivec_register_operand" "=v") (eq:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") @@ -1693,6 +1721,19 @@ DONE; }) +(define_expand "vec_widen_umult_even_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmuleud (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuloud (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_smult_even_v4si" [(use (match_operand:V2DI 0 "register_operand")) (use (match_operand:V4SI 1 "register_operand")) @@ -1706,6 +1747,19 @@ DONE; }) +(define_expand "vec_widen_smult_even_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmulesd (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulosd (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_umult_odd_v16qi" [(use (match_operand:V8HI 0 "register_operand")) (use (match_operand:V16QI 1 "register_operand")) @@ -1771,6 +1825,19 @@ DONE; }) +(define_expand "vec_widen_umult_odd_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + (use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmuloud (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmuleud (operands[0], operands[1], operands[2])); + DONE; +}) + (define_expand "vec_widen_smult_odd_v4si" [(use (match_operand:V2DI 0 "register_operand")) (use (match_operand:V4SI 1 "register_operand")) @@ -1784,6 +1851,19 @@ DONE; }) +(define_expand "vec_widen_smult_odd_v2di" + [(use (match_operand:V1TI 0 "register_operand")) + 
(use (match_operand:V2DI 1 "register_operand")) + (use (match_operand:V2DI 2 "register_operand"))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmulosd (operands[0], operands[1], operands[2])); + else + emit_insn (gen_altivec_vmulesd (operands[0], operands[1], operands[2])); + DONE; +}) + (define_insn "altivec_vmuleub" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1865,6 +1945,15 @@ "vmuleuw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuleud" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULEUD))] + "TARGET_POWER10" + "vmuleud %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulouw" [(set (match_operand:V2DI 0 "register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") @@ -1874,6 +1963,15 @@ "vmulouw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuloud" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULOUD))] + "TARGET_POWER10" + "vmuloud %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulesw" [(set (match_operand:V2DI 0 "register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") @@ -1883,6 +1981,15 @@ "vmulesw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmulesd" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULESD))] + "TARGET_POWER10" + "vmulesd %0,%1,%2" + [(set_attr "type" "veccomplex")]) + (define_insn "altivec_vmulosw" [(set (match_operand:V2DI 0 "register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 1 
"register_operand" "v") @@ -1892,6 +1999,15 @@ "vmulosw %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmulosd" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + UNSPEC_VMULOSD))] + "TARGET_POWER10" + "vmulosd %0,%1,%2" + [(set_attr "type" "veccomplex")]) + ;; Vector pack/unpack (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") @@ -1985,14 +2101,51 @@ "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vrlq" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +;; rotate amount in needs to be in bits[57:63] of operand2. + "vrlq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl<VI_char>mi" [(set (match_operand:VIlong 0 "register_operand" "=v") - (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "0") - (match_operand:VIlong 2 "register_operand" "v") + (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v") + (match_operand:VIlong 2 "register_operand" "0") (match_operand:VIlong 3 "register_operand" "v")] UNSPEC_VRLMI))] "TARGET_P9_VECTOR" - "vrl<VI_char>mi %0,%2,%3" + "vrl<VI_char>mi %0,%1,%3" + [(set_attr "type" "veclogical")]) + +(define_expand "altivec_vrlqmi" + [(set (match_operand:V1TI 0 "vsx_register_operand") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand") + (match_operand:V1TI 2 "vsx_register_operand") + (match_operand:V1TI 3 "vsx_register_operand")] + UNSPEC_VRLMI))] + "TARGET_POWER10" +{ + /* Mask bit begin, end fields need to be in bits [41:55] of 128-bit operand2. + Shift amount in needs to be put in bits[57:63] of 128-bit operand2. 
*/ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[3])); + emit_insn (gen_altivec_vrlqmi_inst (operands[0], operands[1], operands[2], + tmp)); + DONE; +}) + +(define_insn "altivec_vrlqmi_inst" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "0") + (match_operand:V1TI 3 "vsx_register_operand" "v")] + UNSPEC_VRLMI))] + "TARGET_POWER10" + "vrlqmi %0,%1,%3" [(set_attr "type" "veclogical")]) (define_insn "altivec_vrl<VI_char>nm" @@ -2004,6 +2157,31 @@ "vrl<VI_char>nm %0,%1,%2" [(set_attr "type" "veclogical")]) +(define_expand "altivec_vrlqnm" + [(set (match_operand:V1TI 0 "vsx_register_operand") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand") + (match_operand:V1TI 2 "vsx_register_operand")] + UNSPEC_VRLNM))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vrlqnm_inst (operands[0], operands[1], tmp)); + DONE; +}) + +(define_insn "altivec_vrlqnm_inst" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VRLNM))] + "TARGET_POWER10" + ;; rotate and mask bits need to be in upper 64-bits of operand2. 
+ "vrlqnm %0,%1,%2" + [(set_attr "type" "veclogical")]) + (define_insn "altivec_vsl" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -2048,6 +2226,15 @@ "vsl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vslq_<mode>" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v") + (match_operand:VEC_TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + /* Shift amount in needs to be in bits[57:63] of 128-bit operand. */ + "vslq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsr<VI_char>" [(set (match_operand:VI2 0 "register_operand" "=v") (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") @@ -2056,6 +2243,15 @@ "vsr<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vsrq_<mode>" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v") + (match_operand:VEC_TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + /* Shift amount in needs to be in bits[57:63] of 128-bit operand. */ + "vsrq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsra<VI_char>" [(set (match_operand:VI2 0 "register_operand" "=v") (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") @@ -2064,6 +2260,15 @@ "vsra<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_insn "altivec_vsraq" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" + /* Shift amount in needs to be in bits[57:63] of 128-bit operand. 
*/ + "vsraq %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vsr" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -2624,6 +2829,18 @@ "vcmpequ<VI_char>. %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "altivec_vcmpequt_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand" "v") + (match_operand:V1TI 2 "altivec_register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "altivec_register_operand" "=v") + (eq:V1TI (match_dup 1) + (match_dup 2)))] + "TARGET_POWER10" + "vcmpequq. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_vcmpgts<VI_char>_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") @@ -2636,6 +2853,18 @@ "vcmpgts<VI_char>. %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_vcmpgtst_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "register_operand" "=v") + (gt:V1TI (match_dup 1) + (match_dup 2)))] + "TARGET_POWER10" + "vcmpgtsq. %0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_vcmpgtu<VI_char>_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") @@ -2648,6 +2877,18 @@ "vcmpgtu<VI_char>. %0,%1,%2" [(set_attr "type" "veccmpfx")]) +(define_insn "*altivec_vcmpgtut_p" + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "register_operand" "=v") + (gtu:V1TI (match_dup 1) + (match_dup 2)))] + "TARGET_POWER10" + "vcmpgtuq. 
%0,%1,%2" + [(set_attr "type" "veccmpfx")]) + (define_insn "*altivec_vcmpeqfp_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v") @@ -4050,6 +4291,30 @@ }) ;; Vector reverse elements +(define_expand "altivec_vreveti2" + [(set (match_operand:TI 0 "register_operand" "=v") + (unspec:TI [(match_operand:TI 1 "register_operand" "v")] + UNSPEC_VREVEV))] + "TARGET_ALTIVEC" +{ + int i, j, size, num_elements; + rtvec v = rtvec_alloc (16); + rtx mask = gen_reg_rtx (V16QImode); + + size = GET_MODE_UNIT_SIZE (TImode); + num_elements = GET_MODE_NUNITS (TImode); + + for (j = 0; j < num_elements; j++) + for (i = 0; i < size; i++) + RTVEC_ELT (v, i + j * size) + = GEN_INT (i + (num_elements - 1 - j) * size); + + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vperm_ti (operands[0], operands[1], + operands[1], mask)); + DONE; +}) + (define_expand "altivec_vreve<mode>2" [(set (match_operand:VEC_A 0 "register_operand" "=v") (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")] diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 026be5d..b89d5ec 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -226,6 +226,13 @@ [(set_attr "type" "dfp") (set_attr "size" "128")]) +(define_insn "floattitd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (float:TD (match_operand:TI 1 "gpc_reg_operand" "v")))] + "TARGET_POWER10" + "dcffixqq %0,%1" + [(set_attr "type" "dfp")]) + ;; Convert a decimal64/128 to a decimal64/128 whose value is an integer. ;; This is the first stage of converting it to an integer type. 
@@ -247,6 +254,13 @@ "dctfix<q> %0,%1" [(set_attr "type" "dfp") (set_attr "size" "<bits>")]) + +(define_insn "fixtdti2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=v") + (fix:TI (match_operand:TD 1 "gpc_reg_operand" "d")))] + "TARGET_POWER10" + "dctfixqq %0,%1" + [(set_attr "type" "dfp")]) ;; Decimal builtin support diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 609bebd..d55095b 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1269,6 +1269,15 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +/* See the comment on BU_ALTIVEC_P. */ +#define BU_P10V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P10V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P10, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_P10V_AV_X(ENUM, NAME, ATTR) \ RS6000_BUILTIN_X (P10_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_altivec_" NAME, /* NAME */ \ @@ -2868,6 +2877,8 @@ BU_P9V_OVERLOAD_1 (VPRTYBD, "vprtybd") BU_P9V_OVERLOAD_1 (VPRTYBQ, "vprtybq") BU_P9V_OVERLOAD_1 (VPRTYBW, "vprtybw") BU_P9V_OVERLOAD_1 (VPARITY_LSBB, "vparity_lsbb") +BU_P9V_OVERLOAD_1 (VSIGNEXTI, "vsignexti") +BU_P9V_OVERLOAD_1 (VSIGNEXTLL, "vsignextll") /* 2 argument functions added in ISA 3.0 (power9). 
*/ BU_P9_2 (CMPRB, "byte_in_range", CONST, cmprb) @@ -2879,7 +2890,18 @@ BU_P9_OVERLOAD_2 (CMPRB, "byte_in_range") BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") + +BU_P9V_AV_1 (VSIGNEXTSB2W, "vsignextsb2w", CONST, vsignextend_qi_v4si) +BU_P9V_AV_1 (VSIGNEXTSH2W, "vsignextsh2w", CONST, vsignextend_hi_v4si) +BU_P9V_AV_1 (VSIGNEXTSB2D, "vsignextsb2d", CONST, vsignextend_qi_v2di) +BU_P9V_AV_1 (VSIGNEXTSH2D, "vsignextsh2d", CONST, vsignextend_hi_v2di) +BU_P9V_AV_1 (VSIGNEXTSW2D, "vsignextsw2d", CONST, vsignextend_si_v2di) + /* Builtins for scalar instructions added in ISA 3.1 (power10). */ +BU_P10V_AV_P (VCMPEQUT_P, "vcmpequt_p", CONST, vector_eq_v1ti_p) +BU_P10V_AV_P (VCMPGTST_P, "vcmpgtst_p", CONST, vector_gt_v1ti_p) +BU_P10V_AV_P (VCMPGTUT_P, "vcmpgtut_p", CONST, vector_gtu_v1ti_p) + BU_P10_POWERPC64_MISC_2 (CFUGED, "cfuged", CONST, cfuged) BU_P10_POWERPC64_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm) BU_P10_POWERPC64_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm) @@ -2900,7 +2922,38 @@ BU_P10V_VSX_2 (XXGENPCVM_V16QI, "xxgenpcvm_v16qi", CONST, xxgenpcvm_v16qi) BU_P10V_VSX_2 (XXGENPCVM_V8HI, "xxgenpcvm_v8hi", CONST, xxgenpcvm_v8hi) BU_P10V_VSX_2 (XXGENPCVM_V4SI, "xxgenpcvm_v4si", CONST, xxgenpcvm_v4si) BU_P10V_VSX_2 (XXGENPCVM_V2DI, "xxgenpcvm_v2di", CONST, xxgenpcvm_v2di) - +BU_P10V_AV_2 (VCMPGTUT, "vcmpgtut", CONST, vector_gtuv1ti) +BU_P10V_AV_2 (VCMPGTST, "vcmpgtst", CONST, vector_gtv1ti) +BU_P10V_AV_2 (VCMPEQUT, "vcmpequt", CONST, eqvv1ti3) +BU_P10V_AV_2 (CMPNET, "vcmpnet", CONST, vcmpnet) +BU_P10V_AV_2 (CMPGE_1TI, "cmpge_1ti", CONST, vector_nltv1ti) +BU_P10V_AV_2 (CMPGE_U1TI, "cmpge_u1ti", CONST, vector_nltuv1ti) +BU_P10V_AV_2 (CMPLE_1TI, "cmple_1ti", CONST, vector_ngtv1ti) +BU_P10V_AV_2 (CMPLE_U1TI, "cmple_u1ti", CONST, vector_ngtuv1ti) +BU_P10V_AV_2 (VNOR_V1TI_UNS, "vnor_v1ti_uns",CONST, norv1ti3) +BU_P10V_AV_2 (VNOR_V1TI, "vnor_v1ti", CONST, norv1ti3) +BU_P10V_AV_2 (VCMPNET_P, "vcmpnet_p", CONST, vector_ne_v1ti_p) 
+BU_P10V_AV_2 (VCMPAET_P, "vcmpaet_p", CONST, vector_ae_v1ti_p) + +BU_P10V_AV_1 (VSIGNEXTSD2Q, "vsignext", CONST, vsignextend_v2di_v1ti) + +BU_P10V_AV_2 (VMULEUD, "vmuleud", CONST, vec_widen_umult_even_v2di) +BU_P10V_AV_2 (VMULESD, "vmulesd", CONST, vec_widen_smult_even_v2di) +BU_P10V_AV_2 (VMULOUD, "vmuloud", CONST, vec_widen_umult_odd_v2di) +BU_P10V_AV_2 (VMULOSD, "vmulosd", CONST, vec_widen_smult_odd_v2di) +BU_P10V_AV_2 (VRLQ, "vrlq", CONST, vrotlv1ti3) +BU_P10V_AV_2 (VSLQ, "vslq", CONST, vashlv1ti3) +BU_P10V_AV_2 (VSRQ, "vsrq", CONST, vlshrv1ti3) +BU_P10V_AV_2 (VSRAQ, "vsraq", CONST, vashrv1ti3) +BU_P10V_AV_2 (VRLQNM, "vrlqnm", CONST, altivec_vrlqnm) +BU_P10V_AV_2 (DIV_V1TI, "div_1ti", CONST, vsx_div_v1ti) +BU_P10V_AV_2 (UDIV_V1TI, "udiv_1ti", CONST, vsx_udiv_v1ti) +BU_P10V_AV_2 (DIVES_V1TI, "dives", CONST, vsx_dives_v1ti) +BU_P10V_AV_2 (DIVEU_V1TI, "diveu", CONST, vsx_diveu_v1ti) +BU_P10V_AV_2 (MODS_V1TI, "mods", CONST, vsx_mods_v1ti) +BU_P10V_AV_2 (MODU_V1TI, "modu", CONST, vsx_modu_v1ti) + +BU_P10V_AV_3 (VRLQMI, "vrlqmi", CONST, altivec_vrlqmi) BU_P10V_AV_3 (VEXTRACTBL, "vextdubvlx", CONST, vextractlv16qi) BU_P10V_AV_3 (VEXTRACTHL, "vextduhvlx", CONST, vextractlv8hi) BU_P10V_AV_3 (VEXTRACTWL, "vextduwvlx", CONST, vextractlv4si) @@ -3025,6 +3078,10 @@ BU_P10_OVERLOAD_2 (CLRR, "clrr") BU_P10_OVERLOAD_2 (GNB, "gnb") BU_P10_OVERLOAD_4 (XXEVAL, "xxeval") BU_P10_OVERLOAD_2 (XXGENPCVM, "xxgenpcvm") +BU_P10_OVERLOAD_2 (VRLQ, "vrlq") +BU_P10_OVERLOAD_2 (VSLQ, "vslq") +BU_P10_OVERLOAD_2 (VSRQ, "vsrq") +BU_P10_OVERLOAD_2 (VSRAQ, "vsraq") BU_P10_OVERLOAD_3 (EXTRACTL, "extractl") BU_P10_OVERLOAD_3 (EXTRACTH, "extracth") @@ -3099,6 +3156,7 @@ BU_CRYPTO_OVERLOAD_2A (VPMSUM, "vpmsum") BU_CRYPTO_OVERLOAD_3A (VPERMXOR, "vpermxor") BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") +BU_P10_OVERLOAD_1 (SIGNEXT, "vsignextq") /* HTM functions. 
*/ BU_HTM_1 (TABORT, "tabort", CR, tabort) diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index b4e13af..b0b7f12 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -843,6 +843,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P10V_BUILTIN_VCMPEQUT, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P10V_BUILTIN_VCMPEQUT, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, @@ -889,6 +893,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0}, + + { ALTIVEC_BUILTIN_VEC_CMPGE, P10V_BUILTIN_CMPGE_1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, P10V_BUILTIN_CMPGE_U1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0}, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -903,8 +913,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P10V_BUILTIN_VCMPGTUT, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, 
RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P10V_BUILTIN_VCMPGTST, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, @@ -947,6 +961,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, P10V_BUILTIN_CMPLE_1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, P10V_BUILTIN_CMPLE_U1TI, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0}, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -1086,6 +1105,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVU_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIV_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_UDIV_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, @@ -1097,6 +1121,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + 
{ P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, @@ -1108,6 +1137,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DI, 0, 0 }, @@ -1973,6 +2007,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_MULE, P8V_BUILTIN_VMULEUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, P10V_BUILTIN_VMULESD, + RS6000_BTI_V1TI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULE, P10V_BUILTIN_VMULEUD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB, @@ -1996,6 +2035,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_MULO, P8V_BUILTIN_VMULOUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, P10V_BUILTIN_VMULOSD, + RS6000_BTI_V1TI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MULO, P10V_BUILTIN_VMULOUD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, 
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH, @@ -2038,6 +2082,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_bool_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_bool_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, P10V_BUILTIN_VNOR_V1TI_UNS, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI_UNS, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR_V2DI_UNS, @@ -2299,6 +2353,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P10V_BUILTIN_VRLQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P10V_BUILTIN_VRLQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, @@ -2317,12 +2376,23 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { P9V_BUILTIN_VEC_RLMI, P10V_BUILTIN_VRLQMI, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, + RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI }, + { P9V_BUILTIN_VEC_RLMI, P10V_BUILTIN_VRLQMI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P9V_BUILTIN_VEC_RLNM, P10V_BUILTIN_VRLQNM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { P9V_BUILTIN_VEC_RLNM, P10V_BUILTIN_VRLQNM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB, @@ -2339,6 +2409,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P10V_BUILTIN_VSLQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P10V_BUILTIN_VSLQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, @@ -2535,6 +2610,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SR, 
P8V_BUILTIN_VSRD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P10V_BUILTIN_VSRQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P10V_BUILTIN_VSRQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, @@ -2563,6 +2643,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P10V_BUILTIN_VSRAQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P10V_BUILTIN_VSRAQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, @@ -4180,12 +4265,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P10V_BUILTIN_VCMPGTUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, 
P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P10V_BUILTIN_VCMPGTST_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, @@ -4250,6 +4339,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P10V_BUILTIN_VCMPEQUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P10V_BUILTIN_VCMPEQUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, @@ -4301,12 +4394,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P10V_BUILTIN_VCMPGTUT_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, 
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P10V_BUILTIN_VCMPGTST_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, @@ -4955,6 +5052,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_CMPNE, P9V_BUILTIN_CMPNEW, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P10V_BUILTIN_CMPNET, + RS6000_BTI_bool_V1TI, RS6000_BTI_V1TI, + RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPNE, P10V_BUILTIN_CMPNET, + RS6000_BTI_bool_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, /* The following 2 entries have been deprecated. */ { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEB_P, @@ -5055,6 +5158,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNED_P, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P, + RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, @@ -5160,7 +5267,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAED_P, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, - + { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P, + RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { 
P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P, @@ -5711,6 +5821,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + /* Sign extend builtins that work work on ISA 3.0, not added until ISA 3.1 */ + { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSB2W, + RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSH2W, + RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, + + { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSB2D, + RS6000_BTI_V2DI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSH2D, + RS6000_BTI_V2DI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSW2D, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + /* Overloaded built-in functions for ISA3.1 (power10). */ { P10_BUILTIN_VEC_CLRL, P10V_BUILTIN_VCLRLB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, @@ -6074,6 +6197,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_XVTLSBB_ONES, P10V_BUILTIN_XVTLSBB_ONES, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P10_BUILTIN_VEC_SIGNEXT, P10V_BUILTIN_VSIGNEXTSD2Q, + RS6000_BTI_V1TI, RS6000_BTI_V2DI, 0, 0 }, + { RS6000_BUILTIN_NONE, RS6000_BUILTIN_NONE, 0, 0, 0, 0 } }; @@ -12552,12 +12678,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case ALTIVEC_BUILTIN_VCMPEQUH: case ALTIVEC_BUILTIN_VCMPEQUW: case P8V_BUILTIN_VCMPEQUD: + case P10V_BUILTIN_VCMPEQUT: fold_compare_helper (gsi, EQ_EXPR, stmt); return true; case P9V_BUILTIN_CMPNEB: case P9V_BUILTIN_CMPNEH: case P9V_BUILTIN_CMPNEW: + case P10V_BUILTIN_CMPNET: fold_compare_helper (gsi, NE_EXPR, stmt); return true; @@ -12569,6 +12697,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case VSX_BUILTIN_CMPGE_U4SI: case 
VSX_BUILTIN_CMPGE_2DI: case VSX_BUILTIN_CMPGE_U2DI: + case P10V_BUILTIN_CMPGE_1TI: + case P10V_BUILTIN_CMPGE_U1TI: fold_compare_helper (gsi, GE_EXPR, stmt); return true; @@ -12580,6 +12710,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case ALTIVEC_BUILTIN_VCMPGTUW: case P8V_BUILTIN_VCMPGTUD: case P8V_BUILTIN_VCMPGTSD: + case P10V_BUILTIN_VCMPGTUT: + case P10V_BUILTIN_VCMPGTST: fold_compare_helper (gsi, GT_EXPR, stmt); return true; @@ -12591,6 +12723,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case VSX_BUILTIN_CMPLE_U4SI: case VSX_BUILTIN_CMPLE_2DI: case VSX_BUILTIN_CMPLE_U2DI: + case P10V_BUILTIN_CMPLE_1TI: + case P10V_BUILTIN_CMPLE_U1TI: fold_compare_helper (gsi, LE_EXPR, stmt); return true; @@ -13318,6 +13452,8 @@ rs6000_init_builtins (void) ? "__vector __bool long" : "__vector __bool long long", bool_long_long_type_node, 2); + bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128", + intTI_type_node, 1); pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", pixel_type_node, 8); @@ -13515,6 +13651,10 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V2DI_type_node, V2DI_type_node, NULL_TREE); + tree int_ftype_int_v1ti_v1ti + = build_function_type_list (integer_type_node, + integer_type_node, V1TI_type_node, + V1TI_type_node, NULL_TREE); tree void_ftype_v4si = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_void @@ -13882,6 +14022,9 @@ altivec_init_builtins (void) case E_VOIDmode: type = int_ftype_int_opaque_opaque; break; + case E_V1TImode: + type = int_ftype_int_v1ti_v1ti; + break; case E_V2DImode: type = int_ftype_int_v2di_v2di; break; @@ -14487,12 +14630,16 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case P10V_BUILTIN_XXGENPCVM_V2DI: case P10V_BUILTIN_DIVEU_V4SI: case P10V_BUILTIN_DIVEU_V2DI: + case P10V_BUILTIN_DIVEU_V1TI: case P10V_BUILTIN_DIVU_V4SI: case P10V_BUILTIN_DIVU_V2DI: + case 
P10V_BUILTIN_MODU_V1TI: case P10V_BUILTIN_MODU_V2DI: case P10V_BUILTIN_MODU_V4SI: case P10V_BUILTIN_MULHU_V2DI: case P10V_BUILTIN_MULHU_V4SI: + case P10V_BUILTIN_VMULEUD: + case P10V_BUILTIN_VMULOUD: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -14592,10 +14739,13 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case VSX_BUILTIN_CMPGE_U8HI: case VSX_BUILTIN_CMPGE_U4SI: case VSX_BUILTIN_CMPGE_U2DI: + case P10V_BUILTIN_CMPGE_U1TI: case ALTIVEC_BUILTIN_VCMPGTUB: case ALTIVEC_BUILTIN_VCMPGTUH: case ALTIVEC_BUILTIN_VCMPGTUW: case P8V_BUILTIN_VCMPGTUD: + case P10V_BUILTIN_VCMPGTUT: + case P10V_BUILTIN_VCMPEQUT: h.uns_p[1] = 1; h.uns_p[2] = 1; break; diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index b01bb5c..75c2cc4 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -11014,10 +11014,10 @@ init_float128_ieee (machine_mode mode) if (TARGET_POWERPC64) { - set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti"); - set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti"); - set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf"); - set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf"); + set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw"); + set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw"); + set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw"); + set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw"); } } @@ -20217,6 +20217,7 @@ rs6000_handle_altivec_attribute (tree *node, case 'b': switch (mode) { + case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break; case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break; case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break; case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index a5f7b1d..4ca6372 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ 
-2321,7 +2321,6 @@ extern int frame_pointer_needed; #define RS6000_BTM_MMA MASK_MMA /* ISA 3.1 MMA. */ #define RS6000_BTM_P10 MASK_POWER10 - #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ | RS6000_BTM_P8_VECTOR \ @@ -2434,6 +2433,7 @@ enum rs6000_builtin_type_index RS6000_BTI_bool_V8HI, /* __vector __bool short */ RS6000_BTI_bool_V4SI, /* __vector __bool int */ RS6000_BTI_bool_V2DI, /* __vector __bool long */ + RS6000_BTI_bool_V1TI, /* __vector __bool 128-bit */ RS6000_BTI_pixel_V8HI, /* __vector __pixel */ RS6000_BTI_long, /* long_integer_type_node */ RS6000_BTI_unsigned_long, /* long_unsigned_type_node */ @@ -2487,6 +2487,7 @@ enum rs6000_builtin_type_index #define bool_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V8HI]) #define bool_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V4SI]) #define bool_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V2DI]) +#define bool_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V1TI]) #define pixel_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_pixel_V8HI]) #define long_long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_long]) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 3f59b54..89c70f4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6441,6 +6441,42 @@ xscvsxddp %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "floatti<mode>2" + [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v") + (float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvsqqp %0,%1"; +} + [(set_attr "type" "fp")]) + +(define_insn "floatunsti<mode>2" + [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v") + (unsigned_float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvuqqp %0,%1"; +} + [(set_attr "type" "fp")]) + +(define_insn "fix_trunc<mode>ti2" + [(set (match_operand:TI 0 "vsx_register_operand" "=v") + (fix:TI 
(match_operand:IEEE128 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvqpsqz %0,%1"; +} + [(set_attr "type" "fp")]) + +(define_insn "fixuns_trunc<mode>ti2" + [(set (match_operand:TI 0 "vsx_register_operand" "=v") + (unsigned_fix:TI (match_operand:IEEE128 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvqpuqz %0,%1"; +} + [(set_attr "type" "fp")]) + ; Allow the combiner to merge source memory operands to the conversion so that ; the optimizer/register allocator doesn't try to load the value too early in a ; GPR and then use store/load to move it to a FPR and suffer from a store-load diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 3446b03..7e36c788 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -26,6 +26,9 @@ ;; Vector int modes (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) +;; 128-bit int modes +(define_mode_iterator VEC_TI [V1TI TI]) + ;; Vector int modes for parity (define_mode_iterator VEC_IP [V8HI V4SI @@ -53,7 +56,7 @@ (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI KF TF]) ;; Vector comparison modes -(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) ;; Vector init/extract modes (define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -697,6 +700,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_nltv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gt:V1TI (match_operand:V1TI 2 "vlogical_operand") + (match_operand:V1TI 1 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + (define_expand "vector_gtu<mode>" [(set (match_operand:VEC_I 0 "vint_operand") (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand") @@ -704,6 +718,13 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand 
"vector_gtuv1ti" + [(set (match_operand:V1TI 0 "altivec_register_operand") + (gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand")))] + "TARGET_POWER10" + "") + ; >= for integer vectors: swap operands and apply not-greater-than (define_expand "vector_nltu<mode>" [(set (match_operand:VEC_I 3 "vlogical_operand") @@ -716,6 +737,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_nltuv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gtu:V1TI (match_operand:V1TI 2 "vlogical_operand") + (match_operand:V1TI 1 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + (define_expand "vector_geu<mode>" [(set (match_operand:VEC_I 0 "vint_operand") (geu:VEC_I (match_operand:VEC_I 1 "vint_operand") @@ -735,6 +767,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_ngtv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gt:V1TI (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 2 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + (define_expand "vector_ngtu<mode>" [(set (match_operand:VEC_I 3 "vlogical_operand") (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand") @@ -746,6 +789,17 @@ operands[3] = gen_reg_rtx_and_attrs (operands[0]); }) +(define_expand "vector_ngtuv1ti" + [(set (match_operand:V1TI 3 "vlogical_operand") + (gtu:V1TI (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 2 "vlogical_operand"))) + (set (match_operand:V1TI 0 "vlogical_operand") + (not:V1TI (match_dup 3)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}) + ; There are 14 possible vector FP comparison operators, gt and eq of them have ; been expanded above, so just 
support 12 remaining operators here. @@ -894,6 +948,18 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_eq_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "vlogical_operand") + (eq:V1TI (match_dup 1) + (match_dup 2)))])] + "TARGET_POWER10" + "") + ;; This expansion handles the V16QI, V8HI, and V4SI modes in the ;; implementation of the vec_all_ne built-in functions on Power9. (define_expand "vector_ne_<mode>_p" @@ -976,6 +1042,23 @@ operands[3] = gen_reg_rtx (V2DImode); }) +(define_expand "vector_ne_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V1TI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx (V1TImode); +}) + ;; This expansion handles the V2DI mode in the implementation of the ;; vec_any_eq built-in function on Power9. ;; @@ -1002,6 +1085,26 @@ operands[3] = gen_reg_rtx (V2DImode); }) +(define_expand "vector_ae_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(eq:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_dup 3) + (eq:V1TI (match_dup 1) + (match_dup 2)))]) + (set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC CR6_REGNO) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 0) + (const_int 1)))] + "TARGET_POWER10" +{ + operands[3] = gen_reg_rtx (V1TImode); +}) + ;; This expansion handles the V4SF and V2DF modes in the Power9 ;; implementation of the vec_all_ne built-in functions. 
Note that the ;; expansions for this pattern with these modes makes no use of power9- @@ -1061,6 +1164,18 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_gt_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gt:CC (match_operand:V1TI 1 "vlogical_operand") + (match_operand:V1TI 2 "vlogical_operand"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "vlogical_operand") + (gt:V1TI (match_dup 1) + (match_dup 2)))])] + "TARGET_POWER10" + "") + (define_expand "vector_ge_<mode>_p" [(parallel [(set (reg:CC CR6_REGNO) @@ -1085,6 +1200,18 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vector_gtu_v1ti_p" + [(parallel + [(set (reg:CC CR6_REGNO) + (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))] + UNSPEC_PREDICATE)) + (set (match_operand:V1TI 0 "altivec_register_operand") + (gtu:V1TI (match_dup 1) + (match_dup 2)))])] + "TARGET_POWER10" + "") + ;; AltiVec/VSX predicates. ;; This expansion is triggered during expansion of predicate built-in @@ -1460,6 +1587,20 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +(define_expand "vrotlv1ti3" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. 
*/ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp)); + DONE; +}) + ;; Expanders for rotatert to make use of vrotl (define_expand "vrotr<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") @@ -1481,6 +1622,21 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; No immediate version of this 128-bit instruction +(define_expand "vashl<mode>3" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand") + (match_operand:VEC_TI 2 "vsx_register_operand")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn(gen_altivec_vslq_<mode> (operands[0], operands[1], tmp)); + DONE; +}) + ;; Expanders for logical shift right on each vector element (define_expand "vlshr<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") @@ -1489,6 +1645,21 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +;; No immediate version of this 128-bit instruction +(define_expand "vlshr<mode>3" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand") + (match_operand:VEC_TI 2 "vsx_register_operand")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. 
*/ + rtx tmp = gen_reg_rtx (<MODE>mode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn(gen_altivec_vsrq_<mode> (operands[0], operands[1], tmp)); + DONE; +}) + ;; Expanders for arithmetic shift right on each vector element (define_expand "vashr<mode>3" [(set (match_operand:VEC_I 0 "vint_operand") @@ -1496,6 +1667,22 @@ (match_operand:VEC_I 2 "vint_operand")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") + +;; No immediate version of this 128-bit instruction +(define_expand "vashrv1ti3" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */ + rtx tmp = gen_reg_rtx (V1TImode); + + emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); + emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp)); + DONE; +}) + ;; Vector reduction expanders for VSX ; The (VEC_reduc:... diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index bcb92be..f2260ba 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -37,9 +37,6 @@ TI V1TI]) -;; Iterator for 128-bit integer types that go in a single vector register. -(define_mode_iterator VSX_TI [TI V1TI]) - ;; Iterator for the 2 32-bit vector types (define_mode_iterator VSX_W [V4SF V4SI]) @@ -302,6 +299,12 @@ UNSPEC_VSX_XXSPLTD UNSPEC_VSX_DIVSD UNSPEC_VSX_DIVUD + UNSPEC_VSX_DIVSQ + UNSPEC_VSX_DIVUQ + UNSPEC_VSX_DIVESQ + UNSPEC_VSX_DIVEUQ + UNSPEC_VSX_MODSQ + UNSPEC_VSX_MODUQ UNSPEC_VSX_MULSD UNSPEC_VSX_SIGN_EXTEND UNSPEC_VSX_XVCVBF16SPN @@ -946,9 +949,9 @@ ;; special V1TI container class, which it is not appropriate to use vec_select ;; for the type. 
(define_insn "*vsx_le_permute_<mode>" - [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q") - (rotate:VSX_TI - (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r") + [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q") + (rotate:VEC_TI + (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r") (const_int 64)))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "@ @@ -962,10 +965,10 @@ (set_attr "type" "vecperm,vecload,vecstore,*,load,store")]) (define_insn_and_split "*vsx_le_undo_permute_<mode>" - [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa") - (rotate:VSX_TI - (rotate:VSX_TI - (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa") + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa") + (rotate:VEC_TI + (rotate:VEC_TI + (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa") (const_int 64)) (const_int 64)))] "!BYTES_BIG_ENDIAN && TARGET_VSX" @@ -1037,11 +1040,11 @@ ;; Peepholes to catch loads and stores for TImode if TImode landed in ;; GPR registers on a little endian system. 
(define_peephole2 - [(set (match_operand:VSX_TI 0 "int_reg_operand") - (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand") + [(set (match_operand:VEC_TI 0 "int_reg_operand") + (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand") (const_int 64))) - (set (match_operand:VSX_TI 2 "int_reg_operand") - (rotate:VSX_TI (match_dup 0) + (set (match_operand:VEC_TI 2 "int_reg_operand") + (rotate:VEC_TI (match_dup 0) (const_int 64)))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && (rtx_equal_p (operands[0], operands[2]) @@ -1049,11 +1052,11 @@ [(set (match_dup 2) (match_dup 1))]) (define_peephole2 - [(set (match_operand:VSX_TI 0 "int_reg_operand") - (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand") + [(set (match_operand:VEC_TI 0 "int_reg_operand") + (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand") (const_int 64))) - (set (match_operand:VSX_TI 2 "memory_operand") - (rotate:VSX_TI (match_dup 0) + (set (match_operand:VEC_TI 2 "memory_operand") + (rotate:VEC_TI (match_dup 0) (const_int 64)))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && peep2_reg_dead_p (2, operands[0])" @@ -1781,6 +1784,61 @@ } [(set_attr "type" "div")]) +;; Vector integer signed/unsigned divide +(define_insn "vsx_div_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVSQ))] + "TARGET_POWER10" + "vdivsq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_udiv_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVUQ))] + "TARGET_POWER10" + "vdivuq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_dives_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 
"vsx_register_operand" "v")] + UNSPEC_VSX_DIVESQ))] + "TARGET_POWER10" + "vdivesq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_diveu_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_DIVEUQ))] + "TARGET_POWER10" + "vdiveuq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_mods_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_MODSQ))] + "TARGET_POWER10" + "vmodsq %0,%1,%2" + [(set_attr "type" "div")]) + +(define_insn "vsx_modu_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") + (match_operand:V1TI 2 "vsx_register_operand" "v")] + UNSPEC_VSX_MODUQ))] + "TARGET_POWER10" + "vmoduq %0,%1,%2" + [(set_attr "type" "div")]) + ;; *tdiv* instruction returning the FG flag (define_expand "vsx_tdiv<mode>3_fg" [(set (match_dup 3) @@ -3126,6 +3184,21 @@ "xxpermdi %x0,%x1,%x1,2" [(set_attr "type" "vecperm")]) +;; Swap upper/lower 64-bit values in a 128-bit vector +(define_insn "xxswapd_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (subreg:V1TI + (vec_select:V2DI + (subreg:V2DI + (match_operand:V1TI 1 "vsx_register_operand" "v") 0 ) + (parallel [(const_int 1)(const_int 0)])) + 0))] + "TARGET_POWER10" +;; AIX does not support extended mnemonic xxswapd. Use the basic +;; mnemonic xxpermdi instead. 
+ "xxpermdi %x0,%x1,%x1,2" + [(set_attr "type" "vecperm")]) + (define_insn "xxgenpcvm_<mode>_internal" [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa") (unspec:VSX_EXTRACT_I4 @@ -4810,6 +4883,33 @@ (set_attr "type" "vecload")]) +;; ISA 3.1 vector extend sign support +(define_insn "vsx_sign_extend_v2di_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_POWER10" + "vextsd2q %0,%1" +[(set_attr "type" "vecexts")]) + +(define_expand "vsignextend_v2di_v1ti" + [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") + (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_POWER10" +{ + if (BYTES_BIG_ENDIAN) + { + rtx tmp = gen_reg_rtx (V2DImode); + + emit_insn (gen_altivec_vrevev2di2(tmp, operands[1])); + emit_insn (gen_vsx_sign_extend_v2di_v1ti(operands[0], tmp)); + DONE; + } + + emit_insn (gen_vsx_sign_extend_v2di_v1ti(operands[0], operands[1])); +}) + ;; ISA 3.0 vector extend sign support (define_insn "vsx_sign_extend_qi_<mode>" @@ -4821,6 +4921,24 @@ "vextsb2<wd> %0,%1" [(set_attr "type" "vecexts")]) +(define_expand "vsignextend_qi_<mode>" + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") + (unspec:VIlong + [(match_operand:V16QI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + { + rtx tmp = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_vrevev16qi2(tmp, operands[1])); + emit_insn (gen_vsx_sign_extend_qi_<mode>(operands[0], tmp)); + } + else + emit_insn (gen_vsx_sign_extend_qi_<mode>(operands[0], operands[1])); + DONE; +}) + (define_insn "vsx_sign_extend_hi_<mode>" [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") (unspec:VSINT_84 @@ -4830,7 +4948,25 @@ "vextsh2<wd> %0,%1" [(set_attr "type" "vecexts")]) -(define_insn "*vsx_sign_extend_si_v2di" +(define_expand "vsignextend_hi_<mode>" + [(set 
(match_operand:VIlong 0 "vsx_register_operand" "=v") + (unspec:VIlong + [(match_operand:V8HI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + { + rtx tmp = gen_reg_rtx (V8HImode); + emit_insn (gen_altivec_vrevev8hi2(tmp, operands[1])); + emit_insn (gen_vsx_sign_extend_hi_<mode>(operands[0], tmp)); + } + else + emit_insn (gen_vsx_sign_extend_hi_<mode>(operands[0], operands[1])); + DONE; +}) + +(define_insn "vsx_sign_extend_si_v2di" [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] UNSPEC_VSX_SIGN_EXTEND))] @@ -4838,6 +4974,24 @@ "vextsw2d %0,%1" [(set_attr "type" "vecexts")]) +(define_expand "vsignextend_si_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] + UNSPEC_VSX_SIGN_EXTEND))] + "TARGET_P9_VECTOR" +{ + if (BYTES_BIG_ENDIAN) + { + rtx tmp = gen_reg_rtx (V4SImode); + + emit_insn (gen_altivec_vrevev4si2(tmp, operands[1])); + emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], tmp)); + } + else + emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], operands[1])); + DONE; +}) + ;; ISA 3.1 vector sign extend ;; Move DI value from GPR to TI mode in VSX register, word 1. 
(define_insn "mtvsrdd_diti_w1" @@ -5525,6 +5679,19 @@ "vcmpneb %0,%1,%2" [(set_attr "type" "vecsimple")]) +;; Vector Compare Not Equal v1ti (specified/not+eq:) +(define_expand "vcmpnet" + [(set (match_operand:V1TI 0 "altivec_register_operand") + (not:V1TI + (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand") + (match_operand:V1TI 2 "altivec_register_operand"))))] + "TARGET_POWER10" +{ + emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2])); + emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0])); + DONE; +}) + ;; Vector Compare Not Equal or Zero Byte (define_insn "vcmpnezb" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") diff --git a/gcc/d/ChangeLog b/gcc/d/ChangeLog index 4e9a396..1b653f6 100644 --- a/gcc/d/ChangeLog +++ b/gcc/d/ChangeLog @@ -1,3 +1,13 @@ +2021-06-09 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/100964 + * dmd/MERGE: Merge upstream dmd 4a4e46a6f. + +2021-06-09 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/100935 + * dmd/MERGE: Merge upstream dmd f3fdeb578. + 2021-06-04 Iain Buclaw <ibuclaw@gdcproject.org> PR d/100882 diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index d29d462..a617f28 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -b7d146c4c34469f876a63f26ff19091a7f9d54d7 +4a4e46a6f304a667e0c05d4455706ec2056ffddc The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. 
diff --git a/gcc/d/dmd/expression.c b/gcc/d/dmd/expression.c index 2592b38..88f13e9 100644 --- a/gcc/d/dmd/expression.c +++ b/gcc/d/dmd/expression.c @@ -1044,8 +1044,11 @@ bool Expression::checkPostblit(Scope *sc, Type *t) t = t->baseElemOf(); if (t->ty == Tstruct) { - // Bugzilla 11395: Require TypeInfo generation for array concatenation - semanticTypeInfo(sc, t); + if (global.params.useTypeInfo) + { + // Bugzilla 11395: Require TypeInfo generation for array concatenation + semanticTypeInfo(sc, t); + } StructDeclaration *sd = ((TypeStruct *)t)->sym; if (sd->postblit) diff --git a/gcc/d/dmd/mtype.c b/gcc/d/dmd/mtype.c index 9ef8ab4..6cccf40 100644 --- a/gcc/d/dmd/mtype.c +++ b/gcc/d/dmd/mtype.c @@ -2040,7 +2040,10 @@ Expression *Type::getProperty(Loc loc, Identifier *ident, int flag) } else if (ident == Id::__xalignof) { - e = new IntegerExp(loc, alignsize(), Type::tsize_t); + unsigned explicitAlignment = alignment(); + unsigned naturalAlignment = alignsize(); + unsigned actualAlignment = (explicitAlignment == STRUCTALIGN_DEFAULT ? naturalAlignment : explicitAlignment); + e = new IntegerExp(loc, actualAlignment, Type::tsize_t); } else if (ident == Id::_init) { diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 22f9e93..c24ecd7 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -19553,6 +19553,22 @@ The second argument to @var{__builtin_crypto_vshasigmad} and integer that is 0 or 1. The third argument to these built-in functions must be a constant integer in the range of 0 to 15. 
+The following sign extension builtins are provided: + +@smallexample +vector signed int vec_signexti (vector signed char a) +vector signed long long vec_signextll (vector signed char a) +vector signed int vec_signexti (vector signed short a) +vector signed long long vec_signextll (vector signed short a) +vector signed long long vec_signextll (vector signed int a) +vector signed long long vec_signextq (vector signed long long a) +@end smallexample + +Each element of the result is produced by sign-extending the element of the +input vector that would fall in the least significant portion of the result +element. For example, a sign-extension of a vector signed char to a vector +signed long long will sign extend the rightmost byte of each doubleword. + @node PowerPC AltiVec Built-in Functions Available on ISA 3.1 @subsubsection PowerPC AltiVec Built-in Functions Available on ISA 3.1 @@ -20174,6 +20190,177 @@ Generate PCV from specified Mask size, as if implemented by the immediate value is either 0, 1, 2 or 3. @findex vec_genpcvm +@smallexample +@exdent vector unsigned __int128 vec_rl (vector unsigned __int128 A, + vector unsigned __int128 B); +@exdent vector signed __int128 vec_rl (vector signed __int128 A, + vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by rotating the corresponding element +of A left by the number of bits specified by the corresponding element of B. + + +@smallexample +@exdent vector unsigned __int128 vec_rlmi (vector unsigned __int128, + vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_rlmi (vector signed __int128, + vector signed __int128, + vector unsigned __int128); +@end smallexample + +Returns the result of rotating the first input and inserting it under mask +into the second input. The first bit in the mask, the last bit in the mask are +obtained from the two 7-bit fields bits [108:115] and bits [117:123] +respectively of the second input. 
The shift is obtained from the third input +in the 7-bit field [125:131] where all bits are counted from zero at the left. + +@smallexample +@exdent vector unsigned __int128 vec_rlnm (vector unsigned __int128, + vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_rlnm (vector signed __int128, + vector unsigned __int128, + vector unsigned __int128); +@end smallexample + +Returns the result of rotating the first input and ANDing it with a mask. The +first bit in the mask and the last bit in the mask are obtained from the two +7-bit fields bits [117:123] and bits [125:131] respectively of the second +input. The shift is obtained from the third input in the 7-bit field bits +[125:131] where all bits are counted from zero at the left. + +@smallexample +@exdent vector unsigned __int128 vec_sl(vector unsigned __int128 A, vector unsigned __int128 B); +@exdent vector signed __int128 vec_sl(vector signed __int128 A, vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by shifting the corresponding element of +A left by the number of bits specified by the corresponding element of B. + +@smallexample +@exdent vector unsigned __int128 vec_sr(vector unsigned __int128 A, vector unsigned __int128 B); +@exdent vector signed __int128 vec_sr(vector signed __int128 A, vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by shifting the corresponding element of +A right by the number of bits specified by the corresponding element of B. + +@smallexample +@exdent vector unsigned __int128 vec_sra(vector unsigned __int128 A, vector unsigned __int128 B); +@exdent vector signed __int128 vec_sra(vector signed __int128 A, vector unsigned __int128 B); +@end smallexample + +Result value: Each element of R is obtained by arithmetic shifting the corresponding +element of A right by the number of bits specified by the corresponding element of B. 
+ +@smallexample +@exdent vector unsigned __int128 vec_mule (vector unsigned long long, + vector unsigned long long); +@exdent vector signed __int128 vec_mule (vector signed long long, + vector signed long long); +@end smallexample + +Returns a vector containing a 128-bit integer result of multiplying the even +doubleword elements of the two inputs. + +@smallexample +@exdent vector unsigned __int128 vec_mulo (vector unsigned long long, + vector unsigned long long); +@exdent vector signed __int128 vec_mulo (vector signed long long, + vector signed long long); +@end smallexample + +Returns a vector containing a 128-bit integer result of multiplying the odd +doubleword elements of the two inputs. + +@smallexample +@exdent vector unsigned __int128 vec_div (vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_div (vector signed __int128, + vector signed __int128); +@end smallexample + +Returns the result of dividing the first operand by the second operand. An +attempt to divide any value by zero or to divide the most negative signed +128-bit integer by negative one results in an undefined value. + +@smallexample +@exdent vector unsigned __int128 vec_dive (vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_dive (vector signed __int128, + vector signed __int128); +@end smallexample + +The result is produced by shifting the first input left by 128 bits and +dividing by the second. If an attempt is made to divide by zero or the result +is larger than 128 bits, the result is undefined. + +@smallexample +@exdent vector unsigned __int128 vec_mod (vector unsigned __int128, + vector unsigned __int128); +@exdent vector signed __int128 vec_mod (vector signed __int128, + vector signed __int128); +@end smallexample + +The result is the modulo result of dividing the first input by the second +input. + +The following builtins perform 128-bit vector comparisons. 
The +@code{vec_all_xx}, @code{vec_any_xx}, and @code{vec_cmpxx}, where @code{xx} is +one of the operations @code{eq, ne, gt, lt, ge, le} perform pairwise +comparisons between the elements at the same positions within their two vector +arguments. The @code{vec_all_xx} function returns a non-zero value if and only +if all pairwise comparisons are true. The @code{vec_any_xx} function returns +a non-zero value if and only if at least one pairwise comparison is true. The +@code{vec_cmpxx} function returns a vector of the same type as its two +arguments, within which each element consists of all ones to denote that +specified logical comparison of the corresponding elements was true. +Otherwise, the element of the returned vector contains all zeros. + +@smallexample +vector bool __int128 vec_cmpeq (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpeq (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmpne (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpne (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmpgt (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpgt (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmplt (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmplt (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmpge (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmpge (vector unsigned __int128, vector unsigned __int128); +vector bool __int128 vec_cmple (vector signed __int128, vector signed __int128); +vector bool __int128 vec_cmple (vector unsigned __int128, vector unsigned __int128); + +int vec_all_eq (vector signed __int128, vector signed __int128); +int vec_all_eq (vector unsigned __int128, vector unsigned __int128); +int vec_all_ne (vector signed __int128, vector signed __int128); +int vec_all_ne 
(vector unsigned __int128, vector unsigned __int128); +int vec_all_gt (vector signed __int128, vector signed __int128); +int vec_all_gt (vector unsigned __int128, vector unsigned __int128); +int vec_all_lt (vector signed __int128, vector signed __int128); +int vec_all_lt (vector unsigned __int128, vector unsigned __int128); +int vec_all_ge (vector signed __int128, vector signed __int128); +int vec_all_ge (vector unsigned __int128, vector unsigned __int128); +int vec_all_le (vector signed __int128, vector signed __int128); +int vec_all_le (vector unsigned __int128, vector unsigned __int128); + +int vec_any_eq (vector signed __int128, vector signed __int128); +int vec_any_eq (vector unsigned __int128, vector unsigned __int128); +int vec_any_ne (vector signed __int128, vector signed __int128); +int vec_any_ne (vector unsigned __int128, vector unsigned __int128); +int vec_any_gt (vector signed __int128, vector signed __int128); +int vec_any_gt (vector unsigned __int128, vector unsigned __int128); +int vec_any_lt (vector signed __int128, vector signed __int128); +int vec_any_lt (vector unsigned __int128, vector unsigned __int128); +int vec_any_ge (vector signed __int128, vector signed __int128); +int vec_any_ge (vector unsigned __int128, vector unsigned __int128); +int vec_any_le (vector signed __int128, vector signed __int128); +int vec_any_le (vector unsigned __int128, vector unsigned __int128); +@end smallexample + + @node PowerPC Hardware Transactional Memory Built-in Functions @subsection PowerPC Hardware Transactional Memory Built-in Functions GCC provides two interfaces for accessing the Hardware Transactional diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 4066347..42391c2 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -4128,6 +4128,9 @@ supported, so @option{--enable-threads=dce} does not work. 
@end html @anchor{x-x-linux-gnu} @heading *-*-linux-gnu +The @code{.init_array} and @code{.fini_array} sections are enabled +unconditionally which requires at least glibc 2.1 and binutils 2.12. + Versions of libstdc++-v3 starting with 3.2.1 require bug fixes present in glibc 2.2.5 and later. More information is available in the libstdc++-v3 documentation. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4a9dbd3..a9c97fc 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -16611,6 +16611,11 @@ Moreover, code compiled with @option{-ftrampolines} and code compiled with present. This option must therefore be used on a program-wide basis and be manipulated with extreme care. +For languages other than Ada, the @code{-ftrampolines} and +@code{-fno-trampolines} options currently have no effect, and +trampolines are always generated on platforms that need them +for nested functions. + @item -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]} @opindex fvisibility Set the default ELF image symbol visibility to the specified option---all @@ -31813,8 +31818,8 @@ startup object and entry point. This option is available for Cygwin and MinGW targets. It specifies that the @code{dllimport} attribute should be ignored. -@item -mthread -@opindex mthread +@item -mthreads +@opindex mthreads This option is available for MinGW targets. It specifies that MinGW-specific thread support is to be used. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 0a6ae0b..fc7eb77 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5489,6 +5489,11 @@ not ABI-compliant, this option is typically used only on a per-language basis (notably by Ada) or when it can otherwise be applied to the whole program. +For languages other than Ada, the @code{-ftrampolines} and +@code{-fno-trampolines} options currently have no effect, and +trampolines are always generated on platforms that need them +for nested functions. 
+ Define the following hook if your backend either implements ABI-specified descriptor support, or can use GCC's generic descriptor implementation for nested functions. diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index d9fbbe2..33532f0 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -3829,6 +3829,11 @@ not ABI-compliant, this option is typically used only on a per-language basis (notably by Ada) or when it can otherwise be applied to the whole program. +For languages other than Ada, the @code{-ftrampolines} and +@code{-fno-trampolines} options currently have no effect, and +trampolines are always generated on platforms that need them +for nested functions. + Define the following hook if your backend either implements ABI-specified descriptor support, or can use GCC's generic descriptor implementation for nested functions. diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c index a1d70b62..d6cb9b1 100644 --- a/gcc/fold-const-call.c +++ b/gcc/fold-const-call.c @@ -1375,6 +1375,9 @@ fold_const_call_sss (real_value *result, combined_fn fn, CASE_CFN_FDIM: return do_mpfr_arg2 (result, mpfr_dim, arg0, arg1, format); + CASE_CFN_FMOD: + return do_mpfr_arg2 (result, mpfr_fmod, arg0, arg1, format); + CASE_CFN_HYPOT: return do_mpfr_arg2 (result, mpfr_hypot, arg0, arg1, format); diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 554afaa..242c680 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,12 @@ +2021-06-09 Martin Liska <mliska@suse.cz> + + * intrinsic.texi: Add missing @headitem to tables with a header. + +2021-06-09 Jakub Jelinek <jakub@redhat.com> + + PR fortran/100965 + * trans-openmp.c (gfc_omp_finish_clause): Gimplify OMP_CLAUSE_SIZE. 
+ 2021-06-08 Tobias Burnus <tobias@codesourcery.com> PR middle-end/99928 diff --git a/gcc/match.pd b/gcc/match.pd index d06ff17..bf22bc3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3733,10 +3733,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (integer_onep (@1)) (convert (convert:boolean_type_node @0))) /* a ? -1 : 0 -> -a. */ - (if (integer_all_onesp (@1)) + (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1)) (negate (convert (convert:boolean_type_node @0)))) /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */ - (if (!POINTER_TYPE_P (type) && integer_pow2p (@1)) + (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1)) (with { tree shift = build_int_cst (integer_type_node, tree_log2 (@1)); } @@ -3750,10 +3750,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (integer_onep (@2)) (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))) /* a ? -1 : 0 -> -(!a). */ - (if (integer_all_onesp (@2)) + (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2)) (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))) /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */ - (if (!POINTER_TYPE_P (type) && integer_pow2p (@2)) + (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) (with { tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 640fcbe..9e31d68 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,103 @@ +2021-06-09 Andrew Pinski <apinski@marvell.com> + + * g++.dg/torture/pr100925.C: New test. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * gcc.target/powerpc/int_128bit-runnable.c (extsd2q): Update expected + count. + Add tests for vec_signextq. + * gcc.target/powerpc/p9-sign_extend-runnable.c: New test case. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * gcc.target/powerpc/fp128_conversions.c: New file. + * gcc.target/powerpc/int_128bit-runnable.c(vextsd2q, + vcmpuq, vcmpsq, vcmpequq, vcmpequq., vcmpgtsq, vcmpgtsq. 
+ vcmpgtuq, vcmpgtuq.): Update scan-assembler-times. + (ppc_native_128bit): Remove dg-require-effective-target. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * gcc.target/powerpc/int_128bit-runnable.c: Add shift_right, shift_left + tests. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * gcc.target/powerpc/int_128bit-runnable.c: Add 128-bit DFP + conversion tests. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * gcc.target/powerpc/int_128bit-runnable.c: New test file. + +2021-06-09 Carl Love <cel@us.ibm.com> + + * gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c: + New runnable test case. + * gcc.target/powerpc/vec-rlmi-rlnm.c: Update scan assembler times + for xxlor instruction. + +2021-06-09 Christophe Lyon <christophe.lyon@linaro.org> + + * gcc.target/arm/simd/mve-vclz.c: New test. + +2021-06-09 Christophe Lyon <christophe.lyon@linaro.org> + + * gcc.target/arm/simd/mve-vhadd-1.c: New test. + * gcc.target/arm/simd/mve-vhadd-2.c: New test. + * gcc.target/arm/simd/neon-vhadd-1.c: New test. + * gcc.target/arm/simd/neon-vhadd-2.c: New test. + +2021-06-09 Roger Sayle <roger@nextmovesoftware.com> + + * gcc.dg/builtins-70.c: New test. + +2021-06-09 Aaron Sawdey <acsawdey@linux.vnet.ibm.com> + + * gcc.target/powerpc/fusion-p10-2logical.c: Update fused insn + counts to test 32 and 64 bit separately. + * gcc.target/powerpc/fusion-p10-addadd.c: Update fused insn + counts to test 32 and 64 bit separately. + * gcc.target/powerpc/fusion-p10-ldcmpi.c: Update fused insn + counts to test 32 and 64 bit separately. + * gcc.target/powerpc/fusion-p10-logadd.c: Update fused insn + counts to test 32 and 64 bit separately. + +2021-06-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/100981 + * gfortran.dg/vect/pr100981-1.f90: New testcase. + +2021-06-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/97832 + * gcc.dg/vect/pr97832-1.c: New testcase. + * gcc.dg/vect/pr97832-2.c: Likewise. + * gcc.dg/vect/pr97832-3.c: Likewise. + * g++.dg/vect/slp-pr98855.cc: XFAIL. 
+ * gcc.dg/vect/slp-50.c: New file. + +2021-06-09 Jakub Jelinek <jakub@redhat.com> + + PR fortran/100965 + * gfortran.dg/gomp/pr100965.f90: New test. + +2021-06-09 Uroš Bizjak <ubizjak@gmail.com> + + PR target/100936 + * gcc.target/i386/pr100936.c: New test. + +2021-06-09 Xionghu Luo <luoxhu@linux.ibm.com> + + * gcc.target/powerpc/float128-call.c: Adjust. + * gcc.target/powerpc/pr100085.c: New test. + +2021-06-09 Jason Merrill <jason@redhat.com> + + PR c++/100879 + * g++.dg/diagnostic/enum3.C: New test. + 2021-06-08 Marek Polacek <polacek@redhat.com> PR c++/100065 diff --git a/gcc/testsuite/g++.dg/torture/pr100925.C b/gcc/testsuite/g++.dg/torture/pr100925.C new file mode 100644 index 0000000..de13950 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr100925.C @@ -0,0 +1,24 @@ +// { dg-do compile } + +struct QScopedPointerDeleter { + static void cleanup(int *); +}; +class QScopedPointer { + typedef int *QScopedPointer::*RestrictedBool; + +public: + operator RestrictedBool() { return d ? nullptr : &QScopedPointer::d; } + void reset() { + if (d) + QScopedPointerDeleter::cleanup(d); + } + int *d; +}; +class DOpenGLPaintDevicePrivate { +public: + QScopedPointer fbo; +} DOpenGLPaintDeviceresize_d; +void DOpenGLPaintDeviceresize() { + if (DOpenGLPaintDeviceresize_d.fbo) + DOpenGLPaintDeviceresize_d.fbo.reset(); +} diff --git a/gcc/testsuite/g++.dg/vect/slp-pr98855.cc b/gcc/testsuite/g++.dg/vect/slp-pr98855.cc index 0b4e479..b101032 100644 --- a/gcc/testsuite/g++.dg/vect/slp-pr98855.cc +++ b/gcc/testsuite/g++.dg/vect/slp-pr98855.cc @@ -81,4 +81,6 @@ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, uint32_t *EK) } } -// { dg-final { scan-tree-dump-times "not vectorized: vectorization is not profitable" 2 "slp1" { target x86_64-*-* i?86-*-* } } } +// This used to work on { target x86_64-*-* i?86-*-* } but a fix in SLP +// discovery makes us trip over the threshold again. 
+// { dg-final { scan-tree-dump-times "not vectorized: vectorization is not profitable" 2 "slp1" { xfail *-*-* } } } diff --git a/gcc/testsuite/gcc.dg/builtins-70.c b/gcc/testsuite/gcc.dg/builtins-70.c new file mode 100644 index 0000000..a0c2dc9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/builtins-70.c @@ -0,0 +1,46 @@ +/* Copyright (C) 2021 Free Software Foundation. + + Check that constant folding of built-in fmod functions doesn't + break anything and produces the expected results. + +/* { dg-do link } */ +/* { dg-options "-O2 -ffast-math" } */ + +extern void link_error(void); + +extern double fmod(double,double); +extern float fmodf(float,float); +extern long double fmodl(long double,long double); + +int main() +{ + if (fmod (6.5, 2.3) < 1.8999 || fmod (6.5, 2.3) > 1.9001) + link_error (); + if (fmod (-6.5, 2.3) < -1.9001 || fmod (-6.5, 2.3) > -1.8999) + link_error (); + if (fmod (6.5, -2.3) < 1.8999 || fmod (6.5, -2.3) > 1.9001) + link_error (); + if (fmod (-6.5, -2.3) < -1.9001 || fmod (-6.5, -2.3) > -1.8999) + link_error (); + + if (fmodf (6.5f, 2.3f) < 1.8999f || fmodf (6.5f, 2.3f) > 1.9001f) + link_error (); + if (fmodf (-6.5f, 2.3f) < -1.9001f || fmodf (-6.5f, 2.3f) > -1.8999f) + link_error (); + if (fmodf (6.5f, -2.3f) < 1.8999f || fmodf (6.5f, -2.3f) > 1.9001f) + link_error (); + if (fmodf (-6.5f, -2.3f) < -1.9001f || fmodf (-6.5f, -2.3f) > -1.8999f) + link_error (); + + if (fmodl (6.5l, 2.3l) < 1.8999l || fmod (6.5l, 2.3l) > 1.9001l) + link_error (); + if (fmodl (-6.5l, 2.3l) < -1.9001l || fmod (-6.5l, 2.3l) > -1.8999l) + link_error (); + if (fmodl (6.5l, -2.3l) < 1.8999l || fmod (6.5l, -2.3l) > 1.9001l) + link_error (); + if (fmodl (-6.5l, -2.3l) < -1.9001l || fmod (-6.5l, -2.3l) > -1.8999l) + link_error (); + + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-1.c b/gcc/testsuite/gcc.dg/vect/pr97832-1.c new file mode 100644 index 0000000..063fc7b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97832-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } 
*/ +/* { dg-additional-options "-Ofast" } */ +/* { dg-require-effective-target vect_double } */ + +double a[1024], b[1024], c[1024]; + +void foo() +{ + for (int i = 0; i < 256; ++i) + { + a[2*i] = a[2*i] + b[2*i] - c[2*i]; + a[2*i+1] = a[2*i+1] - b[2*i+1] - c[2*i+1]; + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-2.c b/gcc/testsuite/gcc.dg/vect/pr97832-2.c new file mode 100644 index 0000000..4f05781 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97832-2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ +/* { dg-require-effective-target vect_double } */ + +void foo1x1(double* restrict y, const double* restrict x, int clen) +{ + int xi = clen & 2; + double f_re = x[0+xi+0]; + double f_im = x[4+xi+0]; + int clen2 = (clen+xi) * 2; +#pragma GCC unroll 0 + for (int c = 0; c < clen2; c += 8) { + // y[c] = y[c] - x[c]*conj(f); +#pragma GCC unroll 4 + for (int k = 0; k < 4; ++k) { + double x_re = x[c+0+k]; + double x_im = x[c+4+k]; + double y_re = y[c+0+k]; + double y_im = y[c+4+k]; + y_re = y_re - x_re * f_re - x_im * f_im;; + y_im = y_im + x_re * f_im - x_im * f_re; + y[c+0+k] = y_re; + y[c+4+k] = y_im; + } + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-3.c b/gcc/testsuite/gcc.dg/vect/pr97832-3.c new file mode 100644 index 0000000..ad1225d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97832-3.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ +/* { dg-require-effective-target vect_double } */ + +void foo(double* restrict y, const double* restrict x0, const double* restrict x1, int clen) +{ + int xi = clen & 2; + double f00_re = x0[0+xi+0]; + double f10_re = x1[0+xi+0]; + double 
f01_re = x0[0+xi+1]; + double f11_re = x1[0+xi+1]; + double f00_im = x0[4+xi+0]; + double f10_im = x1[4+xi+0]; + double f01_im = x0[4+xi+1]; + double f11_im = x1[4+xi+1]; + int clen2 = (clen+xi) * 2; + double* y0 = &y[0]; + double* y1 = &y[clen2]; + #pragma GCC unroll 0 + for (int c = 0; c < clen2; c += 8) { + // y0[c] = y0[c] - x0[c]*conj(f00) - x1[c]*conj(f10); + // y1[c] = y1[c] - x0[c]*conj(f01) - x1[c]*conj(f11); + #pragma GCC unroll 4 + for (int k = 0; k < 4; ++k) { + double x0_re = x0[c+0+k]; + double x0_im = x0[c+4+k]; + double y0_re = y0[c+0+k]; + double y0_im = y0[c+4+k]; + double y1_re = y1[c+0+k]; + double y1_im = y1[c+4+k]; + y0_re = y0_re - x0_re * f00_re - x0_im * f00_im; + y0_im = y0_im + x0_re * f00_im - x0_im * f00_re; + y1_re = y1_re - x0_re * f01_re - x0_im * f01_im; + y1_im = y1_im + x0_re * f01_im - x0_im * f01_re; + double x1_re = x1[c+0+k]; + double x1_im = x1[c+4+k]; + y0_re = y0_re - x1_re * f10_re - x1_im * f10_im; + y0_im = y0_im + x1_re * f10_im - x1_im * f10_re; + y1_re = y1_re - x1_re * f11_re - x1_im * f11_im; + y1_im = y1_im + x1_re * f11_im - x1_im * f11_re; + y0[c+0+k] = y0_re; + y0[c+4+k] = y0_im; + y1[c+0+k] = y1_re; + y1[c+4+k] = y1_im; + } + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-50.c b/gcc/testsuite/gcc.dg/vect/slp-50.c new file mode 100644 index 0000000..17509e6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-50.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ffast-math" } */ + +typedef int Quantum; +typedef struct { + Quantum blue, green; +} PixelPacket; +PixelPacket *EnhanceImage_image_q; +int EnhanceImage_image_x; +float EnhanceImage_image_distance_squared_total_weight; +void EnhanceImage_image_distance_squared() +{ + float zero_1; + for (; EnhanceImage_image_x; EnhanceImage_image_x++) { + 
EnhanceImage_image_distance_squared_total_weight += 5.0; + EnhanceImage_image_q->green = EnhanceImage_image_q->blue = + zero_1 + EnhanceImage_image_distance_squared_total_weight / 2 - 1; + EnhanceImage_image_q++; + } +} diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c new file mode 100644 index 0000000..7068736 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +#define FUNC(SIGN, TYPE, BITS, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ + } \ +} + +FUNC(s, int, 32, clz) +FUNC(u, uint, 32, clz) +FUNC(s, int, 16, clz) +FUNC(u, uint, 16, clz) +FUNC(s, int, 8, clz) +FUNC(u, uint, 8, clz) + +/* 16 and 8-bit versions are not vectorized because they need pack/unpack + patterns since __builtin_clz uses 32-bit parameter and return value. */ +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-1.c b/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-1.c new file mode 100644 index 0000000..19d5f5a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +/* We force a cast to int64_t to enable the vectorizer when dealing with 32-bit + inputs. 
*/ +#define FUNC(SIGN, TYPE, BITS, OP, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = ((int64_t)a[i] OP b[i]) >> 1; \ + } \ +} + +FUNC(s, int, 32, +, vhadd) +FUNC(u, uint, 32, +, vhadd) +FUNC(s, int, 16, +, vhadd) +FUNC(u, uint, 16, +, vhadd) +FUNC(s, int, 8, +, vhadd) +FUNC(u, uint, 8, +, vhadd) + +/* { dg-final { scan-assembler-times {vhadd\.s32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.u32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.s16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.u16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.s8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.u8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-2.c b/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-2.c new file mode 100644 index 0000000..30029fc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vhadd-2.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +/* We force a cast to int64_t to enable the vectorizer when dealing with 32-bit + inputs. 
*/ +#define FUNC(SIGN, TYPE, BITS, OP, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = ((int64_t)a[i] OP b[i] + 1) >> 1; \ + } \ +} + +FUNC(s, int, 32, +, vrhadd) +FUNC(u, uint, 32, +, vrhadd) +FUNC(s, int, 16, +, vrhadd) +FUNC(u, uint, 16, +, vrhadd) +FUNC(s, int, 8, +, vrhadd) +FUNC(u, uint, 8, +, vrhadd) + +/* { dg-final { scan-assembler-times {vrhadd\.s32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.u32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.s16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.u16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.s8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.u8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/neon-vhadd-1.c b/gcc/testsuite/gcc.target/arm/simd/neon-vhadd-1.c new file mode 100644 index 0000000..ce57784 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/neon-vhadd-1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-add-options arm_neon } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +/* Since we have implemented the avg* optabs for 128-bit vectors only, use + enough iterations to check that vectorization works as expected. */ + +/* We force a cast to int64_t to enable the vectorizer when dealing with 32-bit + inputs. 
*/ +#define FUNC(SIGN, TYPE, BITS, OP, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = ((int64_t)a[i] OP b[i]) >> 1; \ + } \ +} + +FUNC(s, int, 32, +, vhadd) +FUNC(u, uint, 32, +, vhadd) +FUNC(s, int, 16, +, vhadd) +FUNC(u, uint, 16, +, vhadd) +FUNC(s, int, 8, +, vhadd) +FUNC(u, uint, 8, +, vhadd) + +/* { dg-final { scan-assembler-times {vhadd\.s32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.u32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.s16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.u16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.s8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vhadd\.u8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/neon-vhadd-2.c b/gcc/testsuite/gcc.target/arm/simd/neon-vhadd-2.c new file mode 100644 index 0000000..f269254 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/neon-vhadd-2.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-add-options arm_neon } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +/* Since we default to -mvectorize-with-neon-quad, use enough iterations so that + we can vectorize using 128-bit vectors. */ +/* We force a cast to int64_t to enable the vectorizer when dealing with 32-bit + inputs. 
*/ +#define FUNC(SIGN, TYPE, BITS, OP, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = ((int64_t)a[i] OP b[i] + 1) >> 1; \ + } \ +} + +FUNC(s, int, 32, +, vrhadd) +FUNC(u, uint, 32, +, vrhadd) +FUNC(s, int, 16, +, vrhadd) +FUNC(u, uint, 16, +, vrhadd) +FUNC(s, int, 8, +, vrhadd) +FUNC(u, uint, 8, +, vrhadd) + +/* { dg-final { scan-assembler-times {vrhadd\.s32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.u32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.s16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.u16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.s8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vrhadd\.u8\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c b/gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c new file mode 100644 index 0000000..cd67b06 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/check-builtin-vec_rlnm-runnable.c @@ -0,0 +1,231 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9 -save-temps" } */ + +/* Verify the vec_rlm and vec_rlmi builtins works correctly. 
*/ +/* { dg-final { scan-assembler-times {\mvrldmi\M} 1 } } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#include <stdlib.h> +#endif + +void abort (void); + +int main () +{ + int i; + + vector unsigned int vec_arg1_int, vec_arg2_int, vec_arg3_int; + vector unsigned int vec_result_int, vec_expected_result_int; + + vector unsigned long long int vec_arg1_di, vec_arg2_di, vec_arg3_di; + vector unsigned long long int vec_result_di, vec_expected_result_di; + + unsigned int mask_begin, mask_end, shift; + unsigned long long int mask; + +/* Check vec int version of vec_rlmi builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 4; i++) { + vec_arg1_int[i] = 0x12345678 + i*0x11111111; + vec_arg2_int[i] = 0xA1B1CDEF; + vec_arg3_int[i] = mask_begin << 16 | mask_end << 8 | shift; + + /* do rotate */ + vec_expected_result_int[i] = ( vec_arg2_int[i] & ~mask) + | ((vec_arg1_int[i] << shift) | (vec_arg1_int[i] >> (32-shift))) & mask; + + } + + /* vec_rlmi(arg1, arg2, arg3) + result - rotate each element of arg2 left and inserts it into arg1 + element based on the mask specified in arg3. The shift, mask + start and end is specified in arg3. 
*/ + vec_result_int = vec_rlmi (vec_arg1_int, vec_arg2_int, vec_arg3_int); + + for (i = 0; i < 4; i++) { + if (vec_result_int[i] != vec_expected_result_int[i]) +#ifdef DEBUG + printf("ERROR: i = %d, vec_rlmi int result 0x%x, does not match " + "expected result 0x%x\n", i, vec_result_int[i], + vec_expected_result_int[i]); +#else + abort(); +#endif + } + +/* Check vec long long int version of vec_rlmi builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x8000000000000000ULL >> i; + + for (i = 0; i < 2; i++) { + vec_arg1_di[i] = 0x1234567800000000 + i*0x11111111; + vec_arg2_di[i] = 0xA1B1C1D1E1F12345; + vec_arg3_di[i] = mask_begin << 16 | mask_end << 8 | shift; + + /* do rotate */ + vec_expected_result_di[i] = ( vec_arg2_di[i] & ~mask) + | ((vec_arg1_di[i] << shift) | (vec_arg1_di[i] >> (64-shift))) & mask; + } + + /* vec_rlmi(arg1, arg2, arg3) + result - rotate each element of arg1 left and inserts it into arg2 + element based on the mask specified in arg3. The shift, mask, start + and end is specified in arg3. 
*/ + vec_result_di = vec_rlmi (vec_arg1_di, vec_arg2_di, vec_arg3_di); + + for (i = 0; i < 2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) +#ifdef DEBUG + printf("ERROR: i = %d, vec_rlmi int long long result 0x%llx, does not match " + "expected result 0x%llx\n", i, vec_result_di[i], + vec_expected_result_di[i]); +#else + abort(); +#endif + } + + /* Check vec int version of vec_rlnm builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 4; i++) { + vec_arg1_int[i] = 0x12345678 + i*0x11111111; + vec_arg2_int[i] = shift; + vec_arg3_int[i] = mask_begin << 8 | mask_end; + vec_expected_result_int[i] = (vec_arg1_int[i] << shift) & mask; + } + + /* vec_rlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left by shift in element of arg2. + Then AND with mask whose start/stop bits are specified in element of + arg3. */ + vec_result_int = vec_rlnm (vec_arg1_int, vec_arg2_int, vec_arg3_int); + for (i = 0; i < 4; i++) { + if (vec_result_int[i] != vec_expected_result_int[i]) +#ifdef DEBUG + printf("ERROR: vec_rlnm, i = %d, int result 0x%x does not match " + "expected result 0x%x\n", i, vec_result_int[i], + vec_expected_result_int[i]); +#else + abort(); +#endif + } + +/* Check vec long int version of builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 20; + + for (i = 0; i < 63; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x8000000000000000ULL >> i; + + for (i = 0; i < 2; i++) { + vec_arg1_di[i] = 0x123456789ABCDE00ULL + i*0x1111111111111111ULL; + vec_arg2_di[i] = shift; + vec_arg3_di[i] = mask_begin << 8 | mask_end; + vec_expected_result_di[i] = (vec_arg1_di[i] << shift) & mask; + } + + vec_result_di = vec_rlnm (vec_arg1_di, vec_arg2_di, vec_arg3_di); + + for (i = 0; i < 2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) +#ifdef DEBUG + printf("ERROR: vec_rlnm, i = %d, long 
long int result 0x%llx does not " + "match expected result 0x%llx\n", i, vec_result_di[i], + vec_expected_result_di[i]); +#else + abort(); +#endif + } + + /* Check vec int version of vec_vrlnm builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 16; + + for (i = 0; i < 31; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x80000000ULL >> i; + + for (i = 0; i < 4; i++) { + vec_arg1_int[i] = 0x12345678 + i*0x11111111; + vec_arg2_int[i] = mask_begin << 16 | mask_end << 8 | shift; + vec_expected_result_int[i] = (vec_arg1_int[i] << shift) & mask; + } + + /* vec_vrlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left then AND with mask. The mask + start, stop bits is specified in the second argument. The shift amount + is also specified in the second argument. */ + vec_result_int = vec_vrlnm (vec_arg1_int, vec_arg2_int); + + for (i = 0; i < 4; i++) { + if (vec_result_int[i] != vec_expected_result_int[i]) +#ifdef DEBUG + printf("ERROR: vec_vrlnm, i = %d, int result 0x%x does not match " + "expected result 0x%x\n", i, vec_result_int[i], + vec_expected_result_int[i]); +#else + abort(); +#endif + } + +/* Check vec long int version of vec_vrlnm builtin */ + mask = 0; + mask_begin = 0; + mask_end = 4; + shift = 20; + + for (i = 0; i < 63; i++) + if ((i >= mask_begin) && (i <= mask_end)) + mask |= 0x8000000000000000ULL >> i; + + for (i = 0; i < 2; i++) { + vec_arg1_di[i] = 0x123456789ABCDE00ULL + i*0x1111111111111111ULL; + vec_arg2_di[i] = mask_begin << 16 | mask_end << 8 | shift; + vec_expected_result_di[i] = (vec_arg1_di[i] << shift) & mask; + } + + vec_result_di = vec_vrlnm (vec_arg1_di, vec_arg2_di); + + for (i = 0; i < 2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) +#ifdef DEBUG + printf("ERROR: vec_vrlnm, i = %d, long long int result 0x%llx does not " + "match expected result 0x%llx\n", i, vec_result_di[i], + vec_expected_result_di[i]); +#else + abort(); +#endif + } + + return 0; +} diff --git 
a/gcc/testsuite/gcc.target/powerpc/fp128_conversions.c b/gcc/testsuite/gcc.target/powerpc/fp128_conversions.c new file mode 100644 index 0000000..c20282f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fp128_conversions.c @@ -0,0 +1,294 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ + +/* Check that the expected 128-bit instructions are generated if the processor + supports the 128-bit integer instructions. */ +/* { dg-final { scan-assembler-times {\mxscvsqqp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxscvuqqp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxscvqpsqz\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxscvqpuqz\M} 1 } } */ + +#include <stdio.h> +#include <math.h> +#include <fenv.h> +#include <stdlib.h> +#include <wchar.h> + +#define DEBUG 0 + +void +abort (void); + +float +conv_i_2_fp( long long int a) +{ + return (float) a; +} + +double +conv_i_2_fpd( long long int a) +{ + return (double) a; +} + +double +conv_ui_2_fpd( unsigned long long int a) +{ + return (double) a; +} + +__float128 +conv_i128_2_fp128 (__int128_t a) +{ + // default, gen inst KF mode + // -mabi=ibmlongdouble, gen inst floattiieee KF mode + // -mabi=ieeelongdouble gen inst floattiieee TF mode + return (__float128) a; +} + +__float128 +conv_ui128_2_fp128 (__uint128_t a) +{ + // default, gen inst KF mode + // -mabi=ibmlongdouble, gen inst floattiieee KF mode + // -mabi=ieeelongdouble gen inst floattiieee TF mode + return (__float128) a; +} + +__int128_t +conv_fp128_2_i128 (__float128 a) +{ + // default, gen inst KF mode + // -mabi=ibmlongdouble, gen inst floattiieee KF mode + // -mabi=ieeelongdouble gen inst floattiieee TF mode + return (__int128_t) a; +} + +__uint128_t +conv_fp128_2_ui128 (__float128 a) +{ + // default, gen inst KF mode + // -mabi=ibmlongdouble, gen inst floattiieee KF mode + // -mabi=ieeelongdouble gen inst floattiieee TF mode + return (__uint128_t) a; +} + 
+long double
+conv_i128_2_ld (__int128_t a)  /* signed 128-bit integer -> long double; not called by main  */
+{
+  // default, gen call __floattitf
+  // -mabi=ibmlongdouble, gen call __floattitf
+  // -mabi=ieeelongdouble gen inst floattiieee TF mode
+  return (long double) a;
+}
+
+__ibm128
+conv_i128_2_ibm128 (__int128_t a)  /* signed 128-bit integer -> __ibm128; not called by main  */
+{
+  // default, gen call __floattitf
+  // -mabi=ibmlongdouble, gen call __floattitf
+  // -mabi=ieeelongdouble, message uses IBM long double, no binary output
+  return (__ibm128) a;
+}
+
+int
+main()
+{
+  float a, expected_result_float;
+  double b, expected_result_double;
+  long long int c;
+  unsigned long long int u;
+  __int128_t d;
+  __uint128_t u128;
+  unsigned long long expected_result_uint128[2] ;
+  /* Each check converts a known value and compares the result with a constant;
+     128-bit results are compared as two 64-bit halves through the union below.  */
+
+  union conv_t {
+    float a;
+    double b;
+    long long int c;
+    long long int128[2] ;
+    unsigned long long uint128[2] ;  /* the expected values below assume [0] is the low half  */
+    unsigned long long int u;
+    __int128_t d;
+    __uint128_t u128;
+    __float128 e;
+    long double ld;   // another 128-bit float version
+  } conv, conv_result;
+
+  c = 20;  /* long long -> float  */
+  expected_result_float = 20.00000;
+  a = conv_i_2_fp (c);
+
+  if (a != expected_result_float) {
+#if DEBUG
+    printf("ERROR: conv_i_2_fp(%lld) = %10.5f\n", c, a);
+    printf("\n does not match expected_result = %10.5f\n\n",
+           expected_result_float);
+#else
+    abort();
+#endif
+  }
+
+  c = 20;  /* long long -> double  */
+  expected_result_double = 20.00000;
+  b = conv_i_2_fpd (c);
+
+  if (b != expected_result_double) {
+#if DEBUG
+    printf("ERROR: conv_i_2_fpd(%lld) = %10.5f\n", c, b);
+    printf("\n does not match expected_result = %10.5f\n\n",
+           expected_result_double);
+#else
+    abort();
+#endif
+  }
+
+  u = 20;  /* unsigned long long -> double  */
+  expected_result_double = 20.00000;
+  b = conv_ui_2_fpd (u);
+
+  if (b != expected_result_double) {
+#if DEBUG
+    printf("ERROR: conv_ui_2_fpd(%llu) = %10.5f\n", u, b);
+    printf("\n does not match expected_result = %10.5f\n\n",
+           expected_result_double);
+#else
+    abort();
+#endif
+  }
+
+  d = -3210;  /* signed 128-bit -> __float128, negative input  */
+  d = (d * 10000000000) + 9876543210;  /* = -32090123456790  */
+  conv_result.e = conv_i128_2_fp128 (d);
+  expected_result_uint128[1] = 0xc02bd2f9068d1160;
+  expected_result_uint128[0] = 0x0;
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {  /* fail if either half differs  */
+#if DEBUG
+    printf("ERROR: conv_i128_2_fp128(-32090123456790) = (result in hex) 0x%llx %llx\n",
+           conv_result.uint128[1], conv_result.uint128[0]);
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  d = 123;  /* signed 128-bit -> __float128, positive input  */
+  d = (d * 10000000000) + 1234567890;
+  conv_result.ld = conv_i128_2_fp128 (d);  /* NOTE(review): stored via .ld, so the bits compared are long double format; confirm intent  */
+  expected_result_uint128[1] = 0x0;
+  expected_result_uint128[0] = 0x4271eab4c8ed2000;
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {
+#if DEBUG
+    printf("ERROR: conv_i128_2_fp128(1231234567890) = (result in hex) 0x%llx %llx\n",
+           conv_result.uint128[1], conv_result.uint128[0]);
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  u128 = 8760;  /* unsigned 128-bit -> __float128  */
+  u128 = (u128 * 10000000000) + 1234567890;
+  conv_result.e = conv_ui128_2_fp128 (u128);
+  expected_result_uint128[1] = 0x402d3eb101df8b48;
+  expected_result_uint128[0] = 0x0;
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {
+#if DEBUG
+    printf("ERROR: conv_ui128_2_fp128(87601234567890) = (result in hex) 0x%llx %llx\n",
+           conv_result.uint128[1], conv_result.uint128[0]);
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  u128 = 3210;  /* unsigned 128-bit -> __float128, second value  */
+  u128 = (u128 * 10000000000) + 9876543210;
+  expected_result_uint128[1] = 0x402bd3429c8feea0;
+  expected_result_uint128[0] = 0x0;
+  conv_result.e = conv_ui128_2_fp128 (u128);
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {
+#if DEBUG
+    printf("ERROR: conv_ui128_2_fp128(32109876543210) = (result in hex) 0x%llx %llx\n",
+           conv_result.uint128[1], conv_result.uint128[0]);
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  conv.e = 12345.6789;  /* __float128 -> signed 128-bit, truncates to 12345  */
+  expected_result_uint128[1] = 0x0;  /* high half of 12345 is zero  */
+  expected_result_uint128[0] = 0x3039;
+
+  conv_result.d = conv_fp128_2_i128 (conv.e);
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {
+#if DEBUG
+    printf("ERROR: conv_fp128_2_i128(0x%llx %llx) = ",
+           conv.uint128[1], conv.uint128[0]);
+    printf("0x%llx %llx\n", conv_result.uint128[1], conv_result.uint128[0]);
+
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  conv.e = -6789.12345;  /* __float128 -> signed 128-bit, truncates to -6789  */
+  expected_result_uint128[1] = 0xffffffffffffffff;  /* sign extension of -6789  */
+  expected_result_uint128[0] = 0xffffffffffffe57b;
+  conv_result.d = conv_fp128_2_i128 (conv.e);
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {
+#if DEBUG
+    printf("ERROR: conv_fp128_2_i128(0x%llx %llx) = ",
+           conv.uint128[1], conv.uint128[0]);
+    printf("0x%llx %llx\n", conv_result.uint128[1], conv_result.uint128[0]);
+
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  conv.e = 6789.12345;  /* __float128 -> unsigned 128-bit, truncates to 6789  */
+  expected_result_uint128[1] = 0x0;
+  expected_result_uint128[0] = 0x1a85;
+  conv_result.u128 = conv_fp128_2_ui128 (conv.e);
+
+  if ((conv_result.uint128[1] != expected_result_uint128[1])
+      || (conv_result.uint128[0] != expected_result_uint128[0])) {
+#if DEBUG
+    printf("ERROR: conv_fp128_2_ui128(0x%llx %llx) = ",
+           conv.uint128[1], conv.uint128[0]);
+    printf("0x%llx %llx\n", conv_result.uint128[1], conv_result.uint128[0]);
+
+    printf("\n does not match expected_result = (result in hex) 0x%llx %llx\n\n",
+           expected_result_uint128[1], expected_result_uint128[0]);
+#else
+    abort();
+#endif
+  }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c index 9a20537..de22176 100644 --- a/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c @@ -64,142 +64,262 @@ TEST(vboolchar_t); TEST(vuint_t); /* Recreate with: - grep ' \*fuse_' fusion-p10-2logical.s|sed -e 's,^.*\*,,' |sort -k 7,7 |uniq -c|awk '{l=30-length($2); printf("/%s* { %s { scan-assembler-times \"%s\"%-*s %4d } } *%s/\n","","dg-final",$2,l,"",$1,"");}' + grep ' \*fuse_' fusion-p10-2logical.s|sed -e 's,^.*\*,,' |sort -k 7,7 |uniq -c|awk '{l=30-length($2); printf("/%s* { %s { scan-assembler-times \"%s\"%-*s %4d { target lp64 } } } *%s/\n","","dg-final",$2,l,"",$1,"");}' */ -/* { dg-final { scan-assembler-times "fuse_and_and/1" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_and_and/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_and/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_and/1" 26 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_and/2" 48 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_and/3" 6 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_or/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_or/1" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_or/2" 32 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_orc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_orc/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_orc/2" 48 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_xor/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_andc_xor/1" 16 } } */ 
-/* { dg-final { scan-assembler-times "fuse_andc_xor/2" 32 } } */ -/* { dg-final { scan-assembler-times "fuse_and_eqv/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_and_eqv/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_and_or/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_and_or/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_and_orc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_and_orc/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_and_xor/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_and_xor/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_eqv_and/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_eqv_and/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_eqv_andc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_eqv_andc/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_eqv_or/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_eqv_or/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_and/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_and/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_andc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_andc/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_or/0" 14 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_or/1" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_or/2" 72 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_or/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_orc/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_orc/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_and/0" 28 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_and/1" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_and/2" 48 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_and/3" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_andc/2" 24 } } */ -/* { dg-final { scan-assembler-times 
"fuse_nor_andc/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_or/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_or/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_orc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_orc/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_or_and/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_or_and/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_or_andc/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_or_andc/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_and/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_and/1" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_and/2" 32 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_andc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_andc/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_andc/2" 48 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_or/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_or/1" 10 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_or/2" 72 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_or/3" 6 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_xor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_xor/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_orc_xor/2" 48 } } */ -/* { dg-final { scan-assembler-times "fuse_or_eqv/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_or_eqv/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_or_or/1" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_or_or/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_or_xor/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_or_xor/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vand/1" 10 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vand/3" 6 } } */ -/* { dg-final { scan-assembler-times 
"fuse_vandc_vor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vor/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vorc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vorc/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vxor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vandc_vxor/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vand_vand/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vand_veqv/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vand_vor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vand_vorc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vand_vxor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_veqv_vand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_veqv_vandc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_veqv_vor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnand_vand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnand_vandc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnand_vor/0" 14 } } */ -/* { dg-final { scan-assembler-times "fuse_vnand_vor/1" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_vnand_vor/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnand_vorc/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnor_vand/0" 14 } } */ -/* { dg-final { scan-assembler-times "fuse_vnor_vand/1" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_vnor_vand/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnor_vandc/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnor_vor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vnor_vorc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vand/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vandc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vandc/1" 8 } } */ -/* { dg-final { scan-assembler-times 
"fuse_vorc_vor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vor/1" 10 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vor/3" 6 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vxor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vorc_vxor/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vor_vand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vor_vandc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vor_veqv/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vor_vor/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vor_vxor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_vand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_vandc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_veqv/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_vnand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_vor/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_vorc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_vxor_vxor/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_and/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_and/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_andc/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_andc/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_eqv/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_eqv/3" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_nand/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_nand/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_or/0" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_or/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_orc/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_orc/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_xor/1" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_xor_xor/2" 16 } } */ +/* { 
dg-final { scan-assembler-times "fuse_and_and/1" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_and/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/1" 26 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/2" 48 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/3" 6 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_or/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_or/1" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_or/2" 32 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_orc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_orc/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_orc/2" 48 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_xor/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_xor/1" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_xor/2" 32 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_eqv/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_eqv/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_or/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_or/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_orc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_orc/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_xor/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_xor/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_and/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_and/2" 16 
{ target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_andc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_andc/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_or/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_or/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_and/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_and/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_andc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_andc/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/0" 14 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/1" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/2" 72 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_orc/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_orc/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/0" 28 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/1" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/2" 48 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/3" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_andc/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_andc/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_or/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_or/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_orc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_orc/2" 24 { target lp64 } } } */ +/* { dg-final { 
scan-assembler-times "fuse_or_and/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_and/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_andc/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_andc/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_and/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_and/1" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_and/2" 32 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_andc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_andc/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_andc/2" 48 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/1" 10 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/2" 72 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/3" 6 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_xor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_xor/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_xor/2" 48 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_eqv/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_eqv/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_or/1" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_or/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_xor/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_xor/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vand/1" 10 { target lp64 } } } */ +/* 
{ dg-final { scan-assembler-times "fuse_vandc_vand/3" 6 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vor/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vorc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vorc/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vxor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vxor/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vand/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_veqv/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vorc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vxor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_veqv_vand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_veqv_vandc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_veqv_vor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vandc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vor/0" 14 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vor/1" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vor/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vorc/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vand/0" 14 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vand/1" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vand/3" 8 { target lp64 } } } */ +/* { dg-final { 
scan-assembler-times "fuse_vnor_vandc/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vorc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vand/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vandc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vandc/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vor/1" 10 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vor/3" 6 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vxor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vxor/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vandc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_veqv/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vor/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vxor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vandc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_veqv/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vnand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vor/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vorc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vxor/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times 
"fuse_xor_and/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_and/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_andc/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_andc/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_eqv/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_eqv/3" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_nand/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_nand/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_or/0" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_or/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_orc/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_orc/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_xor/1" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_xor/2" 16 { target lp64 } } } */ + +/* { dg-final { scan-assembler-times "fuse_and_and/1" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/1" 56 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_and/3" 24 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_or/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_or/1" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_orc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_orc/1" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_orc/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_xor/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_andc_xor/1" 40 { target ilp32 } 
} } */ +/* { dg-final { scan-assembler-times "fuse_and_eqv/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_eqv/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_or/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_orc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_orc/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_xor/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_and/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_andc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_andc/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_or/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_eqv_or/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_and/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_andc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_andc/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/0" 56 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/2" 24 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_or/3" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_orc/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_orc/3" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/0" 70 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/1" 10 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_and/3" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_andc/2" 8 { target ilp32 } } } */ +/* { dg-final { 
scan-assembler-times "fuse_nor_andc/3" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_or/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_or/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_orc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_orc/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_and/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_andc/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_and/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_and/1" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_andc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_andc/1" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_andc/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/1" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/2" 24 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_or/3" 24 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_xor/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_xor/1" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_orc_xor/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_eqv/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_eqv/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_or/1" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_xor/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vand/1" 
10 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vand/3" 6 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vor/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vorc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vorc/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vxor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vandc_vxor/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vand/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_veqv/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vorc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vand_vxor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_veqv_vand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_veqv_vandc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_veqv_vor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vandc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vor/0" 14 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vor/1" 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vor/3" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnand_vorc/3" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vand/0" 14 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vand/1" 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times 
"fuse_vnor_vand/3" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vandc/3" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vnor_vorc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vand/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vandc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vandc/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vor/1" 10 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vor/3" 6 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vxor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vorc_vxor/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vandc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_veqv/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vor/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vor_vxor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vandc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_veqv/3" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vnand/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vor/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vxor_vorc/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times 
"fuse_vxor_vxor/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_and/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_andc/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_eqv/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_eqv/3" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_nand/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_nand/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_or/0" 40 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_orc/0" 32 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_orc/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_xor_xor/1" 40 { target ilp32 } } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c index 494ccdb..f70b56a 100644 --- a/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c @@ -32,9 +32,16 @@ vlong vaddadd2(vlong s, vlong a, vlong b, vlong c) return a+b+c; } -/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 } } */ -/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 } } */ -/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 } } */ -/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 1 } } */ -/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1" 1 } } */ -/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 1 } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times 
"fuse_vaddudm_vaddudm/1" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 1 { target lp64 } } } */ + +/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 0 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c index 99f9e92..ea1d5d0 100644 --- a/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c @@ -53,14 +53,26 @@ TEST(int16_t) TEST(uint8_t) TEST(int8_t) -/* { dg-final { scan-assembler-times "lbz_cmpldi_cr0_QI_clobber_CCUNS_zero" 2 } } */ -/* { dg-final { scan-assembler-times "ld_cmpdi_cr0_DI_DI_CC_none" 4 } } */ -/* { dg-final { scan-assembler-times "ld_cmpdi_cr0_DI_clobber_CC_none" 4 } } */ -/* { dg-final { scan-assembler-times "ld_cmpldi_cr0_DI_DI_CCUNS_none" 1 } } */ -/* { dg-final { scan-assembler-times "ld_cmpldi_cr0_DI_clobber_CCUNS_none" 1 } } */ -/* { dg-final { scan-assembler-times "lha_cmpdi_cr0_HI_clobber_CC_sign" 8 } } */ -/* { dg-final { scan-assembler-times "lhz_cmpldi_cr0_HI_clobber_CCUNS_zero" 2 } } */ -/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_EXTSI_CC_sign" 3 } } */ -/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none" 4 } } */ -/* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero" 2 } } */ -/* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_clobber_CCUNS_none" 2 } } */ +/* { dg-final { scan-assembler-times "lbz_cmpldi_cr0_QI_clobber_CCUNS_zero" 2 { target lp64 } } } */ +/* { 
dg-final { scan-assembler-times "ld_cmpdi_cr0_DI_DI_CC_none" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpdi_cr0_DI_clobber_CC_none" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpldi_cr0_DI_DI_CCUNS_none" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpldi_cr0_DI_clobber_CCUNS_none" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "lha_cmpdi_cr0_HI_clobber_CC_sign" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "lhz_cmpldi_cr0_HI_clobber_CCUNS_zero" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_EXTSI_CC_sign" 3 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_clobber_CCUNS_none" 2 { target lp64 } } } */ + +/* { dg-final { scan-assembler-times "lbz_cmpldi_cr0_QI_clobber_CCUNS_zero" 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpdi_cr0_DI_DI_CC_none" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpdi_cr0_DI_clobber_CC_none" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpldi_cr0_DI_DI_CCUNS_none" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "ld_cmpldi_cr0_DI_clobber_CCUNS_none" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "lha_cmpdi_cr0_HI_clobber_CC_sign" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "lhz_cmpldi_cr0_HI_clobber_CCUNS_zero" 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_EXTSI_CC_sign" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "lwa_cmpdi_cr0_SI_clobber_CC_none" 9 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "lwz_cmpldi_cr0_SI_EXTSI_CCUNS_zero" 0 { target ilp32 } } } */ +/* { dg-final { 
scan-assembler-times "lwz_cmpldi_cr0_SI_clobber_CCUNS_none" 6 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-logadd.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-logadd.c index b7022b2..21d7f40 100644 --- a/gcc/testsuite/gcc.target/powerpc/fusion-p10-logadd.c +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-logadd.c @@ -50,48 +50,93 @@ TEST(int32_t); TEST(uint64_t); TEST(int64_t); -/* { dg-final { scan-assembler-times "fuse_nand_rsubf/0" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_rsubf/2" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_rsubf/0" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_rsubf/2" 2 } } */ -/* { dg-final { scan-assembler-times "fuse_add_nand/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_add_nor/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_add_or/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_and_rsubf/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_and_subf/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_add/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_subf/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_add/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_subf/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_or_rsubf/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_or_subf/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_nand/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_nand/1" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_nor/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_nor/1" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_or/0" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_or/1" 4 } } */ -/* { dg-final { scan-assembler-times "fuse_and_add/0" 6 } } */ -/* { dg-final { scan-assembler-times "fuse_or_add/0" 6 } } */ -/* { dg-final { scan-assembler-times "fuse_add_and/0" 8 } } */ -/* { dg-final { 
scan-assembler-times "fuse_add_and/2" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_and/0" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_and/1" 8 } } */ -/* { dg-final { scan-assembler-times "fuse_add_nand/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_add_nor/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_add_or/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_and_rsubf/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_and_subf/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_add/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_nand_subf/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_add/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_nor_subf/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_or_rsubf/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_or_subf/2" 12 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_and/2" 16 } } */ -/* { dg-final { scan-assembler-times "fuse_and_add/2" 22 } } */ -/* { dg-final { scan-assembler-times "fuse_or_add/2" 22 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_nand/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_nor/2" 24 } } */ -/* { dg-final { scan-assembler-times "fuse_subf_or/2" 24 } } */ +/* { dg-final { scan-assembler-times "fuse_nand_rsubf/0" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_rsubf/2" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_rsubf/0" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_rsubf/2" 2 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nand/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nor/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_or/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_rsubf/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times 
"fuse_and_subf/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_add/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_subf/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_add/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_subf/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_rsubf/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_subf/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nand/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nand/1" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nor/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nor/1" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_or/0" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_or/1" 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_add/0" 6 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_add/0" 6 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_and/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_and/2" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_and/0" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_and/1" 8 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nand/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nor/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_or/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_rsubf/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_subf/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_add/2" 12 { target lp64 } } } */ +/* { dg-final 
{ scan-assembler-times "fuse_nand_subf/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_add/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_subf/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_rsubf/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_subf/2" 12 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_and/2" 16 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_add/2" 22 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_add/2" 22 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nand/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nor/2" 24 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_or/2" 24 { target lp64 } } } */ + +/* { dg-final { scan-assembler-times "fuse_nand_rsubf/0" 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_rsubf/2" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_rsubf/0" 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_rsubf/2" 0 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nand/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nor/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_or/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_rsubf/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_subf/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_add/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_subf/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_add/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_subf/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times 
"fuse_or_rsubf/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_subf/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nand/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nand/1" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nor/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nor/1" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_or/0" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_or/1" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_add/0" 6 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_add/0" 6 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_and/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_and/2" 4 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_and/0" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_and/1" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nand/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_nor/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_add_or/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_rsubf/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_subf/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_add/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nand_subf/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_add/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_nor_subf/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_rsubf/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_subf/2" 8 { target ilp32 } } } 
*/ +/* { dg-final { scan-assembler-times "fuse_subf_and/2" 8 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_and_add/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_or_add/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nand/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_nor/2" 16 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times "fuse_subf_or/2" 16 { target ilp32 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c new file mode 100644 index 0000000..1255ee9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c @@ -0,0 +1,2370 @@ +/* { dg-do run } */ +/* { dg-options "-mcpu=power10 -save-temps" } */ +/* { dg-require-effective-target power10_hw } */ + +/* Check that the expected 128-bit instructions are generated if the processor + supports the 128-bit integer instructions. 
*/ +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mvslq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsrq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvsraq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvrlq\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvrlqnm\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvrlqmi\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvcmpequq\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mvcmpgtsq\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mvcmpgtuq\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mvmuloud\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulesd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulosd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulld\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdivsq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdivuq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdivesq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvdiveuq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmodsq\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmoduq\M} 1 } } */ + +#include <altivec.h> + +#define DEBUG 0 + +#if DEBUG +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + + +void print_i128(__int128_t val) +{ + printf(" %lld %llu (0x%llx %llx)", + (signed long long)(val >> 64), + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF), + (unsigned long long)(val >> 64), + (unsigned long long)(val & 0xFFFFFFFFFFFFFFFF)); +} +#endif + +void abort (void); + +__attribute__((noinline)) +__int128_t shift_right (__int128_t a, __uint128_t b) +{ + return a >> b; +} + +__attribute__((noinline)) +__int128_t shift_left (__int128_t a, __uint128_t b) +{ + return a << b; +} + +int main () +{ + int i, result_int; + + __int128_t arg1, result; + __uint128_t uarg2; + + _Decimal128 arg1_dfp128, result_dfp128, expected_result_dfp128; + + struct conv_t { + 
__uint128_t u128; + _Decimal128 d128; + } conv, conv2; + + vector signed long long int vec_arg1_di, vec_arg2_di; + vector signed long long int vec_result_di, vec_expected_result_di; + vector unsigned long long int vec_uarg1_di, vec_uarg2_di, vec_uarg3_di; + vector unsigned long long int vec_uresult_di; + vector unsigned long long int vec_uexpected_result_di; + + __int128_t expected_result; + __uint128_t uexpected_result; + + vector __int128 vec_arg1, vec_arg2, vec_result; + vector unsigned __int128 vec_uarg1, vec_uarg2, vec_uarg3, vec_uresult; + vector bool __int128 vec_result_bool; + + /* sign extend double to 128-bit integer */ + vec_arg1_di[0] = 1000; + vec_arg1_di[1] = -123456; + + expected_result = 1000; + + vec_result = vec_signextq (vec_arg1_di); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_signextq ((long long) %lld) = ", vec_arg1_di[0]); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1_di[0] = -123456; + vec_arg1_di[1] = 1000; + + expected_result = -123456; + + vec_result = vec_signextq (vec_arg1_di); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_signextq ((long long) %lld) = ", vec_arg1_di[0]); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* test shift 128-bit integers. + Note, shift amount is given by the lower 7-bits of the shift amount. 
*/ + vec_arg1[0] = 3; + vec_uarg2[0] = 2; + expected_result = vec_arg1[0]*4; + + vec_result = vec_sl (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_sl(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" << %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + arg1 = vec_result[0]; + uarg2 = 4; + expected_result = arg1*16; + + result = arg1 << uarg2; + + if (result != expected_result) { +#if DEBUG + printf("ERROR: int128 << uint128): "); + print_i128(arg1); + printf(" << %lld", uarg2 & 0xFF); + printf(" = "); + print_i128(result); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 3; + vec_uarg2[0] = 2; + uexpected_result = vec_uarg1[0]*4; + + vec_uresult = vec_sl (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_sl(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" << %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12; + vec_uarg2[0] = 2; + expected_result = vec_arg1[0]/4; + + vec_result = vec_sr (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_sr(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" >> %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 48; + vec_uarg2[0] = 2; + uexpected_result = vec_uarg1[0]/4; + + vec_uresult = vec_sr (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != 
uexpected_result) { +#if DEBUG + printf("ERROR: vec_sr(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" >> %lld", vec_uarg2[0] & 0xFF); + printf(" = "); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + arg1 = vec_uresult[0]; + uarg2 = 4; + expected_result = arg1/16; + + result = arg1 >> uarg2; + + if (result != expected_result) { +#if DEBUG + printf("ERROR: int128 >> uint128: "); + print_i128(arg1); + printf(" >> %lld", uarg2 & 0xFF); + printf(" = "); + print_i128(result); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_uarg2[0] = 32; + expected_result = 0x0000000012345678ULL; + expected_result = (expected_result << 64) | 0x90ABCDEFAABBCCDDULL; + + vec_result = vec_sra (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_sra(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" >> %lld = \n", vec_uarg2[0]); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 48; + uexpected_result = 0xFFFFFFFFFFFFAABBLL; + uexpected_result = (uexpected_result << 64) | 0xCCDDEEFF11221234ULL; + + vec_uresult = vec_sra (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_sra(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" >> %lld = \n", vec_uarg2[0] & 0xFF); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 
0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_uarg2[0] = 32; + expected_result = 0x90ABCDEFAABBCCDDULL; + expected_result = (expected_result << 64) | 0xEEFF112212345678ULL; + + vec_result = vec_rl (vec_arg1, vec_uarg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_rl(int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" >> %lld = \n", vec_uarg2[0]); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 48; + uexpected_result = 0x11221234567890ABULL; + uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDEEFFULL; + + vec_uresult = vec_rl (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_rl(uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" >> %lld = \n", vec_uarg2[0]); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* vec_rlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left by shift in element of arg2. + Then AND with mask whose start/stop bits are specified in element of + arg3. 
*/ + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_uarg2[0] = 32; + vec_uarg3[0] = (32 << 8) | 95; + expected_result = 0xaabbccddULL; + expected_result = (expected_result << 64) | 0xeeff112200000000ULL; + + vec_result = vec_rlnm (vec_arg1, vec_uarg2, vec_uarg3); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_rlnm(int128, uint128, uint128): "); + print_i128(vec_arg1[0]); + printf(" << %lld = \n", vec_uarg3[0] & 0xFF); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + + + /* vec_rlnm(arg1, arg2, arg3) + result - rotate each element of arg1 left by shift in element of arg2; + then AND with mask whose start/stop bits are specified in element of + arg3. */ + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 48; + vec_uarg3[0] = (8 << 8) | 119; + + uexpected_result = 0x00221234567890ABULL; + uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDEE00ULL; + + vec_uresult = vec_rlnm (vec_uarg1, vec_uarg2, vec_uarg3); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_rlnm(uint128, uint128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" << %lld = \n", vec_uarg3[0] & 0xFF); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* vec_rlmi(R, A, B) + Result value: Each element of R is obtained by rotating the corresponding + element of A left by the number of bits specified by the corresponding element + of B. 
*/ + + vec_arg1[0] = 0x1234567890ABCDEFULL; + vec_arg1[0] = (vec_arg1[0] << 64) | 0xAABBCCDDEEFF1122ULL; + vec_arg2[0] = 0x000000000000DEADULL; + vec_arg2[0] = (vec_arg2[0] << 64) | 0x0000BEEF00000000ULL; + vec_uarg3[0] = 96 << 16 | 127 << 8 | 32; + expected_result = 0x000000000000DEADULL; + expected_result = (expected_result << 64) | 0x0000BEEF12345678ULL; + + vec_result = vec_rlmi (vec_arg1, vec_arg2, vec_uarg3); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_rlmi(int128, int128, uint128): "); + print_i128(vec_arg1[0]); + printf(" << %lld = \n", vec_uarg2_di[1] & 0xFF); + print_i128(vec_result[0]); + printf("\n does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* vec_rlmi(R, A, B) + Result value: Each element of R is obtained by rotating the corresponding + element of A left by the number of bits specified by the corresponding element + of B. */ + + vec_uarg1[0] = 0xAABBCCDDEEFF1122ULL; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 0x1234567890ABCDEFULL; + vec_uarg2[0] = 0xDEAD000000000000ULL; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 0x000000000000BEEFULL; + vec_uarg3[0] = 16 << 16 | 111 << 8 | 48; + uexpected_result = 0xDEAD1234567890ABULL; + uexpected_result = (uexpected_result << 64) | 0xCDEFAABBCCDDBEEFULL; + + vec_uresult = vec_rlmi (vec_uarg1, vec_uarg2, vec_uarg3); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_rlmi(uint128, unit128, uint128): "); + print_i128(vec_uarg1[0]); + printf(" << %lld = \n", vec_uarg3[1] & 0xFF); + print_i128(vec_uresult[0]); + printf("\n does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* 128-bit compare tests, result is all 1's if true */ + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1[0] = 2468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + uexpected_result = 0xFFFFFFFFFFFFFFFFULL; + uexpected_result = 
(uexpected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpgt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != uexpected_result) { +#if DEBUG + printf("ERROR: unsigned vec_cmpgt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpgt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed vec_cmpgt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpeq (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR:not equal signed vec_cmpeq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpeq (vec_arg1, vec_arg2); + + if 
(vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed equal vec_cmpeq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpeq (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned not equal vec_cmpeq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpeq (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: equal unsigned vec_cmpeq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpne (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned not 
equal vec_cmpne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpne (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: equal unsigned vec_cmpne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpne (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR:not equal signed vec_cmpne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmpne (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed equal vec_cmpne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + 
print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 > arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 1234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 12468; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 < arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmplt (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 = arg2 vec_cmplt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + 
vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmplt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 > arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -1234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 12468; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmplt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 < arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0x0ULL; + + vec_result_bool = vec_cmplt (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_cmplt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != 
expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 > arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 1234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 12468; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 < arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 = arg2 vec_cmple ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmple (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 > arg2 vec_cmple ( 
"); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -1234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 12468; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 < arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmple (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_cmple ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 12468; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 > arg2 vec_cmpge ( "); 
+ print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 1234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 12468; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 < arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_uarg1, vec_uarg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: unsigned arg1 = arg2 vec_cmpge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = 12468; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = -1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 > arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + 
printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -1234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 12468; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + expected_result = 0x0; + + vec_result_bool = vec_cmpge (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 < arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + expected_result = 0xFFFFFFFFFFFFFFFFULL; + expected_result = (expected_result << 64) | 0xFFFFFFFFFFFFFFFFULL; + + vec_result_bool = vec_cmpge (vec_arg1, vec_arg2); + + if (vec_result_bool[0] != expected_result) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_cmpge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed."); + print_i128(vec_result_bool[0]); + printf("\n Result does not match expected_result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + +#if 1 + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_eq (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_eq (vec_arg1, vec_arg2); + + if (result_int) { 
+#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_eq (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_eq (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_ne (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_ne (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_ne (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 
vec_all_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_ne (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_lt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_lt (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_lt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_lt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_lt (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_lt ( "); + 
print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + 
printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_all_ge (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_all_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_all_ge (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_all_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") 
failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_all_ge (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_all_ge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_all_ge (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_all_ge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_eq (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_eq (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_eq ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_eq (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } 
+ + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_eq (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_eq ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_ne (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_ne (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_ne ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_ne (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_ne (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_ne ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 
1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_lt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_lt (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_lt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_lt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_lt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_lt (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_lt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + 
vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_gt (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_gt ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_gt (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_le (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_le ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = 
vec_uarg2; + + result_int = vec_any_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + result_int = vec_any_le (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_le ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + vec_arg1 = vec_arg2; + + result_int = vec_any_ge (vec_arg1, vec_arg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: signed arg1 = arg2 vec_any_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1[0] = -234; + vec_arg1[0] = (vec_arg1[0] << 64) | 4567; + vec_arg2[0] = 1234; + vec_arg2[0] = (vec_arg2[0] << 64) | 4567; + + result_int = vec_any_ge (vec_arg1, vec_arg2); + + if (result_int) { +#if DEBUG + printf("ERROR: signed arg1 != arg2 vec_any_ge ( "); + print_i128(vec_arg1[0]); + printf(", "); + print_i128(vec_arg2[0]); + printf(") failed.\n\n"); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + vec_uarg1 = vec_uarg2; + + result_int = vec_any_ge (vec_uarg1, vec_uarg2); + + if (!result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 = uarg2 vec_any_ge ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 234; + vec_uarg1[0] = (vec_uarg1[0] << 64) | 4567; + vec_uarg2[0] = 1234; + vec_uarg2[0] = (vec_uarg2[0] << 64) | 4567; + + 
result_int = vec_any_ge (vec_uarg1, vec_uarg2); + + if (result_int) { +#if DEBUG + printf("ERROR: unsigned uarg1 != uarg2 vec_any_gt ( "); + print_i128(vec_uarg1[0]); + printf(", "); + print_i128(vec_uarg2[0]); + printf(") failed.\n\n"); +#else + abort(); +#endif + } +#endif + + /* Vector multiply Even and Odd tests */ + vec_arg1_di[0] = 200; + vec_arg1_di[1] = 400; + vec_arg2_di[0] = 1234; + vec_arg2_di[1] = 4567; + expected_result = vec_arg1_di[0] * vec_arg2_di[0]; + + vec_result = vec_mule (vec_arg1_di, vec_arg2_di); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_mule (signed, signed) failed.\n"); + printf(" vec_arg1_di[0] = %lld\n", vec_arg1_di[0]); + printf(" vec_arg2_di[0] = %lld\n", vec_arg2_di[0]); + printf("Result = "); + print_i128(vec_result[0]); + printf("\nExpected Result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_arg1_di[0] = -200; + vec_arg1_di[1] = -400; + vec_arg2_di[0] = 1234; + vec_arg2_di[1] = 4567; + expected_result = vec_arg1_di[1] * vec_arg2_di[1]; + + vec_result = vec_mulo (vec_arg1_di, vec_arg2_di); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_mulo (signed, signed) failed.\n"); + printf(" vec_arg1_di[1] = %lld\n", vec_arg1_di[1]); + printf(" vec_arg2_di[1] = %lld\n", vec_arg2_di[1]); + printf("Result = "); + print_i128(vec_result[0]); + printf("\nExpected Result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1_di[0] = 200; + vec_uarg1_di[1] = 400; + vec_uarg2_di[0] = 1234; + vec_uarg2_di[1] = 4567; + uexpected_result = vec_uarg1_di[0] * vec_uarg2_di[0]; + + vec_uresult = vec_mule (vec_uarg1_di, vec_uarg2_di); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_mule (unsigned, unsigned) failed.\n"); + printf(" vec_uarg1_di[1] = %lld\n", vec_uarg1_di[1]); + printf(" vec_uarg2_di[1] = %lld\n", vec_uarg2_di[1]); + printf("Result = "); + 
print_i128(vec_uresult[0]); + printf("\nExpected Result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1_di[0] = 200; + vec_uarg1_di[1] = 400; + vec_uarg2_di[0] = 1234; + vec_uarg2_di[1] = 4567; + uexpected_result = vec_uarg1_di[1] * vec_uarg2_di[1]; + + vec_uresult = vec_mulo (vec_uarg1_di, vec_uarg2_di); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_mulo (unsigned, unsigned) failed.\n"); + printf(" vec_uarg1_di[0] = %lld\n", vec_uarg1_di[0]); + printf(" vec_uarg2_di[0] = %lld\n", vec_uarg2_di[0]); + printf("Result = "); + print_i128(vec_uresult[0]); + printf("\nExpected Result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* Vector Multiply Longword */ + vec_arg1_di[0] = 100; + vec_arg1_di[1] = -123456; + + vec_arg2_di[0] = 123; + vec_arg2_di[1] = 1000; + + vec_expected_result_di[0] = 12300; + vec_expected_result_di[1] = -123456000; + + vec_result_di = vec_arg1_di * vec_arg2_di; + + for (i = 0; i<2; i++) { + if (vec_result_di[i] != vec_expected_result_di[i]) { +#if DEBUG + printf("ERROR: vector multipy [%d] ((long long) %lld) = ", i, + vec_result_di[i]); + printf("\n does not match expected_result [%d] = ((long long) %lld)", i, + vec_expected_result_di[i]); + printf("\n\n"); +#else + abort(); +#endif + } + } + + /* Vector Divide Quadword */ + vec_arg1[0] = -12345678; + vec_arg2[0] = 2; + expected_result = -6172839; + + vec_result = vec_div (vec_arg1, vec_arg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_div (signed, signed) failed.\n"); + printf("vec_arg1[0] = "); + print_i128(vec_arg1[0]); + printf("\nvec_arg2[0] = "); + print_i128(vec_arg2[0]); + printf("\nResult = "); + print_i128(vec_result[0]); + printf("\nExpected result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 24680; + vec_uarg2[0] = 4; + uexpected_result = 6170; + + vec_uresult = 
vec_div (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_div (unsigned, unsigned) failed.\n"); + printf("vec_uarg1[0] = "); + print_i128(vec_uarg1[0]); + printf("\nvec_uarg2[0] = "); + print_i128(vec_uarg2[0]); + printf("\nResult = "); + print_i128(vec_uresult[0]); + printf("\nExpected result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* Vector Divide Extended Quadword */ + vec_arg1[0] = -20; // has 128-bit of zero concatenated onto it + vec_arg2[0] = 0x2000000000000000; + vec_arg2[0] = vec_arg2[0] << 64; + expected_result = -160; + + vec_result = vec_dive (vec_arg1, vec_arg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_dive (signed, signed) failed.\n"); + printf("vec_arg1[0] = "); + print_i128(vec_arg1[0]); + printf("\nvec_arg2[0] = "); + print_i128(vec_arg2[0]); + printf("\nResult = "); + print_i128(vec_result[0]); + printf("\nExpected result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 20; // has 128-bit of zero concatenated onto it + vec_uarg2[0] = 0x4000000000000000; + vec_uarg2[0] = vec_uarg2[0] << 64; + uexpected_result = 80; + + vec_uresult = vec_dive (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_dive (unsigned, unsigned) failed.\n"); + printf("vec_uarg1[0] = "); + print_i128(vec_uarg1[0]); + printf("\nvec_uarg2[0] = "); + print_i128(vec_uarg2[0]); + printf("\nResult = "); + print_i128(vec_uresult[0]); + printf("\nExpected result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* Vector modulo quad word */ + vec_arg1[0] = -12345675; + vec_arg2[0] = 2; + expected_result = -1; + + vec_result = vec_mod (vec_arg1, vec_arg2); + + if (vec_result[0] != expected_result) { +#if DEBUG + printf("ERROR: vec_mod (signed, signed) failed.\n"); + printf("vec_arg1[0] = "); + 
print_i128(vec_arg1[0]); + printf("\nvec_arg2[0] = "); + print_i128(vec_arg2[0]); + printf("\nResult = "); + print_i128(vec_result[0]); + printf("\nExpected result = "); + print_i128(expected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + vec_uarg1[0] = 24685; + vec_uarg2[0] = 4; + uexpected_result = 1; + + vec_uresult = vec_mod (vec_uarg1, vec_uarg2); + + if (vec_uresult[0] != uexpected_result) { +#if DEBUG + printf("ERROR: vec_mod (unsigned, unsigned) failed.\n"); + printf("vec_uarg1[0] = "); + print_i128(vec_uarg1[0]); + printf("\nvec_uarg2[0] = "); + print_i128(vec_uarg2[0]); + printf("\nResult = "); + print_i128(vec_uresult[0]); + printf("\nExpected result = "); + print_i128(uexpected_result); + printf("\n\n"); +#else + abort(); +#endif + } + + /* DFP to __int128 and __int128 to DFP conversions */ + /* Print the DFP value as an unsigned int so we can see the bit patterns. */ + conv.u128 = 0x2208000000000000ULL; + conv.u128 = (conv.u128 << 64) | 0x4ULL; //DFP bit pattern for integer 4 + expected_result_dfp128 = conv.d128; + + arg1 = 4; + + conv.d128 = (_Decimal128) arg1; + + result_dfp128 = (_Decimal128) arg1; + if (((conv.u128 >>64) != 0x2208000000000000ULL) && + ((conv.u128 & 0xFFFFFFFFFFFFFFFF) != 0x4ULL)) { +#if DEBUG + printf("ERROR: convert int128 value "); + print_i128 (arg1); + conv.d128 = result_dfp128; + printf("\nto DFP value 0x%llx %llx (printed as hex bit string) ", + (unsigned long long)((conv.u128) >>64), + (unsigned long long)((conv.u128) & 0xFFFFFFFFFFFFFFFF)); + + conv.d128 = expected_result_dfp128; + printf("\ndoes not match expected_result = 0x%llx %llx\n\n", + (unsigned long long) (conv.u128>>64), + (unsigned long long) (conv.u128 & 0xFFFFFFFFFFFFFFFF)); +#else + abort(); +#endif + } + + expected_result = 4; + + conv.u128 = 0x2208000000000000ULL; + conv.u128 = (conv.u128 << 64) | 0x4ULL; // 4 as DFP + arg1_dfp128 = conv.d128; + + result = (__int128_t) arg1_dfp128; + + if (result != expected_result) { +#if DEBUG + 
printf("ERROR: convert DFP value "); + printf("0x%llx %llx (printed as hex bit string) ", + (unsigned long long)(conv.u128>>64), + (unsigned long long)(conv.u128 & 0xFFFFFFFFFFFFFFFF)); + printf("to __int128 value = "); + print_i128 (result); + printf("\ndoes not match expected_result = "); + print_i128 (expected_result); + printf("\n"); +#else + abort(); +#endif + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c new file mode 100644 index 0000000..fdcad01 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c @@ -0,0 +1,128 @@ +/* { dg-do run { target { *-*-linux* && { lp64 && p9vector_hw } } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9 -save-temps" } */ + +/* These builtins were not defined until ISA 3.1 but only require ISA 3.0 + support. */ + +/* { dg-final { scan-assembler-times {\mvextsb2w\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvextsh2w\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */ + +#include <altivec.h> + +#define DEBUG 0 + +#if DEBUG +#include <stdio.h> +#include <stdlib.h> +#endif + +void abort (void); + +int main () +{ + int i; + + vector signed char vec_arg_qi, vec_result_qi; + vector signed short int vec_arg_hi, vec_result_hi, vec_expected_hi; + vector signed int vec_arg_wi, vec_result_wi, vec_expected_wi; + vector signed long long vec_result_di, vec_expected_di; + + /* test sign extend byte to word */ + vec_arg_qi = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8, + -1, -2, -3, -4, -5, -6, -7, -8}; + vec_expected_wi = (vector signed int) {1, 5, -1, -5}; + + vec_result_wi = vec_signexti (vec_arg_qi); + + for (i = 0; i < 4; i++) + if (vec_result_wi[i] != vec_expected_wi[i]) { +#if DEBUG + printf("ERROR: 
vec_signexti(char, int): "); + printf("vec_result_wi[%d] != vec_expected_wi[%d]\n", + i, i); + printf("vec_result_wi[%d] = %d\n", i, vec_result_wi[i]); + printf("vec_expected_wi[%d] = %d\n", i, vec_expected_wi[i]); +#else + abort(); +#endif + } + + /* test sign extend byte to double */ + vec_arg_qi = (vector signed char){1, 2, 3, 4, 5, 6, 7, 8, + -1, -2, -3, -4, -5, -6, -7, -8}; + vec_expected_di = (vector signed long long int){1, -1}; + + vec_result_di = vec_signextll(vec_arg_qi); + + for (i = 0; i < 2; i++) + if (vec_result_di[i] != vec_expected_di[i]) { +#if DEBUG + printf("ERROR: vec_signextll(byte, long long int): "); + printf("vec_result_di[%d] != vec_expected_di[%d]\n", i, i); + printf("vec_result_di[%d] = %lld\n", i, vec_result_di[i]); + printf("vec_expected_di[%d] = %lld\n", i, vec_expected_di[i]); +#else + abort(); +#endif + } + + /* test sign extend short to word */ + vec_arg_hi = (vector signed short int){1, 2, 3, 4, -1, -2, -3, -4}; + vec_expected_wi = (vector signed int){1, 3, -1, -3}; + + vec_result_wi = vec_signexti(vec_arg_hi); + + for (i = 0; i < 4; i++) + if (vec_result_wi[i] != vec_expected_wi[i]) { +#if DEBUG + printf("ERROR: vec_signexti(short, int): "); + printf("vec_result_wi[%d] != vec_expected_wi[%d]\n", i, i); + printf("vec_result_wi[%d] = %d\n", i, vec_result_wi[i]); + printf("vec_expected_wi[%d] = %d\n", i, vec_expected_wi[i]); +#else + abort(); +#endif + } + + /* test sign extend short to double word */ + vec_arg_hi = (vector signed short int ){1, 3, 5, 7, -1, -3, -5, -7}; + vec_expected_di = (vector signed long long int){1, -1}; + + vec_result_di = vec_signextll(vec_arg_hi); + + for (i = 0; i < 2; i++) + if (vec_result_di[i] != vec_expected_di[i]) { +#if DEBUG + printf("ERROR: vec_signextll(short, double): "); + printf("vec_result_di[%d] != vec_expected_di[%d]\n", i, i); + printf("vec_result_di[%d] = %lld\n", i, vec_result_di[i]); + printf("vec_expected_di[%d] = %lld\n", i, vec_expected_di[i]); +#else + abort(); +#endif + } + + /* 
test sign extend word to double word */ + vec_arg_wi = (vector signed int ){1, 3, -1, -3}; + vec_expected_di = (vector signed long long int){1, -1}; + + vec_result_di = vec_signextll(vec_arg_wi); + + for (i = 0; i < 2; i++) + if (vec_result_di[i] != vec_expected_di[i]) { +#if DEBUG + printf("ERROR: vec_signextll(word, double): "); + printf("vec_result_di[%d] != vec_expected_di[%d]\n", i, i); + printf("vec_result_di[%d] = %lld\n", i, vec_result_di[i]); + printf("vec_expected_di[%d] = %lld\n", i, vec_expected_di[i]); +#else + abort(); +#endif + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c b/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c index 5512c0f..6834733 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c @@ -1,5 +1,5 @@ /* { dg-do compile } -/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-require-effective-target p9vector_hw } */ /* { dg-options "-O2 -mdejagnu-cpu=power9" } */ #include <altivec.h> @@ -62,6 +62,6 @@ rlnm_test_2 (vector unsigned long long x, vector unsigned long long y, /* { dg-final { scan-assembler-times "vextsb2d" 1 } } */ /* { dg-final { scan-assembler-times "vslw" 1 } } */ /* { dg-final { scan-assembler-times "vsld" 1 } } */ -/* { dg-final { scan-assembler-times "xxlor" 2 } } */ +/* { dg-final { scan-assembler-times "xxlor" 4 } } */ /* { dg-final { scan-assembler-times "vrlwnm" 2 } } */ /* { dg-final { scan-assembler-times "vrldnm" 2 } } */ diff --git a/gcc/testsuite/gdc.test/compilable/aggr_alignment.d b/gcc/testsuite/gdc.test/compilable/aggr_alignment.d index bf602ff..0c727e2 100644 --- a/gcc/testsuite/gdc.test/compilable/aggr_alignment.d +++ b/gcc/testsuite/gdc.test/compilable/aggr_alignment.d @@ -27,6 +27,26 @@ static assert(C2.int1.offsetof == payloadOffset + 8); static assert(C2.alignof == size_t.sizeof); static assert(__traits(classInstanceSize, C2) == payloadOffset + 12); +align(8) struct PaddedStruct +{ + bool 
flag; + align(2) S1 s1; +} + +static assert(PaddedStruct.s1.offsetof == 2); +static assert(PaddedStruct.alignof == 8); +static assert(PaddedStruct.sizeof == 16); + +align(1) struct UglyStruct +{ + bool flag; + int i; + ubyte u; +} + +static assert(UglyStruct.i.offsetof == 4); +static assert(UglyStruct.alignof == 1); +static assert(UglyStruct.sizeof == 9); /***************************************************/ // https://issues.dlang.org/show_bug.cgi?id=19914 diff --git a/gcc/testsuite/gdc.test/compilable/betterCarray.d b/gcc/testsuite/gdc.test/compilable/betterCarray.d index 74c80be..3f48b04 100644 --- a/gcc/testsuite/gdc.test/compilable/betterCarray.d +++ b/gcc/testsuite/gdc.test/compilable/betterCarray.d @@ -15,3 +15,13 @@ int foo(int[] a, int i) { return a[i]; } + +/**********************************************/ +// https://issues.dlang.org/show_bug.cgi?id=19234 +void issue19234() +{ + static struct A {} + A[10] a; + A[10] b; + b[] = a[]; +} diff --git a/gcc/testsuite/gfortran.dg/vect/pr100981-1.f90 b/gcc/testsuite/gfortran.dg/vect/pr100981-1.f90 new file mode 100644 index 0000000..6f11121 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/pr100981-1.f90 @@ -0,0 +1,22 @@ +! { dg-do compile } +! { dg-additional-options "-O3 -ftree-parallelize-loops=2 -fno-signed-zeros -fno-trapping-math" } +! 
{ dg-additional-options "-march=armv8.3-a" { target aarch64*-*-* } } + +complex function cdcdot(n, cx) + implicit none + + integer :: n, i, kx + complex :: cx(*) + double precision :: dsdotr, dsdoti, dt1, dt3 + + kx = 1 + do i = 1, n + dt1 = real(cx(kx)) + dt3 = aimag(cx(kx)) + dsdotr = dsdotr + dt1 * 2 - dt3 * 2 + dsdoti = dsdoti + dt1 * 2 + dt3 * 2 + kx = kx + 1 + end do + cdcdot = cmplx(real(dsdotr), real(dsdoti)) + return +end function cdcdot diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index ba36348..ee79808 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5247,7 +5247,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); } - scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt); + scalar_dest = gimple_get_lhs (orig_stmt_info->stmt); scalar_type = TREE_TYPE (scalar_dest); scalar_results.create (group_size); new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c index b25655c..2ed49cd 100644 --- a/gcc/tree-vect-slp-patterns.c +++ b/gcc/tree-vect-slp-patterns.c @@ -544,6 +544,8 @@ complex_pattern::build (vec_info *vinfo) { /* Calculate the location of the statement in NODE to replace. */ stmt_info = SLP_TREE_REPRESENTATIVE (node); + stmt_vec_info reduc_def + = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)); gimple* old_stmt = STMT_VINFO_STMT (stmt_info); tree lhs_old_stmt = gimple_get_lhs (old_stmt); tree type = TREE_TYPE (lhs_old_stmt); @@ -568,9 +570,10 @@ complex_pattern::build (vec_info *vinfo) = vinfo->add_pattern_stmt (call_stmt, stmt_info); /* Make sure to mark the representative statement pure_slp and - relevant. */ + relevant and transfer reduction info. 
*/ STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope; STMT_SLP_TYPE (call_stmt_info) = pure_slp; + STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def; gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt)); STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index cc734e0..1915d74 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *, slp_tree, stmt_vector_for_cost *); +static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree); static object_allocator<_slp_tree> *slp_tree_pool; static slp_tree slp_first_node; @@ -108,6 +109,7 @@ _slp_tree::_slp_tree () SLP_TREE_VECTYPE (this) = NULL_TREE; SLP_TREE_REPRESENTATIVE (this) = NULL; SLP_TREE_REF_COUNT (this) = 1; + this->failed = NULL; this->max_nunits = 1; this->lanes = 0; } @@ -129,6 +131,8 @@ _slp_tree::~_slp_tree () SLP_TREE_VEC_DEFS (this).release (); SLP_TREE_LOAD_PERMUTATION (this).release (); SLP_TREE_LANE_PERMUTATION (this).release (); + if (this->failed) + free (failed); } /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ @@ -1414,6 +1418,30 @@ bst_traits::equal (value_type existing, value_type candidate) return true; } +/* ??? This was std::pair<std::pair<tree_code, vect_def_type>, tree> + but then vec::insert does memmove and that's not compatible with + std::pair. */ +struct chain_op_t +{ + chain_op_t (tree_code code_, vect_def_type dt_, tree op_) + : code (code_), dt (dt_), op (op_) {} + tree_code code; + vect_def_type dt; + tree op; +}; + +/* Comparator for sorting associatable chains. 
*/ + +static int +dt_sort_cmp (const void *op1_, const void *op2_, void *) +{ + auto *op1 = (const chain_op_t *) op1_; + auto *op2 = (const chain_op_t *) op2_; + if (op1->dt != op2->dt) + return (int)op1->dt - (int)op2->dt; + return (int)op1->code - (int)op2->code; +} + typedef hash_map <vec <stmt_vec_info>, slp_tree, simple_hashmap_traits <bst_traits, slp_tree> > scalar_stmts_to_slp_tree_map_t; @@ -1436,14 +1464,16 @@ vect_build_slp_tree (vec_info *vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n", - *leader ? "" : "failed ", *leader); - if (*leader) + !(*leader)->failed ? "" : "failed ", *leader); + if (!(*leader)->failed) { SLP_TREE_REF_COUNT (*leader)++; vect_update_max_nunits (max_nunits, (*leader)->max_nunits); stmts.release (); + return *leader; } - return *leader; + memcpy (matches, (*leader)->failed, sizeof (bool) * group_size); + return NULL; } /* Seed the bst_map with a stub node to be filled by vect_build_slp_tree_2 @@ -1458,34 +1488,42 @@ vect_build_slp_tree (vec_info *vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "SLP discovery limit exceeded\n"); - bool existed_p = bst_map->put (stmts, NULL); - gcc_assert (existed_p); /* Mark the node invalid so we can detect those when still in use as backedge destinations. 
*/ SLP_TREE_SCALAR_STMTS (res) = vNULL; SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def; - vect_free_slp_tree (res); + res->failed = XNEWVEC (bool, group_size); + memset (res->failed, 0, sizeof (bool) * group_size); memset (matches, 0, sizeof (bool) * group_size); return NULL; } --*limit; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "starting SLP discovery for node %p\n", res); + poly_uint64 this_max_nunits = 1; slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size, &this_max_nunits, matches, limit, tree_size, bst_map); if (!res_) { - bool existed_p = bst_map->put (stmts, NULL); - gcc_assert (existed_p); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery for node %p failed\n", res); /* Mark the node invalid so we can detect those when still in use as backedge destinations. */ SLP_TREE_SCALAR_STMTS (res) = vNULL; SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def; - vect_free_slp_tree (res); + res->failed = XNEWVEC (bool, group_size); + memcpy (res->failed, matches, sizeof (bool) * group_size); } else { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery for node %p succeeded\n", res); gcc_assert (res_ == res); res->max_nunits = this_max_nunits; vect_update_max_nunits (max_nunits, this_max_nunits); @@ -1495,6 +1533,48 @@ vect_build_slp_tree (vec_info *vinfo, return res_; } +/* Helper for building an associated SLP node chain. 
*/ + +static void +vect_slp_build_two_operator_nodes (slp_tree perm, + slp_tree op0, slp_tree op1, + stmt_vec_info oper1, stmt_vec_info oper2, + vec<std::pair<unsigned, unsigned> > lperm) +{ + unsigned group_size = SLP_TREE_LANES (op1); + tree vectype = SLP_TREE_VECTYPE (op1); + + slp_tree child1 = new _slp_tree; + SLP_TREE_DEF_TYPE (child1) = vect_internal_def; + SLP_TREE_VECTYPE (child1) = vectype; + SLP_TREE_LANES (child1) = group_size; + SLP_TREE_CHILDREN (child1).create (2); + SLP_TREE_CHILDREN (child1).quick_push (op0); + SLP_TREE_CHILDREN (child1).quick_push (op1); + SLP_TREE_REPRESENTATIVE (child1) = oper1; + + slp_tree child2 = new _slp_tree; + SLP_TREE_DEF_TYPE (child2) = vect_internal_def; + SLP_TREE_VECTYPE (child2) = vectype; + SLP_TREE_LANES (child2) = group_size; + SLP_TREE_CHILDREN (child2).create (2); + SLP_TREE_CHILDREN (child2).quick_push (op0); + SLP_TREE_REF_COUNT (op0)++; + SLP_TREE_CHILDREN (child2).quick_push (op1); + SLP_TREE_REF_COUNT (op1)++; + SLP_TREE_REPRESENTATIVE (child2) = oper2; + + SLP_TREE_DEF_TYPE (perm) = vect_internal_def; + SLP_TREE_CODE (perm) = VEC_PERM_EXPR; + SLP_TREE_VECTYPE (perm) = vectype; + SLP_TREE_LANES (perm) = group_size; + /* ??? We should set this NULL but that's not expected. */ + SLP_TREE_REPRESENTATIVE (perm) = oper1; + SLP_TREE_LANE_PERMUTATION (perm) = lperm; + SLP_TREE_CHILDREN (perm).quick_push (child1); + SLP_TREE_CHILDREN (perm).quick_push (child2); +} + /* Recursively build an SLP tree starting from NODE. Fail (and return a value not equal to zero) if def-stmts are not isomorphic, require data permutation or are of unsupported types of @@ -1672,6 +1752,353 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, SLP_TREE_CHILDREN (node).quick_push (vnode); return node; } + /* When discovery reaches an associatable operation see whether we can + improve that to match up lanes in a way superior to the operand + swapping code which at most looks at two defs. + ??? 
For BB vectorization we cannot do the brute-force search + for matching as we can succeed by means of builds from scalars + and have no good way to "cost" one build against another. */ + else if (is_a <loop_vec_info> (vinfo) + /* ??? We don't handle !vect_internal_def defs below. */ + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + && is_gimple_assign (stmt_info->stmt) + && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt)) + || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR) + && ((FLOAT_TYPE_P (vectype) && flag_associative_math) + || (INTEGRAL_TYPE_P (TREE_TYPE (vectype)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (vectype))))) + { + /* See if we have a chain of (mixed) adds or subtracts or other + associatable ops. */ + enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt); + if (code == MINUS_EXPR) + code = PLUS_EXPR; + stmt_vec_info other_op_stmt_info = NULL; + stmt_vec_info op_stmt_info = NULL; + unsigned chain_len = 0; + auto_vec<chain_op_t> chain; + auto_vec<std::pair<tree_code, gimple *> > worklist; + auto_vec<vec<chain_op_t> > chains (group_size); + auto_vec<slp_tree, 4> children; + bool hard_fail = true; + for (unsigned lane = 0; lane < group_size; ++lane) + { + /* For each lane linearize the addition/subtraction (or other + uniform associatable operation) expression tree. */ + worklist.safe_push (std::make_pair (code, stmts[lane]->stmt)); + while (!worklist.is_empty ()) + { + auto entry = worklist.pop (); + gassign *stmt = as_a <gassign *> (entry.second); + enum tree_code in_code = entry.first; + enum tree_code this_code = gimple_assign_rhs_code (stmt); + /* Pick some stmts suitable for SLP_TREE_REPRESENTATIVE. 
*/ + if (!op_stmt_info + && gimple_assign_rhs_code (stmt) == code) + op_stmt_info = vinfo->lookup_stmt (stmt); + else if (!other_op_stmt_info + && gimple_assign_rhs_code (stmt) == MINUS_EXPR) + other_op_stmt_info = vinfo->lookup_stmt (stmt); + for (unsigned opnum = 1; opnum <= 2; ++opnum) + { + tree op = gimple_op (stmt, opnum); + vect_def_type dt; + stmt_vec_info def_stmt_info; + bool res = vect_is_simple_use (op, vinfo, &dt, &def_stmt_info); + gcc_assert (res); + gimple *use_stmt; + use_operand_p use_p; + if (dt == vect_internal_def + && single_imm_use (op, &use_p, &use_stmt) + && is_gimple_assign (def_stmt_info->stmt) + && (gimple_assign_rhs_code (def_stmt_info->stmt) == code + || (code == PLUS_EXPR + && (gimple_assign_rhs_code (def_stmt_info->stmt) + == MINUS_EXPR)))) + { + tree_code op_def_code = this_code; + if (op_def_code == MINUS_EXPR && opnum == 1) + op_def_code = PLUS_EXPR; + if (in_code == MINUS_EXPR) + op_def_code + = op_def_code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR; + worklist.safe_push (std::make_pair (op_def_code, + def_stmt_info->stmt)); + } + else + { + tree_code op_def_code = this_code; + if (op_def_code == MINUS_EXPR && opnum == 1) + op_def_code = PLUS_EXPR; + if (in_code == MINUS_EXPR) + op_def_code + = op_def_code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR; + chain.safe_push (chain_op_t (op_def_code, dt, op)); + } + } + } + if (chain.length () == 2) + { + /* In a chain of just two elements resort to the regular + operand swapping scheme. If we run into a length + mismatch still hard-FAIL. */ + if (chain_len == 0) + hard_fail = false; + break; + } + else if (chain_len == 0) + chain_len = chain.length (); + else if (chain.length () != chain_len) + /* ??? Here we could slip in magic to compensate with + neutral operands. */ + break; + chains.quick_push (chain.copy ()); + chain.truncate (0); + } + if (chains.length () == group_size) + { + /* Now we have a set of chains with the same length. */ + /* 1. pre-sort according to def_type and operation. 
*/ + for (unsigned lane = 0; lane < group_size; ++lane) + chains[lane].sort (dt_sort_cmp, vinfo); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "pre-sorted chains of %s\n", + get_tree_code_name (code)); + for (unsigned lane = 0; lane < group_size; ++lane) + { + for (unsigned opnum = 0; opnum < chain_len; ++opnum) + dump_printf (MSG_NOTE, "%s %T ", + get_tree_code_name (chains[lane][opnum].code), + chains[lane][opnum].op); + dump_printf (MSG_NOTE, "\n"); + } + } + /* 2. try to build children nodes, associating as necessary. */ + for (unsigned n = 0; n < chain_len; ++n) + { + vect_def_type dt = chains[0][n].dt; + unsigned lane; + for (lane = 0; lane < group_size; ++lane) + if (chains[lane][n].dt != dt) + { + if (dt == vect_constant_def + && chains[lane][n].dt == vect_external_def) + dt = vect_external_def; + else if (dt == vect_external_def + && chains[lane][n].dt == vect_constant_def) + ; + else + break; + } + if (lane != group_size) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "giving up on chain due to mismatched " + "def types\n"); + goto out; + } + if (dt == vect_constant_def + || dt == vect_external_def) + { + /* We can always build those. Might want to sort last + or defer building. */ + vec<tree> ops; + ops.create (group_size); + for (lane = 0; lane < group_size; ++lane) + ops.quick_push (chains[lane][n].op); + slp_tree child = vect_create_new_slp_node (ops); + SLP_TREE_DEF_TYPE (child) = dt; + children.safe_push (child); + } + else if (dt != vect_internal_def) + { + /* Not sure, we might need sth special. + gcc.dg/vect/pr96854.c, + gfortran.dg/vect/fast-math-pr37021.f90 + and gfortran.dg/vect/pr61171.f trigger. */ + /* Soft-fail for now. */ + hard_fail = false; + goto out; + } + else + { + vec<stmt_vec_info> op_stmts; + op_stmts.create (group_size); + slp_tree child = NULL; + /* Brute-force our way. We have to consider a lane + failing after fixing an earlier fail up in the + SLP discovery recursion. 
So track the current + permute per lane. */ + unsigned *perms = XALLOCAVEC (unsigned, group_size); + memset (perms, 0, sizeof (unsigned) * group_size); + do + { + op_stmts.truncate (0); + for (lane = 0; lane < group_size; ++lane) + op_stmts.quick_push + (vinfo->lookup_def (chains[lane][n].op)); + child = vect_build_slp_tree (vinfo, op_stmts, + group_size, &this_max_nunits, + matches, limit, + &this_tree_size, bst_map); + /* ??? We're likely getting too many fatal mismatches + here so maybe we want to ignore them (but then we + have no idea which lanes fatally mismatched). */ + if (child || !matches[0]) + break; + /* Swap another lane we have not yet matched up into + lanes that did not match. If we run out of + permute possibilities for a lane terminate the + search. */ + bool term = false; + for (lane = 1; lane < group_size; ++lane) + if (!matches[lane]) + { + if (n + perms[lane] + 1 == chain_len) + { + term = true; + break; + } + std::swap (chains[lane][n], + chains[lane][n + perms[lane] + 1]); + perms[lane]++; + } + if (term) + break; + } + while (1); + if (!child) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "failed to match up op %d\n", n); + op_stmts.release (); + goto out; + } + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "matched up op %d to\n", n); + vect_print_slp_tree (MSG_NOTE, vect_location, child); + } + children.safe_push (child); + } + } + /* 3. build SLP nodes to combine the chain. */ + for (unsigned lane = 0; lane < group_size; ++lane) + if (chains[lane][0].code != code) + { + /* See if there's any alternate all-PLUS entry. */ + unsigned n; + for (n = 1; n < chain_len; ++n) + { + for (lane = 0; lane < group_size; ++lane) + if (chains[lane][n].code != code) + break; + if (lane == group_size) + break; + } + if (n != chain_len) + { + /* Swap that in at first position. 
*/ + std::swap (children[0], children[n]); + for (lane = 0; lane < group_size; ++lane) + std::swap (chains[lane][0], chains[lane][n]); + } + else + { + /* ??? When this triggers and we end up with two + vect_constant/external_def up-front things break (ICE) + spectacularly finding an insertion place for the + all-constant op. We should have a fully + vect_internal_def operand though(?) so we can swap + that into first place and then prepend the all-zero + constant. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "inserting constant zero to compensate " + "for (partially) negated first " + "operand\n"); + chain_len++; + for (lane = 0; lane < group_size; ++lane) + chains[lane].safe_insert + (0, chain_op_t (code, vect_constant_def, NULL_TREE)); + vec<tree> zero_ops; + zero_ops.create (group_size); + zero_ops.quick_push (build_zero_cst (TREE_TYPE (vectype))); + for (lane = 1; lane < group_size; ++lane) + zero_ops.quick_push (zero_ops[0]); + slp_tree zero = vect_create_new_slp_node (zero_ops); + SLP_TREE_DEF_TYPE (zero) = vect_constant_def; + children.safe_insert (0, zero); + } + break; + } + for (unsigned i = 1; i < children.length (); ++i) + { + slp_tree op0 = children[i - 1]; + slp_tree op1 = children[i]; + bool this_two_op = false; + for (unsigned lane = 0; lane < group_size; ++lane) + if (chains[lane][i].code != chains[0][i].code) + { + this_two_op = true; + break; + } + slp_tree child; + if (i == children.length () - 1) + child = vect_create_new_slp_node (node, stmts, 2); + else + child = vect_create_new_slp_node (2, ERROR_MARK); + if (this_two_op) + { + vec<std::pair<unsigned, unsigned> > lperm; + lperm.create (group_size); + for (unsigned lane = 0; lane < group_size; ++lane) + lperm.quick_push (std::make_pair + (chains[lane][i].code != chains[0][i].code, lane)); + vect_slp_build_two_operator_nodes (child, op0, op1, + (chains[0][i].code == code + ? op_stmt_info + : other_op_stmt_info), + (chains[0][i].code == code + ? 
other_op_stmt_info + : op_stmt_info), + lperm); + } + else + { + SLP_TREE_DEF_TYPE (child) = vect_internal_def; + SLP_TREE_VECTYPE (child) = vectype; + SLP_TREE_LANES (child) = group_size; + SLP_TREE_CHILDREN (child).quick_push (op0); + SLP_TREE_CHILDREN (child).quick_push (op1); + SLP_TREE_REPRESENTATIVE (child) + = (chains[0][i].code == code + ? op_stmt_info : other_op_stmt_info); + } + children[i] = child; + } + *tree_size += this_tree_size + 1; + *max_nunits = this_max_nunits; + while (!chains.is_empty ()) + chains.pop ().release (); + return node; + } +out: + while (!children.is_empty ()) + vect_free_slp_tree (children.pop ()); + while (!chains.is_empty ()) + chains.pop ().release (); + /* Hard-fail, otherwise we might run into quadratic processing of the + chains starting one stmt into the chain again. */ + if (hard_fail) + return NULL; + /* Fall thru to normal processing. */ + } /* Get at the operands, verifying they are compatible. */ vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 06d20c7..1fb46c6 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -167,6 +167,11 @@ struct _slp_tree { int vertex; + /* If not NULL this is a cached failed SLP discovery attempt with + the lanes that failed during SLP discovery as 'false'. This is + a copy of the matches array. */ + bool *failed; + /* Allocate from slp_tree_pool. */ static void *operator new (size_t); diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index d562765..53b66f8 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,43 @@ +2021-06-09 Carl Love <cel@us.ibm.com> + + * config.host: Add if test and set for + libgcc_cv_powerpc_3_1_float128_hw. + * config/rs6000/fixkfti.c: Renamed to fixkfti-sw.c. + Change calls of __fixkfti to __fixkfti_sw. + * config/rs6000/fixunskfti.c: Renamed to fixunskfti-sw.c. + Change calls of __fixunskfti to __fixunskfti_sw. 
+ * config/rs6000/float128-p10.c (__floattikf_hw, + __floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw): New file. + * config/rs6000/float128-ifunc.c (SW_OR_HW_ISA3_1): New macro. + (__floattikf_resolve, __floatuntikf_resolve, __fixkfti_resolve, + __fixunskfti_resolve): Add resolve functions. + (__floattikf, __floatuntikf, __fixkfti, __fixunskfti): New functions. + * config/rs6000/float128-sed (floattitf, __floatuntitf, + __fixtfti, __fixunstfti): Add editor commands to change names. + * config/rs6000/float128-sed-hw (__floattitf, + __floatuntitf, __fixtfti, __fixunstfti): Add editor commands to + change names. + * config/rs6000/floattikf.c: Renamed to floattikf-sw.c. + * config/rs6000/floatuntikf.c: Renamed to floatuntikf-sw.c. + * config/rs6000/quad-float128.h (__floattikf_sw, + __floatuntikf_sw, __fixkfti_sw, __fixunskfti_sw, __floattikf_hw, + __floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw, __floattikf, + __floatuntikf, __fixkfti, __fixunskfti): New extern declarations. + * config/rs6000/t-float128 (floattikf, floatuntikf, + fixkfti, fixunskfti): Remove file names from fp128_ppc_funcs. + (floattikf-sw, floatuntikf-sw, fixkfti-sw, fixunskfti-sw): Add + file names to fp128_ppc_funcs. + * config/rs6000/t-float128-hw(fp128_3_1_hw_funcs, + fp128_3_1_hw_src, fp128_3_1_hw_static_obj, fp128_3_1_hw_shared_obj, + fp128_3_1_hw_obj): Add variables for ISA 3.1 support. + * config/rs6000/t-float128-p10-hw: New file. + * configure: Update script for isa 3.1 128-bit float support. + * configure.ac: Add check for 128-bit float hardware support. + * config/rs6000/fixkfti-sw.c: New file. + * config/rs6000/fixunskfti-sw.c: New file. + * config/rs6000/floattikf-sw.c: New file. + * config/rs6000/floatuntikf-sw.c: New file. + 2021-05-13 Dimitar Dimitrov <dimitar@dinux.eu> * config/pru/mpyll.S (__pruabi_mpyll): Place into own section. 
diff --git a/libgcc/config.host b/libgcc/config.host index f808b61..50f0006 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -1224,6 +1224,10 @@ powerpc*-*-linux*) tmake_file="${tmake_file} rs6000/t-float128-hw" fi + if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then + tmake_file="${tmake_file} rs6000/t-float128-p10-hw" + fi + extra_parts="$extra_parts ecrti.o ecrtn.o ncrti.o ncrtn.o" md_unwind_header=rs6000/linux-unwind.h ;; diff --git a/libgcc/config/rs6000/fixkfti.c b/libgcc/config/rs6000/fixkfti-sw.c index 0d965bc..cc000fc 100644 --- a/libgcc/config/rs6000/fixkfti.c +++ b/libgcc/config/rs6000/fixkfti-sw.c @@ -5,7 +5,7 @@ This file is part of the GNU C Library. Contributed by Steven Munroe (munroesj@linux.vnet.ibm.com) Code is based on the main soft-fp library written by: - Uros Bizjak (ubizjak@gmail.com). + Uros Bizjak (ubizjak@gmail.com). The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -35,7 +35,7 @@ #include "quad-float128.h" TItype -__fixkfti (TFtype a) +__fixkfti_sw (TFtype a) { FP_DECL_EX; FP_DECL_Q (A); diff --git a/libgcc/config/rs6000/fixunskfti.c b/libgcc/config/rs6000/fixunskfti-sw.c index f285b4e..7a04d1a 100644 --- a/libgcc/config/rs6000/fixunskfti.c +++ b/libgcc/config/rs6000/fixunskfti-sw.c @@ -5,7 +5,7 @@ This file is part of the GNU C Library. Contributed by Steven Munroe (munroesj@linux.vnet.ibm.com) Code is based on the main soft-fp library written by: - Uros Bizjak (ubizjak@gmail.com). + Uros Bizjak (ubizjak@gmail.com). 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -35,7 +35,7 @@ #include "quad-float128.h" UTItype -__fixunskfti (TFtype a) +__fixunskfti_sw (TFtype a) { FP_DECL_EX; FP_DECL_Q (A); diff --git a/libgcc/config/rs6000/float128-ifunc.c b/libgcc/config/rs6000/float128-ifunc.c index 8538047..57545dd 100644 --- a/libgcc/config/rs6000/float128-ifunc.c +++ b/libgcc/config/rs6000/float128-ifunc.c @@ -46,14 +46,9 @@ #endif #define SW_OR_HW(SW, HW) (__builtin_cpu_supports ("ieee128") ? HW : SW) +#define SW_OR_HW_ISA3_1(SW, HW) (__builtin_cpu_supports ("arch_3_1") ? HW : SW) /* Resolvers. */ - -/* We do not provide ifunc resolvers for __fixkfti, __fixunskfti, __floattikf, - and __floatuntikf. There is no ISA 3.0 instruction that converts between - 128-bit integer types and 128-bit IEEE floating point, or vice versa. So - use the emulator functions for these conversions. */ - static __typeof__ (__addkf3_sw) * __addkf3_resolve (void) { @@ -102,6 +97,18 @@ __floatdikf_resolve (void) return SW_OR_HW (__floatdikf_sw, __floatdikf_hw); } +static __typeof__ (__floattikf_sw) * +__floattikf_resolve (void) +{ + return SW_OR_HW_ISA3_1 (__floattikf_sw, __floattikf_hw); +} + +static __typeof__ (__floatuntikf_sw) * +__floatuntikf_resolve (void) +{ + return SW_OR_HW_ISA3_1 (__floatuntikf_sw, __floatuntikf_hw); +} + static __typeof__ (__floatunsikf_sw) * __floatunsikf_resolve (void) { @@ -114,6 +121,19 @@ __floatundikf_resolve (void) return SW_OR_HW (__floatundikf_sw, __floatundikf_hw); } + +static __typeof__ (__fixkfti_sw) * +__fixkfti_resolve (void) +{ + return SW_OR_HW_ISA3_1 (__fixkfti_sw, __fixkfti_hw); +} + +static __typeof__ (__fixunskfti_sw) * +__fixunskfti_resolve (void) +{ + return SW_OR_HW_ISA3_1 (__fixunskfti_sw, __fixunskfti_hw); +} + static __typeof__ (__fixkfsi_sw) * __fixkfsi_resolve (void) { @@ -303,6 +323,18 @@ TFtype __floatsikf (SItype_ppc) TFtype __floatdikf (DItype_ppc) __attribute__ 
((__ifunc__ ("__floatdikf_resolve"))); +TFtype __floattikf (TItype_ppc) + __attribute__ ((__ifunc__ ("__floattikf_resolve"))); + +TFtype __floatuntikf (UTItype_ppc) + __attribute__ ((__ifunc__ ("__floatuntikf_resolve"))); + +TItype_ppc __fixkfti (TFtype) + __attribute__ ((__ifunc__ ("__fixkfti_resolve"))); + +UTItype_ppc __fixunskfti (TFtype) + __attribute__ ((__ifunc__ ("__fixunskfti_resolve"))); + TFtype __floatunsikf (USItype_ppc) __attribute__ ((__ifunc__ ("__floatunsikf_resolve"))); diff --git a/libgcc/config/rs6000/float128-p10.c b/libgcc/config/rs6000/float128-p10.c new file mode 100644 index 0000000..7f5d317 --- /dev/null +++ b/libgcc/config/rs6000/float128-p10.c @@ -0,0 +1,71 @@ +/* Automatic switching between software and hardware IEEE 128-bit + ISA 3.1 floating-point emulation for PowerPC. + + Copyright (C) 2016-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Carl Love (cel@us.ibm.com) + Code is based on the main soft-fp library written by: + Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file into + combinations with other programs, and to distribute those + combinations without any restriction coming from the use of this + file. (The Lesser General Public License restrictions do apply in + other respects; for example, they cover modification of the file, + and distribution when not linked into a combine executable.) 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Note, the hardware conversion instructions for 128-bit integers are + supported for ISA 3.1 and later. Only compile this file with -mcpu=power10 + or newer support. */ + +#include <soft-fp.h> +#include <quad-float128.h> + +#ifndef __FLOAT128_HARDWARE__ +#error "This module must be compiled with IEEE 128-bit hardware support" +#endif + +#ifndef _ARCH_PWR10 +#error "This module must be compiled for Power 10 support" +#endif + +TFtype +__floattikf_hw (TItype_ppc a) +{ + return (TFtype) a; +} + +TFtype +__floatuntikf_hw (UTItype_ppc a) +{ + return (TFtype) a; +} + +TItype_ppc +__fixkfti_hw (TFtype a) +{ + return (TItype_ppc) a; +} + +UTItype_ppc +__fixunskfti_hw (TFtype a) +{ + return (UTItype_ppc) a; +} diff --git a/libgcc/config/rs6000/float128-sed b/libgcc/config/rs6000/float128-sed index d9a089f..c0fcddb 100644 --- a/libgcc/config/rs6000/float128-sed +++ b/libgcc/config/rs6000/float128-sed @@ -8,6 +8,10 @@ s/__fixtfsi/__fixkfsi/g s/__fixunstfdi/__fixunskfdi/g s/__fixunstfsi/__fixunskfsi/g s/__floatditf/__floatdikf/g +s/__floattitf/__floattikf/g +s/__floatuntitf/__floatuntikf/g +s/__fixtfti/__fixkfti/g +s/__fixunstfti/__fixunskfti/g s/__floatsitf/__floatsikf/g s/__floatunditf/__floatundikf/g s/__floatunsitf/__floatunsikf/g diff --git a/libgcc/config/rs6000/float128-sed-hw b/libgcc/config/rs6000/float128-sed-hw index acf36b0..3d2bf55 100644 --- a/libgcc/config/rs6000/float128-sed-hw +++ b/libgcc/config/rs6000/float128-sed-hw @@ -8,6 +8,10 @@ s/__fixtfsi/__fixkfsi_sw/g s/__fixunstfdi/__fixunskfdi_sw/g s/__fixunstfsi/__fixunskfsi_sw/g 
s/__floatditf/__floatdikf_sw/g +s/__floattitf/__floattikf_sw/g +s/__floatuntitf/__floatuntikf_sw/g +s/__fixtfti/__fixkfti_sw/g +s/__fixunstfti/__fixunskfti_sw/g s/__floatsitf/__floatsikf_sw/g s/__floatunditf/__floatundikf_sw/g s/__floatunsitf/__floatunsikf_sw/g diff --git a/libgcc/config/rs6000/floattikf.c b/libgcc/config/rs6000/floattikf-sw.c index cc5c7ca..4e1786c 100644 --- a/libgcc/config/rs6000/floattikf.c +++ b/libgcc/config/rs6000/floattikf-sw.c @@ -5,7 +5,7 @@ This file is part of the GNU C Library. Contributed by Steven Munroe (munroesj@linux.vnet.ibm.com) Code is based on the main soft-fp library written by: - Uros Bizjak (ubizjak@gmail.com). + Uros Bizjak (ubizjak@gmail.com). The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -35,7 +35,7 @@ #include "quad-float128.h" TFtype -__floattikf (TItype i) +__floattikf_sw (TItype i) { FP_DECL_EX; FP_DECL_Q (A); diff --git a/libgcc/config/rs6000/floatuntikf.c b/libgcc/config/rs6000/floatuntikf-sw.c index 96f2d3b..c4b814d 100644 --- a/libgcc/config/rs6000/floatuntikf.c +++ b/libgcc/config/rs6000/floatuntikf-sw.c @@ -5,7 +5,7 @@ This file is part of the GNU C Library. Contributed by Steven Munroe (munroesj@linux.vnet.ibm.com) Code is based on the main soft-fp library written by: - Uros Bizjak (ubizjak@gmail.com). + Uros Bizjak (ubizjak@gmail.com). 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -35,7 +35,7 @@ #include "quad-float128.h" TFtype -__floatuntikf (UTItype i) +__floatuntikf_sw (UTItype i) { FP_DECL_EX; FP_DECL_Q (A); diff --git a/libgcc/config/rs6000/quad-float128.h b/libgcc/config/rs6000/quad-float128.h index 5beb153..c4d775b 100644 --- a/libgcc/config/rs6000/quad-float128.h +++ b/libgcc/config/rs6000/quad-float128.h @@ -88,19 +88,18 @@ extern USItype_ppc __fixunskfsi_sw (TFtype); extern UDItype_ppc __fixunskfdi_sw (TFtype); extern TFtype __floatsikf_sw (SItype_ppc); extern TFtype __floatdikf_sw (DItype_ppc); +extern TFtype __floattikf_sw (TItype_ppc); extern TFtype __floatunsikf_sw (USItype_ppc); extern TFtype __floatundikf_sw (UDItype_ppc); +extern TFtype __floatuntikf_sw (UTItype_ppc); +extern TItype_ppc __fixkfti_sw (TFtype); +extern UTItype_ppc __fixunskfti_sw (TFtype); extern IBM128_TYPE __extendkftf2_sw (TFtype); extern TFtype __trunctfkf2_sw (IBM128_TYPE); extern TCtype __mulkc3_sw (TFtype, TFtype, TFtype, TFtype); extern TCtype __divkc3_sw (TFtype, TFtype, TFtype, TFtype); #ifdef _ARCH_PPC64 -/* We do not provide ifunc resolvers for __fixkfti, __fixunskfti, __floattikf, - and __floatuntikf. There is no ISA 3.0 instruction that converts between - 128-bit integer types and 128-bit IEEE floating point, or vice versa. So - use the emulator functions for these conversions. 
*/ - extern TItype_ppc __fixkfti (TFtype); extern UTItype_ppc __fixunskfti (TFtype); extern TFtype __floattikf (TItype_ppc); @@ -131,8 +130,12 @@ extern USItype_ppc __fixunskfsi_hw (TFtype); extern UDItype_ppc __fixunskfdi_hw (TFtype); extern TFtype __floatsikf_hw (SItype_ppc); extern TFtype __floatdikf_hw (DItype_ppc); +extern TFtype __floattikf_hw (TItype_ppc); extern TFtype __floatunsikf_hw (USItype_ppc); extern TFtype __floatundikf_hw (UDItype_ppc); +extern TFtype __floatuntikf_hw (UTItype_ppc); +extern TItype_ppc __fixkfti_hw (TFtype); +extern UTItype_ppc __fixunskfti_hw (TFtype); extern IBM128_TYPE __extendkftf2_hw (TFtype); extern TFtype __trunctfkf2_hw (IBM128_TYPE); extern TCtype __mulkc3_hw (TFtype, TFtype, TFtype, TFtype); @@ -163,8 +166,12 @@ extern USItype_ppc __fixunskfsi (TFtype); extern UDItype_ppc __fixunskfdi (TFtype); extern TFtype __floatsikf (SItype_ppc); extern TFtype __floatdikf (DItype_ppc); +extern TFtype __floattikf (TItype_ppc); extern TFtype __floatunsikf (USItype_ppc); extern TFtype __floatundikf (UDItype_ppc); +extern TFtype __floatuntikf (UTItype_ppc); +extern TItype_ppc __fixkfti (TFtype); +extern UTItype_ppc __fixunskfti (TFtype); extern IBM128_TYPE __extendkftf2 (TFtype); extern TFtype __trunctfkf2 (IBM128_TYPE); diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128 index d745f0d..b09b566 100644 --- a/libgcc/config/rs6000/t-float128 +++ b/libgcc/config/rs6000/t-float128 @@ -31,7 +31,8 @@ ibm128_dec_funcs = _tf_to_sd _tf_to_dd _tf_to_td \ _sd_to_tf _dd_to_tf _td_to_tf # New functions for software emulation -fp128_ppc_funcs = floattikf floatuntikf fixkfti fixunskfti \ +fp128_ppc_funcs = floattikf-sw floatuntikf-sw \ + fixkfti-sw fixunskfti-sw \ extendkftf2-sw trunctfkf2-sw \ sfp-exceptions _mulkc3 _divkc3 _powikf2 @@ -47,13 +48,16 @@ fp128_ppc_obj = $(fp128_ppc_static_obj) $(fp128_ppc_shared_obj) # All functions fp128_funcs = $(fp128_softfp_funcs) $(fp128_ppc_funcs) \ - $(fp128_hw_funcs) $(fp128_ifunc_funcs) 
+ $(fp128_hw_funcs) $(fp128_ifunc_funcs) \ + $(fp128_3_1_hw_funcs) fp128_src = $(fp128_softfp_src) $(fp128_ppc_src) \ - $(fp128_hw_src) $(fp128_ifunc_src) + $(fp128_hw_src) $(fp128_ifunc_src) \ + $(fp128_3_1_hw_src) fp128_obj = $(fp128_softfp_obj) $(fp128_ppc_obj) \ - $(fp128_hw_obj) $(fp128_ifunc_obj) + $(fp128_hw_obj) $(fp128_ifunc_obj) \ + $(fp128_3_1_hw_obj) fp128_sed = $(srcdir)/config/rs6000/float128-sed$(fp128_sed_hw) fp128_dep = $(fp128_sed) $(srcdir)/config/rs6000/t-float128 diff --git a/libgcc/config/rs6000/t-float128-hw b/libgcc/config/rs6000/t-float128-hw index d64ca4d..c082736 100644 --- a/libgcc/config/rs6000/t-float128-hw +++ b/libgcc/config/rs6000/t-float128-hw @@ -13,6 +13,13 @@ fp128_hw_static_obj = $(addsuffix $(objext),$(fp128_hw_funcs)) fp128_hw_shared_obj = $(addsuffix _s$(objext),$(fp128_hw_funcs)) fp128_hw_obj = $(fp128_hw_static_obj) $(fp128_hw_shared_obj) +# New functions for ISA 3.1 hardware support +fp128_3_1_hw_funcs = float128-p10 +fp128_3_1_hw_src = $(srcdir)/config/rs6000/float128-p10.c +fp128_3_1_hw_static_obj = $(addsuffix $(objext),$(fp128_3_1_hw_funcs)) +fp128_3_1_hw_shared_obj = $(addsuffix _s$(objext),$(fp128_3_1_hw_funcs)) +fp128_3_1_hw_obj = $(fp128_3_1_hw_static_obj) $(fp128_3_1_hw_shared_obj) + fp128_ifunc_funcs = float128-ifunc fp128_ifunc_src = $(srcdir)/config/rs6000/float128-ifunc.c fp128_ifunc_static_obj = float128-ifunc$(objext) @@ -30,9 +37,18 @@ FP128_CFLAGS_HW = -Wno-type-limits -mvsx -mfloat128 \ -I$(srcdir)/config/rs6000 \ $(FLOAT128_HW_INSNS) +FP128_3_1_CFLAGS_HW = -Wno-type-limits -mvsx -mcpu=power10 \ + -mfloat128-hardware -mno-gnu-attribute \ + -I$(srcdir)/soft-fp \ + -I$(srcdir)/config/rs6000 \ + $(FLOAT128_HW_INSNS) + $(fp128_hw_obj) : INTERNAL_CFLAGS += $(FP128_CFLAGS_HW) $(fp128_hw_obj) : $(srcdir)/config/rs6000/t-float128-hw +$(fp128_3_1_hw_obj) : INTERNAL_CFLAGS += $(FP128_3_1_CFLAGS_HW) +$(fp128_3_1_hw_obj) : $(srcdir)/config/rs6000/t-float128-p10-hw + $(fp128_ifunc_obj) : INTERNAL_CFLAGS += 
$(FP128_CFLAGS_SW) $(fp128_ifunc_obj) : $(srcdir)/config/rs6000/t-float128-hw diff --git a/libgcc/config/rs6000/t-float128-p10-hw b/libgcc/config/rs6000/t-float128-p10-hw new file mode 100644 index 0000000..de36227 --- /dev/null +++ b/libgcc/config/rs6000/t-float128-p10-hw @@ -0,0 +1,24 @@ +# Support for adding __float128 hardware support to the powerpc. +# Tell the float128 functions that the ISA 3.1 hardware support can +# be compiled it to be selected via IFUNC functions. + +FLOAT128_HW_INSNS = -DFLOAT128_HW_INSNS + +# New functions for hardware support + +fp128_3_1_hw_funcs = float128-p10 +fp128_3_1_hw_src = $(srcdir)/config/rs6000/float128-p10.c +fp128_3_1_hw_static_obj = $(addsuffix $(objext),$(fp128_3_1_hw_funcs)) +fp128_3_1_hw_shared_obj = $(addsuffix _s$(objext),$(fp128_3_1_hw_funcs)) +fp128_3_1_hw_obj = $(fp128_3_1_hw_static_obj) $(fp128_3_1_hw_shared_obj) + +# Build the hardware support functions with appropriate hardware support +FP128_3_1_CFLAGS_HW = -Wno-type-limits -mvsx -mfloat128 \ + -mpower10 \ + -mfloat128-hardware -mno-gnu-attribute \ + -I$(srcdir)/soft-fp \ + -I$(srcdir)/config/rs6000 \ + $(FLOAT128_HW_INSNS) + +$(fp128_3_1_hw_obj) : INTERNAL_CFLAGS += $(FP128_3_1_CFLAGS_HW) +$(fp128_3_1_hw_obj) : $(srcdir)/config/rs6000/t-float128-p10-hw diff --git a/libgcc/configure b/libgcc/configure index dd3afb2..ce05e0d 100755 --- a/libgcc/configure +++ b/libgcc/configure @@ -5263,6 +5263,43 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_powerpc_float128_hw" >&5 $as_echo "$libgcc_cv_powerpc_float128_hw" >&6; } CFLAGS="$saved_CFLAGS" + + saved_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -mpower10 -mfloat128-hardware" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PowerPC ISA 3.1 to build hardware __float128 libraries" >&5 +$as_echo_n "checking for PowerPC ISA 3.1 to build hardware __float128 libraries... 
" >&6; } +if ${libgcc_cv_powerpc_float128_hw+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <sys/auxv.h> + #ifndef AT_PLATFORM + #error "AT_PLATFORM is not defined" + #endif + #ifndef __BUILTIN_CPU_SUPPORTS__ + #error "__builtin_cpu_supports is not available" + #endif + vector unsigned char add (vector unsigned char a, vector unsigned char b) + { + vector unsigned char ret; + __asm__ ("xscvsqqp %0,%1,%2" : "=v" (ret) : "v" (a), "v" (b)); + return ret; + } + void *add_resolver (void) { return (void *) add; } + __float128 add_ifunc (__float128, __float128) + __attribute__ ((__ifunc__ ("add_resolver"))); +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + libgcc_cv_powerpc_3_1_float128_hw=yes +else + libgcc_cv_powerpc_3_1_float128_hw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_powerpc_float128_hw" >&5 + $as_echo "$libgcc_cv_powerpc_float128_hw" >&6; } + CFLAGS="$saved_CFLAGS" esac # Collect host-machine-specific information. 
diff --git a/libgcc/configure.ac b/libgcc/configure.ac index 10ffb04..bc315de 100644 --- a/libgcc/configure.ac +++ b/libgcc/configure.ac @@ -458,6 +458,31 @@ powerpc*-*-linux*) [libgcc_cv_powerpc_float128_hw=yes], [libgcc_cv_powerpc_float128_hw=no])]) CFLAGS="$saved_CFLAGS" + + saved_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -mpower10 -mfloat128-hardware" + AC_CACHE_CHECK([for PowerPC ISA 3.1 to build hardware __float128 libraries], + [libgcc_cv_powerpc_3_1_float128_hw], + [AC_COMPILE_IFELSE( + [AC_LANG_SOURCE([#include <sys/auxv.h> + #ifndef AT_PLATFORM + #error "AT_PLATFORM is not defined" + #endif + #ifndef __BUILTIN_CPU_SUPPORTS__ + #error "__builtin_cpu_supports is not available" + #endif + vector unsigned char add (vector unsigned char a, vector unsigned char b) + { + vector unsigned char ret; + __asm__ ("xscvsqqp %0,%1,%2" : "=v" (ret) : "v" (a), "v" (b)); + return ret; + } + void *add_resolver (void) { return (void *) add; } + __float128 add_ifunc (__float128, __float128) + __attribute__ ((__ifunc__ ("add_resolver")));])], + [libgcc_cv_powerpc_3_1_float128_hw=yes], + [libgcc_cv_powerpc_3_1_float128_hw=no])]) + CFLAGS="$saved_CFLAGS" esac # Collect host-machine-specific information. diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 5ad2693..255c160 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,13 @@ +2021-06-09 H.J. Lu <hjl.tools@gmail.com> + + * testsuite/lib/libgomp.exp (libgomp_init): Don't add -march=i486 + if atomic compare-and-swap is supported on 'int'. + +2021-06-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/100981 + * testsuite/libgomp.fortran/pr100981-2.f90: New testcase.
+ 2021-06-08 Thomas Schwinge <thomas@codesourcery.com> * plugin/plugin-gcn.c (gcn_exec): Force 'num_workers (1)' diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 45c78d8..a205015 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -210,7 +210,8 @@ proc libgomp_init { args } { # We use atomic operations in the testcases to validate results. if { ([istarget i?86-*-*] || [istarget x86_64-*-*]) - && [check_effective_target_ia32] } { + && [check_effective_target_ia32] + && ![check_effective_target_cas_char] } { lappend ALWAYS_CFLAGS "additional_flags=-march=i486" } diff --git a/libgomp/testsuite/libgomp.fortran/pr100981-2.f90 b/libgomp/testsuite/libgomp.fortran/pr100981-2.f90 new file mode 100644 index 0000000..12836d4 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/pr100981-2.f90 @@ -0,0 +1,31 @@ +! { dg-do run } +! { dg-additional-options "-O3 -ftree-parallelize-loops=2 -fno-signed-zeros -fno-trapping-math" } + +complex function cdcdot(n, cx) + implicit none + + integer :: n, i, kx + complex :: cx(*) + double precision :: dsdotr, dsdoti, dt1, dt3 + + kx = 1 + do i = 1, n + dt1 = real(cx(kx)) + dt3 = aimag(cx(kx)) + dsdotr = dsdotr + dt1 * 2 - dt3 * 2 + dsdoti = dsdoti + dt1 * 2 + dt3 * 2 + kx = kx + 1 + end do + cdcdot = cmplx(real(dsdotr), real(dsdoti)) + return +end function cdcdot +program test + implicit none + complex :: cx(100), ct, cdcdot + integer :: i + do i = 1, 100 + cx(i) = cmplx(2*i, i) + end do + ct = cdcdot (100, cx) + if (ct.ne.cmplx(10100.0000,30300.0000)) call abort +end diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index f5febd3..583ca4d 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,25 @@ +2021-06-09 Thomas Rodgers <rodgert@appliantology.com> + + * testsuite/29_atomics/atomic_ref/wait_notify.cc: Guard + test logic with constexpr check for is_always_lock_free. 
+ +2021-06-09 Jonathan Wakely <jwakely@redhat.com> + + PR libstdc++/100982 + * include/std/optional (optional::operator=(const optional<U>&)): + Fix value category used in is_assignable check. + * testsuite/20_util/optional/assignment/100982.cc: New test. + +2021-06-09 Jonathan Wakely <jwakely@redhat.com> + + * include/bits/allocator.h (allocator::is_always_equal): Deprecate. + * include/bits/iterator_concepts.h (indirectly_readable_traits): + Add LWG issue number to comment. + * include/std/memory_resource (polymorphic_allocator::release): + Deprecate. + * testsuite/20_util/allocator/requirements/typedefs.cc: Add + dg-warning for deprecation. Also check std::allocator<void>. + 2021-06-08 Thomas Rodgers <rodgert@appliantology.com> PR libstdc++/100889 diff --git a/libstdc++-v3/include/std/optional b/libstdc++-v3/include/std/optional index 415f8c4..0a67ce2 100644 --- a/libstdc++-v3/include/std/optional +++ b/libstdc++-v3/include/std/optional @@ -815,7 +815,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _Up> enable_if_t<__and_v<__not_<is_same<_Tp, _Up>>, is_constructible<_Tp, const _Up&>, - is_assignable<_Tp&, _Up>, + is_assignable<_Tp&, const _Up&>, __not_<__converts_from_optional<_Tp, _Up>>, __not_<__assigns_from_optional<_Tp, _Up>>>, optional&> diff --git a/libstdc++-v3/testsuite/20_util/optional/assignment/100982.cc b/libstdc++-v3/testsuite/20_util/optional/assignment/100982.cc new file mode 100644 index 0000000..ae56525 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/optional/assignment/100982.cc @@ -0,0 +1,17 @@ +// { dg-do compile { target c++17 } } + +#include <optional> + +struct U {}; + +struct T { + explicit T(const U&); + T& operator=(const U&); + T& operator=(U&&) = delete; +}; + +int main() { + std::optional<U> opt1; + std::optional<T> opt2; + opt2 = opt1; // PR libstdc++/100982 +} diff --git a/libstdc++-v3/testsuite/29_atomics/atomic_ref/wait_notify.cc b/libstdc++-v3/testsuite/29_atomics/atomic_ref/wait_notify.cc index 003b86c..b75e276 
100644 --- a/libstdc++-v3/testsuite/29_atomics/atomic_ref/wait_notify.cc +++ b/libstdc++-v3/testsuite/29_atomics/atomic_ref/wait_notify.cc @@ -30,17 +30,20 @@ template<typename S> void test (S va, S vb) { - S aa{ va }; - S bb{ vb }; - std::atomic_ref<S> a{ aa }; - a.wait(bb); - std::thread t([&] - { - a.store(bb); - a.notify_one(); - }); - a.wait(aa); - t.join(); + if constexpr (std::atomic_ref<S>::is_always_lock_free) + { + S aa{ va }; + S bb{ vb }; + std::atomic_ref<S> a{ aa }; + a.wait(bb); + std::thread t([&] + { + a.store(bb); + a.notify_one(); + }); + a.wait(aa); + t.join(); + } } int |