Diffstat (limited to 'gcc'): 157 files changed, 5983 insertions, 1803 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 26fc404..6b8ba1d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,622 @@ +2022-10-25 Eugene Rozenfeld <erozen@microsoft.com> + + * auto-profile.cc (get_combined_location): Include discriminator in the + returned combined location. + (read_function_instance): Read discriminators from profiles. + +2022-10-25 H.J. Lu <hjl.tools@gmail.com> + + PR target/107304 + * expr.cc (get_inner_reference): Always use TYPE_MODE for vector + field with vector raw mode. + +2022-10-25 Segher Boessenkool <segher@kernel.crashing.org> + + * config/rs6000/rs6000.md (CCEITHER): Delete. + (CCANY): New. + (un): Delete. + (isel_<un>signed_<GPR:mode>): Rename to... + (isel_<CCANY:mode>_<GPR:mode>): ... this. Adjust. + (*isel_reversed_<un>signed_<GPR:mode>): Rename to... + (*isel_reversed_<CCANY:mode>_<GPR:mode>): ... this. Adjust. + (setbc_<un>signed_<GPR:mode>): Rename to... + (setbc_<CCANY:mode>_<GPR:mode>C): ... this. Adjust." + (*setbcr_<un>signed_<GPR:mode>): Rename to ... + (*setbcr_<CCANY:mode>_<GPR:mode>): ... this. Adjust. + (*setnbc_<un>signed_<GPR:mode>): Rename to ... + (*setnbc_<CCANY:mode>_<GPR:mode>): ... this. Adjust. + (*setnbcr_<un>signed_<GPR:mode>): Rename to ... + (*setnbcr_<CCANY:mode>_<GPR:mode>): ... this. Adjust. + (eq<mode>3 for GPR): Adjust. + (ne<mode>3 for GPR): Adjust. + * config/rs6000/rs6000-string.cc (do_isel): Adjust. + * config/rs6000/rs6000.cc (rs6000_emit_int_cmove): Adjust. + +2022-10-25 Richard Biener <rguenther@suse.de> + + PR tree-optimization/107176 + PR tree-optimization/66375 + PR tree-optimization/42512 + * tree-scalar-evolution.cc (follow_ssa_edge_expr): Revert + the PR66375 fix, do not not associate PLUS_EXPR to be able + to use tail-recursion. + (follow_ssa_edge_binary): Likewise. + (interpret_loop_phi): Revert PR42512 fix, do not throw + away analyze_evolution_in_loop result after the fact. + (follow_ssa_edge_expr): When reaching halting_phi initalize + the evolution to the symbolic value of the PHI result. + (add_to_evolution_1): When adding the first evolution verify + we can handle the expression wrapping the symbolic evolution + and replace that in full using the initial condition. + (class scev_dfs): New, contains ... + (follow_ssa_edge_expr, follow_ssa_edge_binary, + follow_ssa_edge_in_condition_phi_branch, + follow_ssa_edge_in_condition_phi, + follow_ssa_edge_inner_loop_phi, + add_to_evolution, add_to_evolution_1): ... these with + loop and halting_phi arguments in class data. + (scev_dfs::get_ev): New toplevel DFS entry, start with + a chrec_dont_know evolution. + (analyze_evolution_in_loop): Use scev_dfs. + +2022-10-25 Eric Botcazou <ebotcazou@adacore.com> + + * profile.cc (branch_prob): Be prepared for ignored functions with + DECL_SOURCE_LOCATION set to UNKNOWN_LOCATION. + +2022-10-25 Richard Biener <rguenther@suse.de> + + * tree-scalar-evolution.cc (follow_ssa_edge_expr): Move + STRIP_USELESS_TYPE_CONVERSIONS to where it matters. + +2022-10-25 Tejas Joshi <TejasSanjay.Joshi@amd.com> + + * common/config/i386/i386-common.cc (processor_alias_table): Use + CPU_ZNVER3 for znver4. + * config/i386/znver.md: Remove znver4 reservations. + +2022-10-25 Jakub Jelinek <jakub@redhat.com> + + * gimplify.cc (gimple_boolify): Fix comment typos, prduce -> produce + and There -> These. + +2022-10-25 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/107368 + * gimplify.cc (gimplify_call_expr): For complex IFN_ASSUME + conditions call gimple_boolify on the condition. 
+ +2022-10-25 YunQiang Su <yunqiang.su@cipunited.com> + + * config.gcc: add -with-compact-branches=policy build option. + * doc/install.texi: Likewise. + * config/mips/mips.h: Likewise. + +2022-10-25 YunQiang Su <yunqiang.su@cipunited.com> + + * config/mips/mips.cc (mips_option_override): not trigger error + for compact-branches=always for pre-R6. + * config/mips/mips.h (TARGET_RTP_PIC): not trigger error for + compact-branches=always for pre-R6. + (TARGET_CB_NEVER): Likewise. + (TARGET_CB_ALWAYS): Likewise. + (struct mips_cpu_info): define macros for compact branch policy. + * doc/invoke.texi: Document "always" with pre-R6. + +2022-10-25 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/107369 + * gimplify.cc (gimplify_call_expr): If seen_error, handle complex + IFN_ASSUME the same as for -O0. + +2022-10-25 YunQiang Su <yunqiang.su@cipunited.com> + + * configure.ac: AC_DEFINE(ENABLE_MULTIARCH, 1) + * configure: Regenerated. + * config.in: Regenerated. + * config/mips/mips.h: don't define STANDARD_STARTFILE_PREFIX_1 + if ENABLE_MULTIARCH is defined. + * config/mips/t-linux64: define correct multiarch path when + multiarch is enabled. + +2022-10-25 Richard Biener <rguenther@suse.de> + + PR tree-optimization/100756 + * tree-ssa-loop-niter.cc (expand_simple_operations): Also + expand multiplications by invariants. + +2022-10-25 Kewen Lin <linkw@linux.ibm.com> + + PR tree-optimization/107338 + * tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern): Move + shfit_n calculation before the adjustments for widening loads. + +2022-10-25 Martin Liska <mliska@suse.cz> + + * common/config/riscv/riscv-common.cc + (riscv_get_valid_option_values): Get out of ifdef. + +2022-10-25 Martin Liska <mliska@suse.cz> + + PR target/107364 + * common/config/i386/i386-cpuinfo.h (enum processor_vendor): + Fix pedantic warning. + +2022-10-24 Martin Liska <mliska@suse.cz> + + PR analyzer/107366 + * diagnostic-format-sarif.cc + (sarif_builder::maybe_make_physical_location_object): Gracefully + reject locations with NULL filename. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/106300 + * doc/invoke.texi (Static Analyzer Options): Add "pipe" and + "pipe2" to the list of functions the analyzer has hardcoded + knowledge of. + +2022-10-24 Jason Merrill <jason@redhat.com> + + * tree.h (build_string_literal): New one-argument overloads that + take tree (identifier) and const char *. + * builtins.cc (fold_builtin_FILE) + (fold_builtin_FUNCTION) + * gimplify.cc (gimple_add_init_for_auto_var) + * vtable-verify.cc (verify_bb_vtables): Simplify calls. + +2022-10-24 Martin Liska <mliska@suse.cz> + + PR target/107364 + * common/config/i386/i386-cpuinfo.h (enum processor_vendor): + Reorder enum values as BUILTIN_VENDOR_MAX should not point + in the middle of the valid enum values. + +2022-10-24 Marek Polacek <polacek@redhat.com> + + PR c++/107276 + * tree.cc (maybe_wrap_with_location): Don't create a location wrapper + when the type is erroneous. + +2022-10-24 Wilco Dijkstra <wdijkstr@arm.com> + + PR target/106583 + * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) + Add support for a bitmask immediate with 2 MOVKs. + (aarch64_check_bitmask): New function after refactorization. + (aarch64_bitmask_imm): Simplify replication of small modes. + Split function into 64-bit only version for efficiency. + (aarch64_move_imm): Move near other immediate functions. + (aarch64_uimm12_shift): Likewise. + (aarch64_clamp_to_uimm12_shift): Likewise. + (aarch64_movk_shift): Likewise. 
+ (aarch64_replicate_bitmask_imm): Likewise. + (aarch64_and_split_imm1): Likewise. + (aarch64_and_split_imm2): Likewise. + (aarch64_and_bitmask_imm): Likewise. + (aarch64_movw_imm): Likewise. + +2022-10-24 Aldy Hernandez <aldyh@redhat.com> + + PR tree-optimization/107355 + * range-op-float.cc (foperator_abs::op1_range): Handle NAN. + +2022-10-24 Tobias Burnus <tobias@codesourcery.com> + + PR middle-end/107236 + * omp-expand.cc (expand_omp_target): Set calls_declare_variant_alt + in DECL_CONTEXT and not to cfun->decl. + * cgraphclones.cc (cgraph_node::create_clone): Copy also the + node's calls_declare_variant_alt value. + +2022-10-24 Kito Cheng <kito.cheng@sifive.com> + + * common/config/riscv/riscv-common.cc (riscv_tunes): New. + (riscv_get_valid_option_values): New. + (TARGET_GET_VALID_OPTION_VALUES): New. + * config/riscv/riscv-cores.def (RISCV_TUNE): New, define options + for tune here. + (RISCV_CORE): Fix comment. + * config/riscv/riscv.cc (riscv_tune_info_table): Move definition to + riscv-cores.def. + +2022-10-24 Aldy Hernandez <aldyh@redhat.com> + + PR tree-optimization/107365 + * value-range.cc (frange::verify_range): Predicate NAN check in + VARYING range on HONOR_NANS instead of flag_finite_math_only. + (range_tests_floats): Same. + (range_tests_floats_various): New. + (range_tests): Call range_tests_floats_various. + +2022-10-24 Torbjörn SVENSSON <torbjorn.svensson@foss.st.com> + Yvan ROUX <yvan.roux@foss.st.com> + + * lto-wrapper.cc: Quote paths in makefile. + +2022-10-24 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * config/riscv/riscv.cc (riscv_legitimize_move): Support (set (mem) (const_poly_int)). + +2022-10-24 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * config/riscv/riscv-vector-builtins-bases.cc: Replace CONSTEXPR + with constexpr throughout. + * config/riscv/riscv-vector-builtins-shapes.cc (SHAPE): Likewise. + * config/riscv/riscv-vector-builtins.cc + (struct registered_function_hasher): Likewise. + * config/riscv/riscv-vector-builtins.h (struct rvv_arg_type_info): + Likewise. + +2022-10-24 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * config/riscv/riscv-vector-switch.def (ENTRY): Remove unused TI/TF vector modes. + +2022-10-24 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * config/riscv/riscv.h (REG_CLASS_CONTENTS): Fix ALL_REGS. + +2022-10-22 Michael Eager <eager@eagercon.com> + + * config/microblaze/microblaze.cc + (microblaze_legitimize_address): Initialize 'reg' to NULL, check for NULL. + (microblaze_address_insns): Replace abort() with gcc_unreachable(). + (print_operand_address): Same. + (microblaze_expand_move): Initialize 'p1' to NULL, check for NULL. + (get_branch_target): Replace abort() with gcc_unreachable(). + +2022-10-22 Aldy Hernandez <aldyh@redhat.com> + + * value-range.cc (range_tests_floats): Predicate [-Inf, +Inf] test + with !flag_finite_math_only. + +2022-10-22 Takayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp> + + * config/xtensa/xtensa.cc (xtensa_conditional_register_usage): + Remove register A0 from FIXED_REGS if the CALL0 ABI. + (xtensa_expand_epilogue): Change to emit '(use (reg:SI A0_REG))' + unconditionally after restoring callee-saved registers for + sibling-call functions, in order to prevent misleading that + register A0 is free to use. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR target/107322 + * config/i386/i386-expand.cc (ix86_prepare_fp_compare_args): For + BFmode comparisons promote arguments to SFmode and recurse. 
+ (ix86_expand_int_movcc, ix86_expand_fp_movcc): Return false early + if comparison operands are BFmode and operands[1] is not + ix86_fp_comparison_operator. + +2022-10-21 Tejas Joshi <TejasSanjay.Joshi@amd.com> + + * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver4. + * common/config/i386/i386-common.cc (processor_names): Add znver4. + (processor_alias_table): Add znver4 and modularize old znvers. + * common/config/i386/i386-cpuinfo.h (processor_subtypes): + AMDFAM19H_ZNVER4. + * config.gcc (x86_64-*-* |...): Likewise. + * config/i386/driver-i386.cc (host_detect_local_cpu): Let + -march=native recognize znver4 cpus. + * config/i386/i386-c.cc (ix86_target_macros_internal): Add znver4. + * config/i386/i386-options.cc (m_ZNVER4): New definition. + (m_ZNVER): Include m_ZNVER4. + (processor_cost_table): Add znver4. + * config/i386/i386.cc (ix86_reassociation_width): Likewise. + * config/i386/i386.h (processor_type): Add PROCESSOR_ZNVER4. + (PTA_ZNVER1): New definition. + (PTA_ZNVER2): Likewise. + (PTA_ZNVER3): Likewise. + (PTA_ZNVER4): Likewise. + * config/i386/i386.md (define_attr "cpu"): Add znver4 and rename + md file. + * config/i386/x86-tune-costs.h (znver4_cost): New definition. + * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add znver4. + (ix86_adjust_cost): Likewise. + * config/i386/znver1.md: Rename to znver.md. + * config/i386/znver.md: Add new reservations for znver4. + * doc/extend.texi: Add details about znver4. + * doc/invoke.texi: Likewise. + +2022-10-21 Richard Biener <rguenther@suse.de> + + PR tree-optimization/107323 + * tree-loop-distribution.cc (pg_unmark_merged_alias_ddrs): + New function. + (loop_distribution::break_alias_scc_partitions): Revert + postorder save/restore from the PR94125 fix. Instead + make sure to not ignore edges from SCCs we are going to + merge. + +2022-10-21 Monk Chiang <monk.chiang@sifive.com> + + * config/riscv/riscv.md: Add atomic type attribute. + * config/riscv/sync.md: Add atomic type for atomic instructions. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/54346 + * match.pd ((vec_perm (vec_perm@0 @1 @2 VECTOR_CST) @0 VECTOR_CST)): + Optimize nested VEC_PERM_EXPRs even if target can't handle the + new one provided we don't increase number of VEC_PERM_EXPRs the + target can't handle. + +2022-10-21 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * config.gcc: Add riscv-vector-builtins-bases.o and riscv-vector-builtins-shapes.o + * config/riscv/riscv-vector-builtins.cc (DEF_RVV_I_OPS): New macro. + (DEF_RVV_FUNCTION): Ditto. + (handle_pragma_vector): Add intrinsic framework. + * config/riscv/riscv.cc (riscv_print_operand): Add operand print for vsetvl/vsetvlmax. + * config/riscv/riscv.md: include vector.md. + * config/riscv/t-riscv: Add riscv-vector-builtins-bases.o and riscv-vector-builtins-shapes.o + * config/riscv/riscv-vector-builtins-bases.cc: New file. + * config/riscv/riscv-vector-builtins-bases.h: New file. + * config/riscv/riscv-vector-builtins-functions.def: New file. + * config/riscv/riscv-vector-builtins-shapes.cc: New file. + * config/riscv/riscv-vector-builtins-shapes.h: New file. + * config/riscv/riscv-vector-builtins-types.def: New file. + * config/riscv/vector.md: New file. + +2022-10-21 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * config.gcc: Add gt files since function_instance is GTY ((user)). + * config/riscv/riscv-builtins.cc (riscv_init_builtins): Add RVV intrinsic framework. + (riscv_builtin_decl): Ditto. + (riscv_expand_builtin): Ditto. 
+ * config/riscv/riscv-protos.h (builtin_decl): New function. + (expand_builtin): Ditto. + (enum riscv_builtin_class): New enum to classify RVV intrinsic and RISC-V general built-in. + * config/riscv/riscv-vector-builtins.cc (class GTY): New declaration. + (struct registered_function_hasher): New struct. + (DEF_RVV_OP_TYPE): New macro. + (DEF_RVV_TYPE): Ditto. + (DEF_RVV_PRED_TYPE): Ditto. + (GTY): New declaration. + (add_attribute): New function. + (check_required_extensions): Ditto. + (rvv_arg_type_info::get_tree_type): Ditto. + (function_instance::function_instance): Ditto. + (function_instance::operator==): Ditto. + (function_instance::any_type_float_p): Ditto. + (function_instance::get_return_type): Ditto. + (function_instance::get_arg_type): Ditto. + (function_instance::hash): Ditto. + (function_instance::call_properties): Ditto. + (function_instance::reads_global_state_p): Ditto. + (function_instance::modifies_global_state_p): Ditto. + (function_instance::could_trap_p): Ditto. + (function_builder::function_builder): Ditto. + (function_builder::~function_builder): Ditto. + (function_builder::allocate_argument_types): Ditto. + (function_builder::register_function_group): Ditto. + (function_builder::append_name): Ditto. + (function_builder::finish_name): Ditto. + (function_builder::get_attributes): Ditto. + (function_builder::add_function): Ditto. + (function_builder::add_unique_function): Ditto. + (function_call_info::function_call_info): Ditto. + (function_expander::function_expander): Ditto. + (function_expander::add_input_operand): Ditto. + (function_expander::generate_insn): Ditto. + (registered_function_hasher::hash): Ditto. + (registered_function_hasher::equal): Ditto. + (builtin_decl): Ditto. + (expand_builtin): Ditto. + (gt_ggc_mx): Define for using GCC garbage collect. + (gt_pch_nx): Define for using GCC garbage collect. + * config/riscv/riscv-vector-builtins.def (DEF_RVV_OP_TYPE): New macro. + (DEF_RVV_PRED_TYPE): Ditto. + (vbool64_t): Add suffix. + (vbool32_t): Ditto. + (vbool16_t): Ditto. + (vbool8_t): Ditto. + (vbool4_t): Ditto. + (vbool2_t): Ditto. + (vbool1_t): Ditto. + (vint8mf8_t): Ditto. + (vuint8mf8_t): Ditto. + (vint8mf4_t): Ditto. + (vuint8mf4_t): Ditto. + (vint8mf2_t): Ditto. + (vuint8mf2_t): Ditto. + (vint8m1_t): Ditto. + (vuint8m1_t): Ditto. + (vint8m2_t): Ditto. + (vuint8m2_t): Ditto. + (vint8m4_t): Ditto. + (vuint8m4_t): Ditto. + (vint8m8_t): Ditto. + (vuint8m8_t): Ditto. + (vint16mf4_t): Ditto. + (vuint16mf4_t): Ditto. + (vint16mf2_t): Ditto. + (vuint16mf2_t): Ditto. + (vint16m1_t): Ditto. + (vuint16m1_t): Ditto. + (vint16m2_t): Ditto. + (vuint16m2_t): Ditto. + (vint16m4_t): Ditto. + (vuint16m4_t): Ditto. + (vint16m8_t): Ditto. + (vuint16m8_t): Ditto. + (vint32mf2_t): Ditto. + (vuint32mf2_t): Ditto. + (vint32m1_t): Ditto. + (vuint32m1_t): Ditto. + (vint32m2_t): Ditto. + (vuint32m2_t): Ditto. + (vint32m4_t): Ditto. + (vuint32m4_t): Ditto. + (vint32m8_t): Ditto. + (vuint32m8_t): Ditto. + (vint64m1_t): Ditto. + (vuint64m1_t): Ditto. + (vint64m2_t): Ditto. + (vuint64m2_t): Ditto. + (vint64m4_t): Ditto. + (vuint64m4_t): Ditto. + (vint64m8_t): Ditto. + (vuint64m8_t): Ditto. + (vfloat32mf2_t): Ditto. + (vfloat32m1_t): Ditto. + (vfloat32m2_t): Ditto. + (vfloat32m4_t): Ditto. + (vfloat32m8_t): Ditto. + (vfloat64m1_t): Ditto. + (vfloat64m2_t): Ditto. + (vfloat64m4_t): Ditto. + (vfloat64m8_t): Ditto. + (vv): Ditto. + (vx): Ditto. + (v): Ditto. + (wv): Ditto. + (wx): Ditto. + (x_x_v): Ditto. + (vf2): Ditto. + (vf4): Ditto. + (vf8): Ditto. + (vvm): Ditto. + (vxm): Ditto. 
+ (x_x_w): Ditto. + (v_v): Ditto. + (v_x): Ditto. + (vs): Ditto. + (mm): Ditto. + (m): Ditto. + (vf): Ditto. + (vm): Ditto. + (wf): Ditto. + (vfm): Ditto. + (v_f): Ditto. + (ta): Ditto. + (tu): Ditto. + (ma): Ditto. + (mu): Ditto. + (tama): Ditto. + (tamu): Ditto. + (tuma): Ditto. + (tumu): Ditto. + (tam): Ditto. + (tum): Ditto. + * config/riscv/riscv-vector-builtins.h (GCC_RISCV_VECTOR_BUILTINS_H): New macro. + (RVV_REQUIRE_RV64BIT): Ditto. + (RVV_REQUIRE_ZVE64): Ditto. + (RVV_REQUIRE_ELEN_FP_32): Ditto. + (RVV_REQUIRE_ELEN_FP_64): Ditto. + (enum operand_type_index): New enum. + (DEF_RVV_OP_TYPE): New macro. + (enum predication_type_index): New enum. + (DEF_RVV_PRED_TYPE): New macro. + (enum rvv_base_type): New enum. + (struct rvv_builtin_suffixes): New struct. + (struct rvv_arg_type_info): Ditto. + (struct rvv_type_info): Ditto. + (struct rvv_op_info): Ditto. + (class registered_function): New class. + (class function_base): Ditto. + (class function_shape): Ditto. + (struct function_group_info): New struct. + (class GTY): New class. + (class function_builder): Ditto. + (class function_call_info): Ditto. + (function_call_info::function_returns_void_p): New function. + (class function_expander): New class. + (function_instance::operator!=): New function. + (function_expander::expand): Ditto. + (function_expander::add_input_operand): Ditto. + (function_base::call_properties): Ditto. + +2022-10-21 Haochen Jiang <haochen.jiang@intel.com> + + * config/i386/sse.md (ssedvecmode): Rename from VI1SI. + (ssedvecmodelower): Rename from vi1si. + (sdot_prod<mode>): New define_expand. + (udot_prod<mode>): Ditto. + +2022-10-21 Kong Lingling <lingling.kong@intel.com> + Hongyu Wang <hongyu.wang@intel.com> + Haochen Jiang <haochen.jiang@intel.com> + + * common/config/i386/cpuinfo.h (get_available_features): Detect + avxvnniint8. + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA2_AVXVNNIINT8_SET): New. + (OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto. + (ix86_handle_option): Handle -mavxvnniint8. + * common/config/i386/i386-cpuinfo.h (enum processor_features): + Add FEATURE_AVXVNNIINT8. + * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for + avxvnniint8. + * config.gcc: Add avxvnniint8intrin.h. + * config/i386/avxvnniint8intrin.h: New file. + * config/i386/cpuid.h (bit_AVXVNNIINT8): New. + * config/i386/i386-builtin.def: Add new builtins. + * config/i386/i386-c.cc (ix86_target_macros_internal): Define + __AVXVNNIINT8__. + * config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8. + (ix86_valid_target_attribute_inner_p): Handle avxvnniint8. + * config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New.. + * config/i386/i386.opt: Add option -mavxvnniint8. + * config/i386/immintrin.h: Include avxvnniint8intrin.h. + * config/i386/sse.md (UNSPEC_VPMADDUBSWACCD + UNSPEC_VPMADDUBSWACCSSD,UNSPEC_VPMADDWDACCD, + UNSPEC_VPMADDWDACCSSD): Rename according to new style. + (vpdp<vpdotprodtype>_<mode>): New define_insn. + * doc/extend.texi: Document avxvnniint8. + * doc/invoke.texi: Document -mavxvnniint8. + * doc/sourcebuild.texi: Document target avxvnniint8. + +2022-10-21 Hongyu Wang <hongyu.wang@intel.com> + + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA_AVXIFMA_SET, OPTION_MASK_ISA2_AVXIFMA_UNSET, + OPTION_MASK_ISA2_AVX2_UNSET): New macro. + (ix86_handle_option): Handle -mavxifma. + * common/config/i386/i386-cpuinfo.h (processor_types): Add + FEATURE_AVXIFMA. + * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for + avxifma. 
+ * common/config/i386/cpuinfo.h (get_available_features): + Detect avxifma. + * config.gcc: Add avxifmaintrin.h + * config/i386/avx512ifmavlintrin.h: (_mm_madd52lo_epu64): Change + to macro. + (_mm_madd52hi_epu64): Likewise. + (_mm256_madd52lo_epu64): Likewise. + (_mm256_madd52hi_epu64): Likewise. + * config/i386/avxifmaintrin.h: New header. + * config/i386/cpuid.h (bit_AVXIFMA): New. + * config/i386/i386-builtin.def: Add new builtins, and correct + pattern names for AVX512IFMA. + * config/i386/i386-builtins.cc (def_builtin): Handle AVX-IFMA + builtins like AVX-VNNI. + * config/i386/i386-c.cc (ix86_target_macros_internal): Define + __AVXIFMA__. + * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): + Relax ISA masks for AVXIFMA. + * config/i386/i386-isa.def: Add AVXIFMA. + * config/i386/i386-options.cc (isa2_opts): Add -mavxifma. + (ix86_valid_target_attribute_inner_p): Handle avxifma. + * config/i386/i386.md (isa): Add attr avxifma and avxifmavl. + * config/i386/i386.opt: Add option -mavxifma. + * config/i386/immintrin.h: Inculde avxifmaintrin.h. + * config/i386/sse.md (avx_vpmadd52<vpmadd52type>_<mode>): + Remove. + (vpamdd52<vpmadd52type><mode><sd_maskz_name>): Remove. + (vpamdd52huq<mode>_maskz): Rename to ... + (vpmadd52huq<mode>_maskz): ... this. + (vpamdd52luq<mode>_maskz): Rename to ... + (vpmadd52luq<mode>_maskz): ... this. + (vpmadd52<vpmadd52type><mode>): New define_insn. + (vpmadd52<vpmadd52type>v8di): Likewise. + (vpmadd52<vpmadd52type><mode>_maskz_1): Likewise. + (vpamdd52<vpmadd52type><mode>_mask): Rename to ... + (vpmadd52<vpmadd52type><mode>_mask): ... this. + * doc/invoke.texi: Document -mavxifma. + * doc/extend.texi: Document avxifma. + * doc/sourcebuild.texi: Document target avxifma. + 2022-10-20 Aldy Hernandez <aldyh@redhat.com> PR c++/106654 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 0f7d77c..03e9228 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20221021 +20221026 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 375a87f..733580a 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,68 @@ +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/107349 + * varargs.cc (get_va_copy_arg): Fix the non-pointer case. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/107345 + * region-model.cc (region_model::eval_condition_without_cm): + Ensure that constants are on the right-hand side before checking + for them. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + * engine.cc (impl_region_model_context::get_malloc_map): Replace + with... + (impl_region_model_context::get_state_map_by_name): ...this. + (impl_region_model_context::get_fd_map): Delete. + (impl_region_model_context::get_taint_map): Delete. + * exploded-graph.h (impl_region_model_context::get_fd_map): + Delete. + (impl_region_model_context::get_malloc_map): Delete. + (impl_region_model_context::get_taint_map): Delete. + (impl_region_model_context::get_state_map_by_name): New. + * region-model.h (region_model_context::get_state_map_by_name): + New vfunc. + (region_model_context::get_fd_map): Convert from vfunc to + function. + (region_model_context::get_malloc_map): Likewise. + (region_model_context::get_taint_map): Likewise. + (noop_region_model_context::get_state_map_by_name): New. + (noop_region_model_context::get_fd_map): Delete. + (noop_region_model_context::get_malloc_map): Delete. + (noop_region_model_context::get_taint_map): Delete. + (region_model_context_decorator::get_state_map_by_name): New. 
+ (region_model_context_decorator::get_fd_map): Delete. + (region_model_context_decorator::get_malloc_map): Delete. + (region_model_context_decorator::get_taint_map): Delete. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/106300 + * engine.cc (impl_region_model_context::get_fd_map): New. + * exploded-graph.h (impl_region_model_context::get_fd_map): New + decl. + * region-model-impl-calls.cc (region_model::impl_call_pipe): New. + * region-model.cc (region_model::update_for_int_cst_return): New, + based on... + (region_model::update_for_zero_return): ...this. Reimplement in + terms of the former. + (region_model::on_call_pre): Handle "pipe" and "pipe2". + (region_model::on_call_post): Likewise. + * region-model.h (region_model::impl_call_pipe): New decl. + (region_model::update_for_int_cst_return): New decl. + (region_model::mark_as_valid_fd): New decl. + (region_model_context::get_fd_map): New pure virtual fn. + (noop_region_model_context::get_fd_map): New. + (region_model_context_decorator::get_fd_map): New. + * sm-fd.cc: Include "analyzer/program-state.h". + (fd_state_machine::describe_state_change): Handle transitions from + start state to valid states. + (fd_state_machine::mark_as_valid_fd): New. + (fd_state_machine::on_stmt): Add missing return for "creat". + (region_model::mark_as_valid_fd): New. + 2022-10-19 David Malcolm <dmalcolm@redhat.com> PR analyzer/105765 diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 46bcaed..52978dd 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -214,35 +214,21 @@ impl_region_model_context::terminate_path () } bool -impl_region_model_context::get_malloc_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) -{ - unsigned malloc_sm_idx; - if (!m_ext_state.get_sm_idx_by_name ("malloc", &malloc_sm_idx)) - return false; - - *out_smap = m_new_state->m_checker_states[malloc_sm_idx]; - *out_sm = &m_ext_state.get_sm (malloc_sm_idx); - *out_sm_idx = malloc_sm_idx; - return true; -} - -bool -impl_region_model_context::get_taint_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) +impl_region_model_context::get_state_map_by_name (const char *name, + sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) { if (!m_new_state) return false; - unsigned taint_sm_idx; - if (!m_ext_state.get_sm_idx_by_name ("taint", &taint_sm_idx)) + unsigned sm_idx; + if (!m_ext_state.get_sm_idx_by_name (name, &sm_idx)) return false; - *out_smap = m_new_state->m_checker_states[taint_sm_idx]; - *out_sm = &m_ext_state.get_sm (taint_sm_idx); - *out_sm_idx = taint_sm_idx; + *out_smap = m_new_state->m_checker_states[sm_idx]; + *out_sm = &m_ext_state.get_sm (sm_idx); + *out_sm_idx = sm_idx; return true; } diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index 11e46ca..5996252 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -96,12 +96,10 @@ class impl_region_model_context : public region_model_context { return &m_ext_state; } - bool get_malloc_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) final override; - bool get_taint_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) final override; + bool get_state_map_by_name (const char *name, + sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) override; const gimple *get_stmt () const override { return m_stmt; } diff --git 
a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 8f4940a..52c4205 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -563,6 +563,76 @@ region_model::impl_call_memset (const call_details &cd) fill_region (sized_dest_reg, fill_value_u8); } +/* Handle the on_call_post part of "pipe". */ + +void +region_model::impl_call_pipe (const call_details &cd) +{ + class failure : public failed_call_info + { + public: + failure (const call_details &cd) : failed_call_info (cd) {} + + bool update_model (region_model *model, + const exploded_edge *, + region_model_context *ctxt) const final override + { + /* Return -1; everything else is unchanged. */ + const call_details cd (get_call_details (model, ctxt)); + model->update_for_int_cst_return (cd, -1, true); + return true; + } + }; + + class success : public success_call_info + { + public: + success (const call_details &cd) : success_call_info (cd) {} + + bool update_model (region_model *model, + const exploded_edge *, + region_model_context *ctxt) const final override + { + const call_details cd (get_call_details (model, ctxt)); + + /* Return 0. */ + model->update_for_zero_return (cd, true); + + /* Update fd array. */ + region_model_manager *mgr = cd.get_manager (); + tree arr_tree = cd.get_arg_tree (0); + const svalue *arr_sval = cd.get_arg_svalue (0); + for (int idx = 0; idx < 2; idx++) + { + const region *arr_reg + = model->deref_rvalue (arr_sval, arr_tree, cd.get_ctxt ()); + const svalue *idx_sval + = mgr->get_or_create_int_cst (integer_type_node, idx); + const region *element_reg + = mgr->get_element_region (arr_reg, integer_type_node, idx_sval); + conjured_purge p (model, cd.get_ctxt ()); + const svalue *fd_sval + = mgr->get_or_create_conjured_svalue (integer_type_node, + cd.get_call_stmt (), + element_reg, + p); + model->set_value (element_reg, fd_sval, cd.get_ctxt ()); + model->mark_as_valid_fd (fd_sval, cd.get_ctxt ()); + + } + return true; + } + }; + + /* Body of region_model::impl_call_pipe. */ + if (cd.get_ctxt ()) + { + cd.get_ctxt ()->bifurcate (new failure (cd)); + cd.get_ctxt ()->bifurcate (new success (cd)); + cd.get_ctxt ()->terminate_path (); + } +} + /* A subclass of pending_diagnostic for complaining about 'putenv' called on an auto var. */ diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 81ef41e..7c44fc9 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1976,23 +1976,36 @@ maybe_get_const_fn_result (const call_details &cd) return sval; } -/* Update this model for an outcome of a call that returns zero. +/* Update this model for an outcome of a call that returns a specific + integer constant. If UNMERGEABLE, then make the result unmergeable, e.g. to prevent the state-merger code from merging success and failure outcomes. */ void -region_model::update_for_zero_return (const call_details &cd, - bool unmergeable) +region_model::update_for_int_cst_return (const call_details &cd, + int retval, + bool unmergeable) { if (!cd.get_lhs_type ()) return; const svalue *result - = m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0); + = m_mgr->get_or_create_int_cst (cd.get_lhs_type (), retval); if (unmergeable) result = m_mgr->get_or_create_unmergeable (result); set_value (cd.get_lhs_region (), result, cd.get_ctxt ()); } +/* Update this model for an outcome of a call that returns zero. + If UNMERGEABLE, then make the result unmergeable, e.g. 
to prevent + the state-merger code from merging success and failure outcomes. */ + +void +region_model::update_for_zero_return (const call_details &cd, + bool unmergeable) +{ + update_for_int_cst_return (cd, 0, unmergeable); +} + /* Update this model for an outcome of a call that returns non-zero. */ void @@ -2302,6 +2315,14 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt, impl_call_memset (cd); return false; } + else if (is_named_call_p (callee_fndecl, "pipe", call, 1) + || is_named_call_p (callee_fndecl, "pipe2", call, 2)) + { + /* Handle in "on_call_post"; bail now so that fd array + is left untouched so that we can detect use-of-uninit + for the case where the call fails. */ + return false; + } else if (is_named_call_p (callee_fndecl, "putenv", call, 1) && POINTER_TYPE_P (cd.get_arg_type (0))) { @@ -2382,6 +2403,12 @@ region_model::on_call_post (const gcall *call, impl_call_operator_delete (cd); return; } + else if (is_named_call_p (callee_fndecl, "pipe", call, 1) + || is_named_call_p (callee_fndecl, "pipe2", call, 2)) + { + impl_call_pipe (cd); + return; + } /* Was this fndecl referenced by __attribute__((malloc(FOO)))? */ if (lookup_attribute ("*dealloc", DECL_ATTRIBUTES (callee_fndecl))) @@ -4185,10 +4212,19 @@ region_model::eval_condition_without_cm (const svalue *lhs, /* Otherwise, only known through constraints. */ } - /* If we have a pair of constants, compare them. */ if (const constant_svalue *cst_lhs = lhs->dyn_cast_constant_svalue ()) - if (const constant_svalue *cst_rhs = rhs->dyn_cast_constant_svalue ()) - return constant_svalue::eval_condition (cst_lhs, op, cst_rhs); + { + /* If we have a pair of constants, compare them. */ + if (const constant_svalue *cst_rhs = rhs->dyn_cast_constant_svalue ()) + return constant_svalue::eval_condition (cst_lhs, op, cst_rhs); + else + { + /* When we have one constant, put it on the RHS. */ + std::swap (lhs, rhs); + op = swap_tree_comparison (op); + } + } + gcc_assert (lhs->get_kind () != SK_CONSTANT); /* Handle comparison against zero. 
*/ if (const constant_svalue *cst_rhs = rhs->dyn_cast_constant_svalue ()) diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 635a0c2..19e8043 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -356,6 +356,7 @@ class region_model void impl_call_malloc (const call_details &cd); void impl_call_memcpy (const call_details &cd); void impl_call_memset (const call_details &cd); + void impl_call_pipe (const call_details &cd); void impl_call_putenv (const call_details &cd); void impl_call_realloc (const call_details &cd); void impl_call_strchr (const call_details &cd); @@ -373,6 +374,9 @@ class region_model const svalue *maybe_get_copy_bounds (const region *src_reg, const svalue *num_bytes_sval); + void update_for_int_cst_return (const call_details &cd, + int retval, + bool unmergeable); void update_for_zero_return (const call_details &cd, bool unmergeable); void update_for_nonzero_return (const call_details &cd); @@ -539,6 +543,9 @@ class region_model const region *src_reg, region_model_context *ctxt); + /* Implemented in sm-fd.cc */ + void mark_as_valid_fd (const svalue *sval, region_model_context *ctxt); + /* Implemented in sm-malloc.cc */ void on_realloc_with_move (const call_details &cd, const svalue *old_ptr_sval, @@ -730,15 +737,33 @@ class region_model_context virtual const extrinsic_state *get_ext_state () const = 0; - /* Hook for clients to access the "malloc" state machine in + /* Hook for clients to access the a specific state machine in any underlying program_state. */ - virtual bool get_malloc_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) = 0; - /* Likewise for the "taint" state machine. */ - virtual bool get_taint_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) = 0; + virtual bool get_state_map_by_name (const char *name, + sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) = 0; + + /* Precanned ways for clients to access specific state machines. */ + bool get_fd_map (sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) + { + return get_state_map_by_name ("file-descriptor", out_smap, out_sm, + out_sm_idx); + } + bool get_malloc_map (sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) + { + return get_state_map_by_name ("malloc", out_smap, out_sm, out_sm_idx); + } + bool get_taint_map (sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) + { + return get_state_map_by_name ("taint", out_smap, out_sm, out_sm_idx); + } /* Get the current statement, if any. 
*/ virtual const gimple *get_stmt () const = 0; @@ -785,15 +810,10 @@ public: const extrinsic_state *get_ext_state () const override { return NULL; } - bool get_malloc_map (sm_state_map **, - const state_machine **, - unsigned *) override - { - return false; - } - bool get_taint_map (sm_state_map **, - const state_machine **, - unsigned *) override + bool get_state_map_by_name (const char *, + sm_state_map **, + const state_machine **, + unsigned *) override { return false; } @@ -912,18 +932,12 @@ class region_model_context_decorator : public region_model_context return m_inner->get_ext_state (); } - bool get_malloc_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) override - { - return m_inner->get_malloc_map (out_smap, out_sm, out_sm_idx); - } - - bool get_taint_map (sm_state_map **out_smap, - const state_machine **out_sm, - unsigned *out_sm_idx) override + bool get_state_map_by_name (const char *name, + sm_state_map **out_smap, + const state_machine **out_sm, + unsigned *out_sm_idx) override { - return m_inner->get_taint_map (out_smap, out_sm, out_sm_idx); + return m_inner->get_state_map_by_name (name, out_smap, out_sm, out_sm_idx); } const gimple *get_stmt () const override diff --git a/gcc/analyzer/sm-fd.cc b/gcc/analyzer/sm-fd.cc index c4ad91c..8a4c208 100644 --- a/gcc/analyzer/sm-fd.cc +++ b/gcc/analyzer/sm-fd.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/store.h" #include "analyzer/region-model.h" #include "bitmap.h" +#include "analyzer/program-state.h" #if ENABLE_ANALYZER @@ -121,6 +122,12 @@ public: /* Function for one-to-one correspondence between valid and unchecked states. */ state_t valid_to_unchecked_state (state_t state) const; + + void mark_as_valid_fd (region_model *model, + sm_state_map *smap, + const svalue *fd_sval, + const extrinsic_state &ext_state) const; + /* State for a constant file descriptor (>= 0) */ state_t m_constant_fd; @@ -201,15 +208,19 @@ public: describe_state_change (const evdesc::state_change &change) override { if (change.m_old_state == m_sm.get_start_state () - && m_sm.is_unchecked_fd_p (change.m_new_state)) + && (m_sm.is_unchecked_fd_p (change.m_new_state) + || m_sm.is_valid_fd_p (change.m_new_state))) { - if (change.m_new_state == m_sm.m_unchecked_read_write) + if (change.m_new_state == m_sm.m_unchecked_read_write + || change.m_new_state == m_sm.m_valid_read_write) return change.formatted_print ("opened here as read-write"); - if (change.m_new_state == m_sm.m_unchecked_read_only) + if (change.m_new_state == m_sm.m_unchecked_read_only + || change.m_new_state == m_sm.m_valid_read_only) return change.formatted_print ("opened here as read-only"); - if (change.m_new_state == m_sm.m_unchecked_write_only) + if (change.m_new_state == m_sm.m_unchecked_write_only + || change.m_new_state == m_sm.m_valid_write_only) return change.formatted_print ("opened here as write-only"); } @@ -748,6 +759,15 @@ fd_state_machine::valid_to_unchecked_state (state_t state) const return NULL; } +void +fd_state_machine::mark_as_valid_fd (region_model *model, + sm_state_map *smap, + const svalue *fd_sval, + const extrinsic_state &ext_state) const +{ + smap->set_state (model, fd_sval, m_valid_read_write, NULL, ext_state); +} + bool fd_state_machine::on_stmt (sm_context *sm_ctxt, const supernode *node, const gimple *stmt) const @@ -764,6 +784,7 @@ fd_state_machine::on_stmt (sm_context *sm_ctxt, const supernode *node, if (is_named_call_p (callee_fndecl, "creat", call, 2)) { on_creat (sm_ctxt, node, 
stmt, call); + return true; } // "creat" if (is_named_call_p (callee_fndecl, "close", call, 1)) @@ -1186,6 +1207,33 @@ make_fd_state_machine (logger *logger) { return new fd_state_machine (logger); } + +/* Specialcase hook for handling pipe, for use by + region_model::impl_call_pipe::success::update_model. */ + +void +region_model::mark_as_valid_fd (const svalue *sval, region_model_context *ctxt) +{ + if (!ctxt) + return; + const extrinsic_state *ext_state = ctxt->get_ext_state (); + if (!ext_state) + return; + + sm_state_map *smap; + const state_machine *sm; + unsigned sm_idx; + if (!ctxt->get_fd_map (&smap, &sm, &sm_idx)) + return; + + gcc_assert (smap); + gcc_assert (sm); + + const fd_state_machine &fd_sm = (const fd_state_machine &)*sm; + + fd_sm.mark_as_valid_fd (this, smap, sval, *ext_state); +} + } // namespace ana #endif // ENABLE_ANALYZER diff --git a/gcc/analyzer/varargs.cc b/gcc/analyzer/varargs.cc index 20c83db..e4dbad7 100644 --- a/gcc/analyzer/varargs.cc +++ b/gcc/analyzer/varargs.cc @@ -171,9 +171,8 @@ get_va_copy_arg (const region_model *model, const svalue *arg_sval = model->get_rvalue (arg, ctxt); if (const svalue *cast = arg_sval->maybe_undo_cast ()) arg_sval = cast; - /* Expect a POINTER_TYPE; does it point to an array type? */ - gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE); - if (TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) == ARRAY_TYPE) + if (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) == ARRAY_TYPE) { /* va_list_arg_type_node is a pointer to a va_list; return *ARG_SVAL. */ diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index ca48404..9730732 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -363,7 +363,8 @@ get_combined_location (location_t loc, tree decl) /* TODO: allow more bits for line and less bits for discriminator. */ if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16)) warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes"); - return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16); + return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16) + | get_discriminator_from_loc (loc); } /* Return the function decl of a given lexical BLOCK. */ @@ -652,7 +653,7 @@ function_instance::read_function_instance (function_instance_stack *stack, for (unsigned i = 0; i < num_pos_counts; i++) { - unsigned offset = gcov_read_unsigned () & 0xffff0000; + unsigned offset = gcov_read_unsigned (); unsigned num_targets = gcov_read_unsigned (); gcov_type count = gcov_read_counter (); s->pos_counts[offset].count = count; diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 5f319b2..26898d7 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -9521,10 +9521,10 @@ fold_builtin_FILE (location_t loc) __FILE__ macro so it appears appropriate to use the same file prefix mappings. */ fname = remap_macro_filename (fname); - return build_string_literal (strlen (fname) + 1, fname); + return build_string_literal (fname); } - return build_string_literal (1, ""); + return build_string_literal (""); } /* Fold a call to __builtin_FUNCTION to a constant string. */ @@ -9537,7 +9537,7 @@ fold_builtin_FUNCTION () if (current_function_decl) name = lang_hooks.decl_printable_name (current_function_decl, 0); - return build_string_literal (strlen (name) + 1, name); + return build_string_literal (name); } /* Fold a call to __builtin_LINE to an integer constant. 
*/ diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 76fe5fe..2ac8eaa 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,17 @@ +2022-10-24 Jakub Jelinek <jakub@redhat.com> + + PR c++/107358 + * c-typeck.cc (build_binary_op): Pass operands before excess precision + promotions to scalar_to_vector call. + +2022-10-24 Arsen Arsenović <arsen@aarsen.me> + + * c-decl.cc (finish_function): Ignore hosted when deciding + whether to implicitly return zero, but check noreturn. + * c-objc-common.cc (c_missing_noreturn_ok_p): Loosen the + requirements to just MAIN_NAME_P when hosted, or `int main' + otherwise. + 2022-10-20 Richard Biener <rguenther@suse.de> PR c/107305 diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 80f6e91..4746e31 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -10542,7 +10542,7 @@ finish_function (location_t end_loc) if (DECL_RESULT (fndecl) && DECL_RESULT (fndecl) != error_mark_node) DECL_CONTEXT (DECL_RESULT (fndecl)) = fndecl; - if (MAIN_NAME_P (DECL_NAME (fndecl)) && flag_hosted + if (MAIN_NAME_P (DECL_NAME (fndecl)) && !TREE_THIS_VOLATILE (fndecl) && TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (fndecl))) == integer_type_node && flag_isoc99) { diff --git a/gcc/c/c-objc-common.cc b/gcc/c/c-objc-common.cc index 70e10a9..b468091 100644 --- a/gcc/c/c-objc-common.cc +++ b/gcc/c/c-objc-common.cc @@ -37,9 +37,12 @@ static bool c_tree_printer (pretty_printer *, text_info *, const char *, bool c_missing_noreturn_ok_p (tree decl) { - /* A missing noreturn is not ok for freestanding implementations and - ok for the `main' function in hosted implementations. */ - return flag_hosted && MAIN_NAME_P (DECL_ASSEMBLER_NAME (decl)); + /* A missing noreturn is ok for the `main' function. */ + if (!MAIN_NAME_P (DECL_ASSEMBLER_NAME (decl))) + return false; + + return flag_hosted + || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (decl))) == integer_type_node; } /* Called from check_global_declaration. */ diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index fdb96c2..92f3afc 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -11995,8 +11995,8 @@ build_binary_op (location_t location, enum tree_code code, if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE) || (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE)) { - enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1, - true); + enum stv_conv convert_flag = scalar_to_vector (location, code, orig_op0, + orig_op1, true); switch (convert_flag) { diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index eb0fa87..bb4b3c5 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -375,6 +375,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, if (!new_inlined_to) prof_count = count.combine_with_ipa_count (prof_count); new_node->count = prof_count; + new_node->calls_declare_variant_alt = this->calls_declare_variant_alt; /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index d45451c..19ea713 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -76,6 +76,8 @@ has_cpu_feature (struct __processor_model *cpu_model, } } +/* Save FEATURE to either CPU_MODEL or CPU_FEATURES2. */ + static inline void set_cpu_feature (struct __processor_model *cpu_model, unsigned int *cpu_features2, @@ -100,6 +102,32 @@ set_cpu_feature (struct __processor_model *cpu_model, } } +/* Drop FEATURE from either CPU_MODEL or CPU_FEATURES2. 
*/ + +static inline void +reset_cpu_feature (struct __processor_model *cpu_model, + unsigned int *cpu_features2, + enum processor_features feature) +{ + unsigned index, offset; + unsigned f = feature; + + if (f < 32) + { + /* The first 32 features. */ + cpu_model->__cpu_features[0] &= ~(1U << f); + } + else + { + /* The rest of features. cpu_features2[i] contains features from + (32 + i * 32) to (31 + 32 + i * 32), inclusively. */ + f -= 32; + index = f / 32; + offset = f % 32; + cpu_features2[index] &= ~(1U << offset); + } +} + /* Get the specific type of AMD CPU and return AMD CPU name. Return NULL for unknown AMD CPU. */ @@ -565,11 +593,11 @@ get_zhaoxin_cpu (struct __processor_model *cpu_model, cpu_model->__cpu_type = ZHAOXIN_FAM7H; if (model == 0x3b) { - cpu = "lujiazui"; - CHECK___builtin_cpu_is ("lujiazui"); - cpu_model->__cpu_features[0] &= ~(1U <<(FEATURE_AVX & 31)); - cpu_features2[0] &= ~(1U <<((FEATURE_F16C - 32) & 31)); - cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI; + cpu = "lujiazui"; + CHECK___builtin_cpu_is ("lujiazui"); + reset_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX); + reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C); + cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI; } break; default: diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 4b01c35..f66bdd5 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -2113,7 +2113,7 @@ const pta processor_alias_table[] = {"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3, PTA_ZNVER3, M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2}, - {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4, + {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER3, PTA_ZNVER4, M_CPU_SUBTYPE (AMDFAM19H_ZNVER4), P_PROC_AVX512F}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 9893fc4..761af27 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -34,8 +34,10 @@ enum processor_vendor VENDOR_CENTAUR, VENDOR_CYRIX, VENDOR_NSC, - BUILTIN_VENDOR_MAX = VENDOR_OTHER, - VENDOR_MAX + + /* Maximum values must be at the end of this enum. */ + VENDOR_MAX, + BUILTIN_VENDOR_MAX = VENDOR_OTHER }; /* Any new types or subtypes have to be inserted at the end. */ diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index c39ed2e..bd356ce 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -145,6 +145,8 @@ static const struct riscv_ext_version riscv_ext_version_table[] = {"c", ISA_SPEC_CLASS_20190608, 2, 0}, {"c", ISA_SPEC_CLASS_2P2, 2, 0}, + {"h", ISA_SPEC_CLASS_NONE, 1, 0}, + {"v", ISA_SPEC_CLASS_NONE, 1, 0}, {"zicsr", ISA_SPEC_CLASS_20191213, 2, 0}, @@ -202,6 +204,9 @@ static const struct riscv_ext_version riscv_ext_version_table[] = {"zmmul", ISA_SPEC_CLASS_NONE, 1, 0}, + {"svinval", ISA_SPEC_CLASS_NONE, 1, 0}, + {"svnapot", ISA_SPEC_CLASS_NONE, 1, 0}, + /* Terminate the list. 
*/ {NULL, ISA_SPEC_CLASS_NONE, 0, 0} }; @@ -224,6 +229,14 @@ static const riscv_cpu_info riscv_cpu_tables[] = {NULL, NULL, NULL} }; +static const char *riscv_tunes[] = +{ +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \ + TUNE_NAME, +#include "../../../config/riscv/riscv-cores.def" + NULL +}; + static const char *riscv_supported_std_ext (void); static riscv_subset_list *current_subset_list = NULL; @@ -353,21 +366,18 @@ multi_letter_subset_rank (const std::string &subset) gcc_assert (subset.length () >= 2); int high_order = -1; int low_order = 0; - /* The order between multi-char extensions: s -> h -> z -> x. */ + /* The order between multi-char extensions: s -> z -> x. */ char multiletter_class = subset[0]; switch (multiletter_class) { case 's': high_order = 0; break; - case 'h': - high_order = 1; - break; case 'z': - high_order = 2; + high_order = 1; break; case 'x': - high_order = 3; + high_order = 2; break; default: gcc_unreachable (); @@ -663,7 +673,7 @@ riscv_subset_list::lookup (const char *subset, int major_version, static const char * riscv_supported_std_ext (void) { - return "mafdqlcbkjtpvn"; + return "mafdqlcbkjtpvnh"; } /* Parsing subset version. @@ -822,7 +832,7 @@ riscv_subset_list::parse_std_ext (const char *p) { char subset[2] = {0, 0}; - if (*p == 'x' || *p == 's' || *p == 'h' || *p == 'z') + if (*p == 'x' || *p == 's' || *p == 'z') break; if (*p == '_') @@ -947,7 +957,7 @@ riscv_subset_list::handle_combine_ext () Arguments: `p`: Current parsing position. - `ext_type`: What kind of extensions, 's', 'h', 'z' or 'x'. + `ext_type`: What kind of extensions, 's', 'z' or 'x'. `ext_type_str`: Full name for kind of extension. */ const char * @@ -1089,12 +1099,6 @@ riscv_subset_list::parse (const char *arch, location_t loc) if (p == NULL) goto fail; - /* Parsing hypervisor extension. */ - p = subset_list->parse_multiletter_ext (p, "h", "hypervisor extension"); - - if (p == NULL) - goto fail; - /* Parsing sub-extensions. */ p = subset_list->parse_multiletter_ext (p, "z", "sub-extension"); @@ -1218,6 +1222,9 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] = {"zmmul", &gcc_options::x_riscv_zm_subext, MASK_ZMMUL}, + {"svinval", &gcc_options::x_riscv_sv_subext, MASK_SVINVAL}, + {"svnapot", &gcc_options::x_riscv_sv_subext, MASK_SVNAPOT}, + {NULL, NULL, 0} }; @@ -1687,6 +1694,41 @@ riscv_compute_multilib ( #define TARGET_COMPUTE_MULTILIB riscv_compute_multilib #endif +vec<const char *> +riscv_get_valid_option_values (int option_code, + const char *prefix ATTRIBUTE_UNUSED) +{ + vec<const char *> v; + v.create (0); + opt_code opt = (opt_code) option_code; + + switch (opt) + { + case OPT_mtune_: + { + const char **tune = &riscv_tunes[0]; + for (;*tune; ++tune) + v.safe_push (*tune); + + const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0]; + for (;cpu_info->name; ++cpu_info) + v.safe_push (cpu_info->name); + } + break; + case OPT_mcpu_: + { + const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0]; + for (;cpu_info->name; ++cpu_info) + v.safe_push (cpu_info->name); + } + break; + default: + break; + } + + return v; +} + /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. 
*/ static const struct default_options riscv_option_optimization_table[] = { @@ -1701,4 +1743,7 @@ static const struct default_options riscv_option_optimization_table[] = #undef TARGET_HANDLE_OPTION #define TARGET_HANDLE_OPTION riscv_handle_option +#undef TARGET_GET_VALID_OPTION_VALUES +#define TARGET_GET_VALID_OPTION_VALUES riscv_get_valid_option_values + struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index 160c52c..e8fe623 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -516,7 +516,7 @@ pru-*-*) ;; riscv*) cpu_type=riscv - extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o" + extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o" extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o" d_target_objs="riscv-d.o" extra_headers="riscv_vector.h" @@ -4675,7 +4675,7 @@ case "${target}" in ;; mips*-*-*) - supported_defaults="abi arch arch_32 arch_64 float fpu nan fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 madd4" + supported_defaults="abi arch arch_32 arch_64 float fpu nan fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 madd4 compact-branches" case ${with_float} in "" | soft | hard) @@ -4828,6 +4828,15 @@ case "${target}" in exit 1 ;; esac + + case ${with_compact_branches} in + "" | never | always | optimal) + ;; + *) + echo "Unknown compact-branches policy used in --with-compact-branches" 1>&2 + exit 1 + ;; + esac ;; loongarch*-*-*) @@ -5772,7 +5781,7 @@ case ${target} in esac t= -all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls lxc1-sxc1 madd4 isa_spec" +all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls lxc1-sxc1 madd4 isa_spec compact-branches" for option in $all_defaults do eval "val=\$with_"`echo $option | sed s/-/_/g` diff --git a/gcc/config.in b/gcc/config.in index 5e41748..38ef792 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -2324,6 +2324,12 @@ #endif +/* Specify if mutliarch is enabled. */ +#ifndef USED_FOR_TARGET +#undef ENABLE_MULTIARCH +#endif + + /* The size of `dev_t', as computed by sizeof. */ #ifndef USED_FOR_TARGET #undef SIZEOF_DEV_T diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1d0f994..5d1ab5a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, aarch64_addr_query_type); -static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); /* The processor for which instructions should be scheduled. */ enum aarch64_processor aarch64_tune = cortexa53; @@ -5502,6 +5501,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) factor, nelts_per_vq); } +/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ + +static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = + { + 0x0000000100000001ull, + 0x0001000100010001ull, + 0x0101010101010101ull, + 0x1111111111111111ull, + 0x5555555555555555ull, + }; + + + +/* Return true if 64-bit VAL is a valid bitmask immediate. 
*/ +static bool +aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) +{ + unsigned HOST_WIDE_INT tmp, mask, first_one, next_one; + int bits; + + /* Check for a single sequence of one bits and return quickly if so. + The special cases of all ones and all zeroes returns false. */ + tmp = val + (val & -val); + + if (tmp == (tmp & -tmp)) + return (val + 1) > 1; + + /* Invert if the immediate doesn't start with a zero bit - this means we + only need to search for sequences of one bits. */ + if (val & 1) + val = ~val; + + /* Find the first set bit and set tmp to val with the first sequence of one + bits removed. Return success if there is a single sequence of ones. */ + first_one = val & -val; + tmp = val & (val + first_one); + + if (tmp == 0) + return true; + + /* Find the next set bit and compute the difference in bit position. */ + next_one = tmp & -tmp; + bits = clz_hwi (first_one) - clz_hwi (next_one); + mask = val ^ tmp; + + /* Check the bit position difference is a power of 2, and that the first + sequence of one bits fits within 'bits' bits. */ + if ((mask >> bits) != 0 || bits != (bits & -bits)) + return false; + + /* Check the sequence of one bits is repeated 64/bits times. */ + return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26]; +} + + +/* Return true if VAL is a valid bitmask immediate for MODE. */ +bool +aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) +{ + if (mode == DImode) + return aarch64_bitmask_imm (val_in); + + unsigned HOST_WIDE_INT val = val_in; + + if (mode == SImode) + return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); + + /* Replicate small immediates to fit 64 bits. */ + int size = GET_MODE_UNIT_PRECISION (mode); + val &= (HOST_WIDE_INT_1U << size) - 1; + val *= bitmask_imm_mul[__builtin_clz (size) - 26]; + + return aarch64_bitmask_imm (val); +} + + +/* Return true if the immediate VAL can be a bitfield immediate + by changing the given MASK bits in VAL to zeroes, ones or bits + from the other half of VAL. Return the new immediate in VAL2. */ +static inline bool +aarch64_check_bitmask (unsigned HOST_WIDE_INT val, + unsigned HOST_WIDE_INT &val2, + unsigned HOST_WIDE_INT mask) +{ + val2 = val & ~mask; + if (val2 != val && aarch64_bitmask_imm (val2)) + return true; + val2 = val | mask; + if (val2 != val && aarch64_bitmask_imm (val2)) + return true; + val = val & ~mask; + val2 = val | (((val >> 32) | (val << 32)) & mask); + if (val2 != val && aarch64_bitmask_imm (val2)) + return true; + val2 = val | (((val >> 16) | (val << 48)) & mask); + if (val2 != val && aarch64_bitmask_imm (val2)) + return true; + return false; +} + + +/* Return true if val is an immediate that can be loaded into a + register by a MOVZ instruction. */ +static bool +aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) +{ + if (GET_MODE_SIZE (mode) > 4) + { + if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val + || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) + return 1; + } + else + { + /* Ignore sign extension. */ + val &= (HOST_WIDE_INT) 0xffffffff; + } + return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val + || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); +} + + +/* Return true if VAL is an immediate that can be loaded into a + register in a single instruction. 
*/ +bool +aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) +{ + scalar_int_mode int_mode; + if (!is_a <scalar_int_mode> (mode, &int_mode)) + return false; + + if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) + return 1; + return aarch64_bitmask_imm (val, int_mode); +} + + static int aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, scalar_int_mode mode) @@ -5532,7 +5668,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); /* Check if we have to emit a second instruction by checking to see - if any of the upper 32 bits of the original DI mode value is set. */ + if any of the upper 32 bits of the original DI mode value is set. */ if (val == val2) return 1; @@ -5568,36 +5704,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); - if (zero_match != 2 && one_match != 2) + if (zero_match < 2 && one_match < 2) { /* Try emitting a bitmask immediate with a movk replacing 16 bits. For a 64-bit bitmask try whether changing 16 bits to all ones or zeroes creates a valid bitmask. To check any repeated bitmask, try using 16 bits from the other 32-bit half of val. */ - for (i = 0; i < 64; i += 16, mask <<= 16) - { - val2 = val & ~mask; - if (val2 != val && aarch64_bitmask_imm (val2, mode)) - break; - val2 = val | mask; - if (val2 != val && aarch64_bitmask_imm (val2, mode)) - break; - val2 = val2 & ~mask; - val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask); - if (val2 != val && aarch64_bitmask_imm (val2, mode)) - break; - } - if (i != 64) - { - if (generate) + for (i = 0; i < 64; i += 16) + if (aarch64_check_bitmask (val, val2, mask << i)) + { + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + } + return 2; + } + } + + /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ + if (zero_match + one_match == 0) + { + for (i = 0; i < 48; i += 16) + for (int j = i + 16; j < 64; j += 16) + if (aarch64_check_bitmask (val, val2, (mask << i) | (mask << j))) { - emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); - emit_insn (gen_insv_immdi (dest, GEN_INT (i), - GEN_INT ((val >> i) & 0xffff))); + if (generate) + { + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + emit_insn (gen_insv_immdi (dest, GEN_INT (j), + GEN_INT ((val >> j) & 0xffff))); + } + return 3; } - return 2; - } } /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which @@ -5644,6 +5787,99 @@ aarch64_mov128_immediate (rtx imm) } +/* Return true if val can be encoded as a 12-bit unsigned immediate with + a left shift of 0 or 12 bits. */ +bool +aarch64_uimm12_shift (HOST_WIDE_INT val) +{ + return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val + || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val + ); +} + +/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate + that can be created with a left shift of 0 or 12. */ +static HOST_WIDE_INT +aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) +{ + /* Check to see if the value fits in 24 bits, as that is the maximum we can + handle correctly. 
*/ + gcc_assert ((val & 0xffffff) == val); + + if (((val & 0xfff) << 0) == val) + return val; + + return val & (0xfff << 12); +} + + +/* Test whether: + + X = (X & AND_VAL) | IOR_VAL; + + can be implemented using: + + MOVK X, #(IOR_VAL >> shift), LSL #shift + + Return the shift if so, otherwise return -1. */ +int +aarch64_movk_shift (const wide_int_ref &and_val, + const wide_int_ref &ior_val) +{ + unsigned int precision = and_val.get_precision (); + unsigned HOST_WIDE_INT mask = 0xffff; + for (unsigned int shift = 0; shift < precision; shift += 16) + { + if (and_val == ~mask && (ior_val & mask) == ior_val) + return shift; + mask <<= 16; + } + return -1; +} + +/* Create mask of ones, covering the lowest to highest bits set in VAL_IN. + Assumed precondition: VAL_IN Is not zero. */ + +unsigned HOST_WIDE_INT +aarch64_and_split_imm1 (HOST_WIDE_INT val_in) +{ + int lowest_bit_set = ctz_hwi (val_in); + int highest_bit_set = floor_log2 (val_in); + gcc_assert (val_in != 0); + + return ((HOST_WIDE_INT_UC (2) << highest_bit_set) - + (HOST_WIDE_INT_1U << lowest_bit_set)); +} + +/* Create constant where bits outside of lowest bit set to highest bit set + are set to 1. */ + +unsigned HOST_WIDE_INT +aarch64_and_split_imm2 (HOST_WIDE_INT val_in) +{ + return val_in | ~aarch64_and_split_imm1 (val_in); +} + +/* Return true if VAL_IN is a valid 'and' bitmask immediate. */ + +bool +aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode) +{ + scalar_int_mode int_mode; + if (!is_a <scalar_int_mode> (mode, &int_mode)) + return false; + + if (aarch64_bitmask_imm (val_in, int_mode)) + return false; + + if (aarch64_move_imm (val_in, int_mode)) + return false; + + unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in); + + return aarch64_bitmask_imm (imm2, int_mode); +} + /* Return the number of temporary registers that aarch64_add_offset_1 would need to add OFFSET to a register. */ @@ -10099,207 +10335,6 @@ aarch64_tls_referenced_p (rtx x) } -/* Return true if val can be encoded as a 12-bit unsigned immediate with - a left shift of 0 or 12 bits. */ -bool -aarch64_uimm12_shift (HOST_WIDE_INT val) -{ - return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val - || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val - ); -} - -/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate - that can be created with a left shift of 0 or 12. */ -static HOST_WIDE_INT -aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) -{ - /* Check to see if the value fits in 24 bits, as that is the maximum we can - handle correctly. */ - gcc_assert ((val & 0xffffff) == val); - - if (((val & 0xfff) << 0) == val) - return val; - - return val & (0xfff << 12); -} - -/* Return true if val is an immediate that can be loaded into a - register by a MOVZ instruction. */ -static bool -aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) -{ - if (GET_MODE_SIZE (mode) > 4) - { - if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val - || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) - return 1; - } - else - { - /* Ignore sign extension. */ - val &= (HOST_WIDE_INT) 0xffffffff; - } - return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val - || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); -} - -/* Test whether: - - X = (X & AND_VAL) | IOR_VAL; - - can be implemented using: - - MOVK X, #(IOR_VAL >> shift), LSL #shift - - Return the shift if so, otherwise return -1. 
*/ -int -aarch64_movk_shift (const wide_int_ref &and_val, - const wide_int_ref &ior_val) -{ - unsigned int precision = and_val.get_precision (); - unsigned HOST_WIDE_INT mask = 0xffff; - for (unsigned int shift = 0; shift < precision; shift += 16) - { - if (and_val == ~mask && (ior_val & mask) == ior_val) - return shift; - mask <<= 16; - } - return -1; -} - -/* VAL is a value with the inner mode of MODE. Replicate it to fill a - 64-bit (DImode) integer. */ - -static unsigned HOST_WIDE_INT -aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) -{ - unsigned int size = GET_MODE_UNIT_PRECISION (mode); - while (size < 64) - { - val &= (HOST_WIDE_INT_1U << size) - 1; - val |= val << size; - size *= 2; - } - return val; -} - -/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ - -static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = - { - 0x0000000100000001ull, - 0x0001000100010001ull, - 0x0101010101010101ull, - 0x1111111111111111ull, - 0x5555555555555555ull, - }; - - -/* Return true if val is a valid bitmask immediate. */ - -bool -aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) -{ - unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one; - int bits; - - /* Check for a single sequence of one bits and return quickly if so. - The special cases of all ones and all zeroes returns false. */ - val = aarch64_replicate_bitmask_imm (val_in, mode); - tmp = val + (val & -val); - - if (tmp == (tmp & -tmp)) - return (val + 1) > 1; - - /* Replicate 32-bit immediates so we can treat them as 64-bit. */ - if (mode == SImode) - val = (val << 32) | (val & 0xffffffff); - - /* Invert if the immediate doesn't start with a zero bit - this means we - only need to search for sequences of one bits. */ - if (val & 1) - val = ~val; - - /* Find the first set bit and set tmp to val with the first sequence of one - bits removed. Return success if there is a single sequence of ones. */ - first_one = val & -val; - tmp = val & (val + first_one); - - if (tmp == 0) - return true; - - /* Find the next set bit and compute the difference in bit position. */ - next_one = tmp & -tmp; - bits = clz_hwi (first_one) - clz_hwi (next_one); - mask = val ^ tmp; - - /* Check the bit position difference is a power of 2, and that the first - sequence of one bits fits within 'bits' bits. */ - if ((mask >> bits) != 0 || bits != (bits & -bits)) - return false; - - /* Check the sequence of one bits is repeated 64/bits times. */ - return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26]; -} - -/* Create mask of ones, covering the lowest to highest bits set in VAL_IN. - Assumed precondition: VAL_IN Is not zero. */ - -unsigned HOST_WIDE_INT -aarch64_and_split_imm1 (HOST_WIDE_INT val_in) -{ - int lowest_bit_set = ctz_hwi (val_in); - int highest_bit_set = floor_log2 (val_in); - gcc_assert (val_in != 0); - - return ((HOST_WIDE_INT_UC (2) << highest_bit_set) - - (HOST_WIDE_INT_1U << lowest_bit_set)); -} - -/* Create constant where bits outside of lowest bit set to highest bit set - are set to 1. */ - -unsigned HOST_WIDE_INT -aarch64_and_split_imm2 (HOST_WIDE_INT val_in) -{ - return val_in | ~aarch64_and_split_imm1 (val_in); -} - -/* Return true if VAL_IN is a valid 'and' bitmask immediate. 
*/ - -bool -aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode) -{ - scalar_int_mode int_mode; - if (!is_a <scalar_int_mode> (mode, &int_mode)) - return false; - - if (aarch64_bitmask_imm (val_in, int_mode)) - return false; - - if (aarch64_move_imm (val_in, int_mode)) - return false; - - unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in); - - return aarch64_bitmask_imm (imm2, int_mode); -} - -/* Return true if val is an immediate that can be loaded into a - register in a single instruction. */ -bool -aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) -{ - scalar_int_mode int_mode; - if (!is_a <scalar_int_mode> (mode, &int_mode)) - return false; - - if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) - return 1; - return aarch64_bitmask_imm (val, int_mode); -} - static bool aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 0e8ba14..5d9e5a1 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2626,6 +2626,35 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) machine_mode op_mode = GET_MODE (op0); bool is_sse = SSE_FLOAT_MODE_SSEMATH_OR_HF_P (op_mode); + if (op_mode == BFmode) + { + rtx op = gen_lowpart (HImode, op0); + if (CONST_INT_P (op)) + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, + op0, BFmode); + else + { + rtx t1 = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (t1, op)); + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); + op = gen_lowpart (SFmode, t1); + } + *pop0 = op; + op = gen_lowpart (HImode, op1); + if (CONST_INT_P (op)) + op = simplify_const_unary_operation (FLOAT_EXTEND, SFmode, + op1, BFmode); + else + { + rtx t1 = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (t1, op)); + emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16))); + op = gen_lowpart (SFmode, t1); + } + *pop1 = op; + return ix86_prepare_fp_compare_args (code, pop0, pop1); + } + /* All of the unordered compare instructions only work on registers. The same is true of the fcomi compare instructions. The XFmode compare instructions require registers except when comparing @@ -3164,6 +3193,10 @@ ix86_expand_int_movcc (rtx operands[]) && !TARGET_64BIT)) return false; + if (GET_MODE (op0) == BFmode + && !ix86_fp_comparison_operator (operands[1], VOIDmode)) + return false; + start_sequence (); compare_op = ix86_expand_compare (code, op0, op1); compare_seq = get_insns (); @@ -4238,6 +4271,10 @@ ix86_expand_fp_movcc (rtx operands[]) rtx op0 = XEXP (operands[1], 0); rtx op1 = XEXP (operands[1], 1); + if (GET_MODE (op0) == BFmode + && !ix86_fp_comparison_operator (operands[1], VOIDmode)) + return false; + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) { machine_mode cmode; diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md index 376a145..9c25b4e 100644 --- a/gcc/config/i386/znver.md +++ b/gcc/config/i386/znver.md @@ -23,8 +23,8 @@ ;; AMD znver1, znver2 and znver3 Scheduling ;; Modeling automatons for zen decoders, integer execution pipes, -;; AGU pipes, floating point execution, branch and store units. -(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver4_bru, znver4_fp_store") +;; AGU pipes and floating point execution units. +(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu") ;; Decoders unit has 4 decoders and all of them can decode fast path ;; and vector type instructions. @@ -63,8 +63,6 @@ ;; Load is 4 cycles. 
We do not model reservation of load unit. ;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing") (define_reservation "znver1-load" "znver1-agu-reserve") -;; According to Manual, all AGU are used for loads and stores in znver4. -(define_reservation "znver4-load" "znver2-store-agu-reserve") ;; Store operations differs between znver1, znver2 and znver3 because extra AGU ;; was added. (define_reservation "znver1-store" "znver1-agu-reserve") @@ -95,11 +93,6 @@ +znver1-fp2+znver1-fp3 +znver1-agu0+znver1-agu1+znver2-agu2") -;; znver4 has one branch unit in znver1-ieu0 and a separate branch unit. -(define_cpu_unit "znver4-bru0" "znver4_bru") -;; znver4 also has dedicated fp-store unit. -(define_cpu_unit "znver4-fp-store0" "znver4_fp_store") - ;; Call instruction (define_insn_reservation "znver1_call" 1 (and (eq_attr "cpu" "znver1") @@ -111,11 +104,6 @@ (eq_attr "type" "call,callv")) "znver1-double,znver2-store,znver1-ieu0|znver1-ieu3") -(define_insn_reservation "znver4_call" 1 - (and (eq_attr "cpu" "znver4") - (eq_attr "type" "call,callv")) - "znver1-double,znver1-ieu0|znver4-bru0,znver2-store") - ;; General instructions (define_insn_reservation "znver1_push" 1 (and (eq_attr "cpu" "znver1") @@ -123,7 +111,7 @@ (eq_attr "memory" "store"))) "znver1-direct,znver1-store") (define_insn_reservation "znver2_push" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "push") (eq_attr "memory" "store"))) "znver1-direct,znver2-store") @@ -138,22 +126,12 @@ (and (eq_attr "type" "push") (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver2-store") -(define_insn_reservation "znver4_push_load" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "push") - (eq_attr "memory" "both"))) - "znver1-direct,znver4-load,znver2-store") (define_insn_reservation "znver1_pop" 4 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "pop") (eq_attr "memory" "load"))) "znver1-direct,znver1-load") -(define_insn_reservation "znver4_pop" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "pop") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load") (define_insn_reservation "znver1_pop_mem" 4 (and (eq_attr "cpu" "znver1") @@ -165,11 +143,6 @@ (and (eq_attr "type" "pop") (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver2-store") -(define_insn_reservation "znver4_pop_mem" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "pop") - (eq_attr "memory" "both"))) - "znver1-direct,znver4-load,znver2-store") ;; Leave (define_insn_reservation "znver1_leave" 1 @@ -177,7 +150,7 @@ (eq_attr "type" "leave")) "znver1-double,znver1-ieu, znver1-store") (define_insn_reservation "znver2_leave" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (eq_attr "type" "leave")) "znver1-double,znver1-ieu, znver2-store") @@ -189,29 +162,12 @@ (and (eq_attr "type" "imul") (eq_attr "memory" "none"))) "znver1-direct,znver1-ieu1") -(define_insn_reservation "znver4_imul" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "imul") - (and (eq_attr "mode" "SI,HI,QI") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-ieu1") -(define_insn_reservation "znver4_imul_DI" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "imul") - (and (eq_attr "mode" "DI") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-ieu1") (define_insn_reservation "znver1_imul_mem" 7 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imul") (eq_attr "memory" "!none"))) "znver1-direct,znver1-load, 
znver1-ieu1") -(define_insn_reservation "znver4_imul_mem" 7 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "imul") - (eq_attr "memory" "!none"))) - "znver1-direct,znver4-load, znver1-ieu1") ;; Divisions ;; Reg operands @@ -305,14 +261,14 @@ (and (eq_attr "type" "idiv") (and (eq_attr "mode" "DI") (eq_attr "memory" "load")))) - "znver1-double,znver1-load,znver1-ieu2*18") + "znver1-double,znver1-load,znver1-ieu2*22") (define_insn_reservation "znver3_idiv_mem_SI" 16 (and (eq_attr "cpu" "znver3") (and (eq_attr "type" "idiv") (and (eq_attr "mode" "SI") (eq_attr "memory" "load")))) - "znver1-double,znver1-load,znver1-ieu2*12") + "znver1-double,znver1-load,znver1-ieu2*16") (define_insn_reservation "znver3_idiv_mem_HI" 14 (and (eq_attr "cpu" "znver3") @@ -328,62 +284,6 @@ (eq_attr "memory" "load")))) "znver1-direct,znver1-load,znver1-ieu2*9") -(define_insn_reservation "znver4_idiv_DI" 18 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "DI") - (eq_attr "memory" "none")))) - "znver1-double,znver1-ieu0*18") - -(define_insn_reservation "znver4_idiv_SI" 12 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "SI") - (eq_attr "memory" "none")))) - "znver1-double,znver1-ieu0*12") - -(define_insn_reservation "znver4_idiv_HI" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "HI") - (eq_attr "memory" "none")))) - "znver1-double,znver1-ieu0*10") - -(define_insn_reservation "znver4_idiv_QI" 9 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "QI") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-ieu0*9") - -(define_insn_reservation "znver4_idiv_mem_DI" 22 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "DI") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-ieu0*18") - -(define_insn_reservation "znver4_idiv_mem_SI" 16 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "SI") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-ieu0*12") - -(define_insn_reservation "znver4_idiv_mem_HI" 14 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "HI") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-ieu0*10") - -(define_insn_reservation "znver4_idiv_mem_QI" 13 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "idiv") - (and (eq_attr "mode" "QI") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-ieu0*9") - ;; STR ISHIFT which are micro coded. ;; Fix me: Latency need to be rechecked. 
(define_insn_reservation "znver1_str_ishift" 6 @@ -393,15 +293,15 @@ "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_str_ishift" 3 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ishift") (eq_attr "memory" "both,store"))) - "znver1-vector,znver2-ivector") + "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_str_istr" 19 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "str") (eq_attr "memory" "both,store"))) - "znver1-vector,znver2-ivector") + "znver1-vector,znver1-ivector") ;; MOV - integer moves (define_insn_reservation "znver1_load_imov_double" 2 @@ -418,15 +318,8 @@ (eq_attr "memory" "none")))) "znver1-double,znver1-ieu|znver1-ieu") -(define_insn_reservation "znver4_load_imov_double" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "znver1_decode" "double") - (and (eq_attr "type" "imovx") - (eq_attr "memory" "none")))) - "znver1-double,znver1-ieu0|znver1-ieu3") - (define_insn_reservation "znver1_load_imov_direct" 1 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imov,imovx") (eq_attr "memory" "none"))) "znver1-direct,znver1-ieu") @@ -439,7 +332,7 @@ "znver1-double,znver1-ieu|znver1-ieu,znver1-store") (define_insn_reservation "znver2_load_imov_double_store" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "imovx") (eq_attr "memory" "store")))) @@ -452,7 +345,7 @@ "znver1-direct,znver1-ieu,znver1-store") (define_insn_reservation "znver2_load_imov_direct_store" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "imov,imovx") (eq_attr "memory" "store"))) "znver1-direct,znver1-ieu,znver2-store") @@ -471,13 +364,6 @@ (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-ieu|znver1-ieu") -(define_insn_reservation "znver4_load_imov_double_load" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "znver1_decode" "double") - (and (eq_attr "type" "imovx") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-ieu") - (define_insn_reservation "znver1_load_imov_direct_load" 4 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imov,imovx") @@ -492,48 +378,12 @@ (eq_attr "memory" "none,unknown"))) "znver1-direct,znver1-ieu") -(define_insn_reservation "znver4_insn_1" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "alu,icmp,negnot,test,incdec") - (eq_attr "memory" "none,unknown"))) - "znver1-direct,znver1-ieu") - -(define_insn_reservation "znver4_insn_2" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "rotate,rotate1,ishift,ishift1") - (eq_attr "memory" "none,unknown"))) - "znver1-direct,znver1-ieu1|znver1-ieu2") - -(define_insn_reservation "znver4_insn_3" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "setcc,icmov") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-ieu0|znver1-ieu3") - (define_insn_reservation "znver1_insn_load" 5 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-ieu") -(define_insn_reservation "znver4_insn_1_load" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "alu,icmp,negnot,test,incdec") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-ieu") - -(define_insn_reservation 
"znver4_insn_2_load" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "rotate,rotate1,ishift,ishift1") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-ieu1|znver1-ieu2") - -(define_insn_reservation "znver4_insn_3_load" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "setcc,icmov") - (eq_attr "memory" "load"))) - "znver1-double,znver4-load,znver1-ieu0|znver1-ieu3") - (define_insn_reservation "znver1_insn_store" 1 (and (eq_attr "cpu" "znver1") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") @@ -546,24 +396,6 @@ (eq_attr "memory" "store"))) "znver1-direct,znver1-ieu,znver2-store") -(define_insn_reservation "znver4_insn_1_store" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "alu,icmp,negnot,test,incdec") - (eq_attr "memory" "store"))) - "znver1-direct,znver1-ieu,znver2-store") - -(define_insn_reservation "znver4_insn_2_store" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "rotate,rotate1,ishift,ishift1") - (eq_attr "memory" "store"))) - "znver1-direct,znver1-ieu1|znver1-ieu2,znver2-store") - -(define_insn_reservation "znver4_insn_3_store" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "setcc,icmov") - (eq_attr "memory" "store"))) - "znver1-double,znver1-ieu0|znver1-ieu3,znver2-store") - (define_insn_reservation "znver1_insn_both" 5 (and (eq_attr "cpu" "znver1") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") @@ -576,24 +408,6 @@ (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver1-ieu,znver2-store") -(define_insn_reservation "znver4_insn_1_both" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "alu,icmp,negnot,test,incdec") - (eq_attr "memory" "both"))) - "znver1-direct,znver4-load,znver1-ieu,znver2-store") - -(define_insn_reservation "znver4_insn_2_both" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "rotate,rotate1,ishift,ishift1") - (eq_attr "memory" "both"))) - "znver1-direct,znver4-load,znver1-ieu1|znver1-ieu2,znver2-store") - -(define_insn_reservation "znver4_insn_3_both" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "setcc,icmov") - (eq_attr "memory" "both"))) - "znver1-double,znver4-load,znver1-ieu0|znver1-ieu3,znver2-store") - ;; Fix me: Other vector type insns keeping latency 6 as of now. 
(define_insn_reservation "znver1_ieu_vector" 6 (and (eq_attr "cpu" "znver1") @@ -601,7 +415,7 @@ "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_ieu_vector" 5 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (eq_attr "type" "other,str,multi")) "znver1-vector,znver2-ivector") @@ -614,21 +428,21 @@ "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_alu1_vector" 3 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "vector") (and (eq_attr "type" "alu1") (eq_attr "memory" "none,unknown")))) "znver1-vector,znver2-ivector") (define_insn_reservation "znver1_alu1_double" 2 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "alu1") (eq_attr "memory" "none,unknown")))) "znver1-double,znver1-ieu") (define_insn_reservation "znver1_alu1_direct" 1 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "direct") (and (eq_attr "type" "alu1") (eq_attr "memory" "none,unknown")))) @@ -640,11 +454,6 @@ (and (eq_attr "type" "ibr") (eq_attr "memory" "none"))) "znver1-direct") -(define_insn_reservation "znver4_branch" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ibr") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-ieu0|znver4-bru0") ;; Indirect branches check latencies. (define_insn_reservation "znver1_indirect_branch_mem" 6 @@ -659,36 +468,25 @@ (eq_attr "memory" "load"))) "znver1-vector,znver2-ivector") -(define_insn_reservation "znver4_indirect_branch_mem" 6 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ibr") - (eq_attr "memory" "load"))) - "znver1-vector,znver2-ivector+znver4-bru0") - ;; LEA executes in ALU units with 1 cycle latency. 
(define_insn_reservation "znver1_lea" 1 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "lea")) "znver1-direct,znver1-ieu") -;; Other integer instructions +;; Other integer instrucions (define_insn_reservation "znver1_idirect" 1 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown"))) "znver1-direct,znver1-ieu") ;; Floating point (define_insn_reservation "znver1_fp_cmov" 6 - (and (eq_attr "cpu" "znver1") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "fcmov")) "znver1-vector,znver1-fvector") -(define_insn_reservation "znver2_fp_cmov" 6 - (and (eq_attr "cpu" "znver2,znver3,znver4") - (eq_attr "type" "fcmov")) - "znver1-vector,znver2-fvector") - (define_insn_reservation "znver1_fp_mov_direct_load" 8 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "direct") @@ -696,13 +494,6 @@ (eq_attr "memory" "load")))) "znver1-direct,znver1-load,znver1-fp3|znver1-fp1") -(define_insn_reservation "znver4_fp_mov_direct_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "znver1_decode" "direct") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp2|znver1-fp3") - (define_insn_reservation "znver1_fp_mov_direct_store" 5 (and (eq_attr "cpu" "znver1") (and (eq_attr "znver1_decode" "direct") @@ -710,7 +501,7 @@ (eq_attr "memory" "store")))) "znver1-direct,znver1-fp2|znver1-fp3,znver1-store") (define_insn_reservation "znver2_fp_mov_direct_store" 5 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "direct") (and (eq_attr "type" "fmov") (eq_attr "memory" "store")))) @@ -723,13 +514,6 @@ (eq_attr "memory" "none")))) "znver1-double,znver1-fp3") -(define_insn_reservation "znver4_fp_mov_double" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "znver1_decode" "double") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "none")))) - "znver1-double,znver1-fp1") - (define_insn_reservation "znver1_fp_mov_double_load" 12 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "double") @@ -737,23 +521,11 @@ (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp3") -(define_insn_reservation "znver4_fp_mov_double_load" 11 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "znver1_decode" "double") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-fp1") - (define_insn_reservation "znver1_fp_mov_direct" 1 (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "fmov")) "znver1-direct,znver1-fp3") -(define_insn_reservation "znver4_fp_mov_direct" 1 - (and (eq_attr "cpu" "znver4") - (eq_attr "type" "fmov")) - "znver1-direct,znver1-fp1") - ;; TODO: AGU? 
(define_insn_reservation "znver1_fp_spc_direct" 5 (and (eq_attr "cpu" "znver1,znver2,znver3") @@ -761,25 +533,13 @@ (eq_attr "memory" "store"))) "znver1-direct,znver1-fp3,znver1-fp2") -(define_insn_reservation "znver4_fp_spc_direct" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fpspc") - (eq_attr "memory" "store"))) - "znver1-direct,znver1-fp1,znver4-fp-store0") - -(define_insn_reservation "znver4_fp_sqrt_direct" 22 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fpspc") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp1") - (define_insn_reservation "znver1_fp_insn_vector" 6 (and (eq_attr "cpu" "znver1") (and (eq_attr "znver1_decode" "vector") (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov"))) "znver1-vector,znver1-fvector") (define_insn_reservation "znver2_fp_insn_vector" 6 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "vector") (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov"))) "znver1-vector,znver2-fvector") @@ -790,11 +550,6 @@ (eq_attr "type" "fsgn")) "znver1-direct,znver1-fp3") -(define_insn_reservation "znver4_fp_fsgn" 1 - (and (eq_attr "cpu" "znver4") - (eq_attr "type" "fsgn")) - "znver1-direct,znver1-fp0|znver1-fp1") - (define_insn_reservation "znver1_fp_fcmp" 2 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "memory" "none") @@ -802,39 +557,13 @@ (eq_attr "type" "fcmp")))) "znver1-double,znver1-fp0,znver1-fp2") -(define_insn_reservation "znver4_fp_fcmp_double" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "memory" "none") - (and (eq_attr "znver1_decode" "double") - (eq_attr "type" "fcmp")))) - "znver1-double,znver1-fp0,znver4-fp-store0") - -(define_insn_reservation "znver4_fp_fcmp" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fcmp") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0") - (define_insn_reservation "znver1_fp_fcmp_load" 9 (and (eq_attr "cpu" "znver1,znver2,znver3") - (and (eq_attr "memory" "load") + (and (eq_attr "memory" "none") (and (eq_attr "znver1_decode" "double") (eq_attr "type" "fcmp")))) "znver1-double,znver1-load, znver1-fp0,znver1-fp2") -(define_insn_reservation "znver4_fp_fcmp_double_load" 11 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "memory" "load") - (and (eq_attr "znver1_decode" "double") - (eq_attr "type" "fcmp")))) - "znver1-double,znver4-load,znver1-fp0,znver4-fp-store0") - -(define_insn_reservation "znver4_fp_fcmp_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fcmp") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp0") - ;;FADD FSUB FMUL (define_insn_reservation "znver1_fp_op_mul" 5 (and (eq_attr "cpu" "znver1,znver2,znver3") @@ -842,31 +571,12 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0*5") -(define_insn_reservation "znver4_fp_op_mul" 6 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fop,fmul") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0*6") - (define_insn_reservation "znver1_fp_op_mul_load" 12 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0*5") -(define_insn_reservation "znver4_fp_op_mul_load" 13 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fop,fmul") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp0*6") - -(define_insn_reservation "znver4_fp_op_imul" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fop,fmul") - (and (eq_attr "fp_int_src" "true") - (eq_attr "memory" "none")))) - 
"znver1-double,znver1-fp1,znver1-fp0") - (define_insn_reservation "znver1_fp_op_imul_load" 16 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") @@ -874,15 +584,8 @@ (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp3,znver1-fp0") -(define_insn_reservation "znver4_fp_op_imul_load" 17 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fop,fmul") - (and (eq_attr "fp_int_src" "true") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-fp1,znver1-fp0") - (define_insn_reservation "znver1_fp_op_div" 15 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fdiv") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3*15") @@ -893,12 +596,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3*15") -(define_insn_reservation "znver4_fp_op_div_load" 22 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fdiv") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp3*15") - (define_insn_reservation "znver1_fp_op_idiv_load" 27 (and (eq_attr "cpu" "znver1") (and (eq_attr "type" "fdiv") @@ -913,19 +610,6 @@ (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp3*19") -(define_insn_reservation "znver4_fp_op_idiv" 19 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fdiv") - (and (eq_attr "fp_int_src" "true") - (eq_attr "memory" "none")))) - "znver1-double,znver1-fp1,znver1-fp1") - -(define_insn_reservation "znver4_fp_op_idiv_load" 26 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "fdiv") - (and (eq_attr "fp_int_src" "true") - (eq_attr "memory" "none")))) - "znver1-double,znver4-load,znver1-fp1,znver1-fp1") ;; MMX, SSE, SSEn.n, AVX, AVX2 instructions (define_insn_reservation "znver1_fp_insn" 1 @@ -939,49 +623,26 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3") -(define_insn_reservation "znver4_fp_insn" 1 - (and (eq_attr "cpu" "znver4") - (eq_attr "type" "mmx,mmxadd")) - "znver1-direct,znver1-fpu") - (define_insn_reservation "znver1_mmx_add_load" 8 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxadd") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3") -(define_insn_reservation "znver4_mmx_add_load" 8 - (and (eq_attr "cpu" "znver1,znver2,znver3") - (and (eq_attr "type" "mmxadd") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fpu") - (define_insn_reservation "znver1_mmx_cmp" 1 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcmp") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp3") -(define_insn_reservation "znver4_mmx_cmp" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxcmp") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fpu") - (define_insn_reservation "znver1_mmx_cmp_load" 8 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcmp") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp3") -(define_insn_reservation "znver4_mmx_cmp_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxcmp") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fpu") - (define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1 - (and (eq_attr "cpu" "znver1,znver2,znver3,znver4") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp1|znver1-fp2") @@ -992,48 +653,18 @@ (eq_attr "memory" "load"))) 
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2") -(define_insn_reservation "znver4_mmx_cvt_pck_shuf_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp1|znver1-fp2") - (define_insn_reservation "znver1_mmx_shift_move" 1 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxshft,mmxmov") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp2") -(define_insn_reservation "znver4_mmx_shift" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxshft") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp1|znver1-fp2") - -(define_insn_reservation "znver4_mmx_move" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxmov") - (eq_attr "memory" "none"))) - "znver1-direct,znver4-fp-store0") - (define_insn_reservation "znver1_mmx_shift_move_load" 8 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxshft,mmxmov") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp2") -(define_insn_reservation "znver4_mmx_shift_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxshft") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp1|znver1-fp2") - -(define_insn_reservation "znver4_mmx_move_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxmov") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver4-fp-store0") - (define_insn_reservation "znver1_mmx_move_store" 1 (and (eq_attr "cpu" "znver1") (and (eq_attr "type" "mmxshft,mmxmov") @@ -1045,42 +676,18 @@ (eq_attr "memory" "store,both"))) "znver1-direct,znver1-fp2,znver2-store") -(define_insn_reservation "znver4_mmx_shift_store" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxshft") - (eq_attr "memory" "store,both"))) - "znver1-direct,znver1-fp1|znver1-fp2,znver2-store") - -(define_insn_reservation "znver4_mmx_move_store" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxmov") - (eq_attr "memory" "store,both"))) - "znver1-direct,znver4-fp-store0") - (define_insn_reservation "znver1_mmx_mul" 3 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxmul") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0*3") -(define_insn_reservation "znver4_mmx_mul" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxmul") - (eq_attr "memory" "none"))) - "znver1-direct,(znver1-fp0|znver1-fp3)*3") - (define_insn_reservation "znver1_mmx_load" 10 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxmul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0*3") -(define_insn_reservation "znver4_mmx_mul_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "mmxmul") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,(znver1-fp0|znver1-fp3)*3") - ;; TODO (define_insn_reservation "znver1_avx256_log" 1 (and (eq_attr "cpu" "znver1") @@ -1102,62 +709,6 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fpu") -(define_insn_reservation "znver4_sse_log" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fpu") - -(define_insn_reservation "znver4_sse_log_evex" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "V16SF,V8DF") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3") - -(define_insn_reservation "znver4_sse_log_load" 8 - (and (eq_attr "cpu" "znver4") - (and 
(eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fpu") - -(define_insn_reservation "znver4_sse_log_evex_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "V16SF,V8DF") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3") - -(define_insn_reservation "znver4_sse_ilog" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "OI") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3") - -(define_insn_reservation "znver4_sse_ilog_evex" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "TI") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fp0+znver1-fp1+znver1-fp2+znver1-fp3") - -(define_insn_reservation "znver4_sse_ilog_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "OI") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp0+znver1-fp1|znver1-fp2+znver1-fp3") - -(define_insn_reservation "znver4_sse_ilog_evex_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sselog,sselog1") - (and (eq_attr "mode" "TI") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp0+znver1-fp1+znver1-fp2+znver1-fp3") - (define_insn_reservation "znver1_sse_log_load" 8 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "sselog") @@ -1220,18 +771,6 @@ (eq_attr "memory" "none"))))) "znver1-double,znver1-fp0|znver1-fp1") -(define_insn_reservation "znver4_sse_comi" 1 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecomi") - (eq_attr "memory" "none"))) - "znver1-double,znver1-fp2|znver1-fp3,znver4-fp-store0") - -(define_insn_reservation "znver4_sse_comi_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecomi") - (eq_attr "memory" "load"))) - "znver1-double,znver4-load,znver1-fp2|znver1-fp3,znver4-fp-store0") - (define_insn_reservation "znver1_sse_comi_double_load" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,V2DF,TI")) @@ -1247,7 +786,7 @@ (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) (ior (eq_attr "cpu" "znver2") - (eq_attr "cpu" "znver3,znver4"))) + (eq_attr "cpu" "znver3"))) (and (eq_attr "prefix_extra" "1") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "none")))) @@ -1263,13 +802,6 @@ (eq_attr "memory" "load")))) "znver1-direct,znver1-load,znver1-fp1|znver1-fp2") -(define_insn_reservation "znver4_sse_test_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "prefix_extra" "1") - (and (eq_attr "type" "ssecomi") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp1|znver1-fp2") - ;; SSE moves ;; Fix me: Need to revist this again some of the moves may be restricted ;; to some fpu pipes. 
@@ -1282,7 +814,7 @@ "znver1-direct,znver1-ieu0") (define_insn_reservation "znver2_sse_mov" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "mode" "SI") (and (eq_attr "isa" "avx") (and (eq_attr "type" "ssemov") @@ -1299,7 +831,7 @@ "znver1-direct,znver1-ieu2") (define_insn_reservation "znver2_avx_mov" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "mode" "TI") (and (eq_attr "isa" "avx") (and (eq_attr "type" "ssemov") @@ -1311,8 +843,7 @@ (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) (ior (eq_attr "cpu" "znver2") - (ior (eq_attr "cpu" "znver3") - (eq_attr "cpu" "znver4")))) + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "ssemov") (eq_attr "memory" "none"))) "znver1-direct,znver1-fpu") @@ -1324,7 +855,7 @@ (eq_attr "memory" "store")))) "znver1-direct,znver1-fpu,znver1-store") (define_insn_reservation "znver2_sseavx_mov_store" 1 - (and (eq_attr "cpu" "znver2,znver3,znver4") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssemov") (eq_attr "memory" "store"))) "znver1-direct,znver1-fpu,znver2-store") @@ -1338,12 +869,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fpu") -(define_insn_reservation "znver4_sseavx_mov_load" 8 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssemov") - (eq_attr "memory" "load"))) - "znver1-double,znver4-load,znver1-fpu") - (define_insn_reservation "znver1_avx256_mov" 1 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V8SF,V4DF,OI") @@ -1370,8 +895,7 @@ (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) (ior (eq_attr "cpu" "znver2") - (ior (eq_attr "cpu" "znver3") - (eq_attr "cpu" "znver4")))) + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "sseadd") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp2|znver1-fp3") @@ -1385,12 +909,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp2|znver1-fp3") -(define_insn_reservation "znver4_sseavx_add_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "sseadd") - (eq_attr "memory" "load"))) - "znver1-double,znver4-load,znver1-fp2|znver1-fp3") - (define_insn_reservation "znver1_avx256_add" 3 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V8SF,V4DF,OI") @@ -1442,20 +960,6 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1") -(define_insn_reservation "znver4_sseavx_fma" 4 - (and (and (eq_attr "cpu" "znver4") - (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")) - (and (eq_attr "type" "ssemuladd") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sseavx_fma_evex" 4 - (and (and (eq_attr "cpu" "znver4") - (eq_attr "mode" "V16SF,V8DF")) - (and (eq_attr "type" "ssemuladd") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0+znver1-fp1") - (define_insn_reservation "znver3_sseavx_fma_load" 11 (and (and (eq_attr "cpu" "znver3") (eq_attr "mode" "SF,DF,V4SF,V2DF")) @@ -1463,20 +967,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") -(define_insn_reservation "znver4_sseavx_fma_load" 11 - (and (and (eq_attr "cpu" "znver4") - (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")) - (and (eq_attr "type" "ssemuladd") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sseavx_fma_evex_load" 11 - (and (and (eq_attr "cpu" "znver4") - (eq_attr "mode" "V16SF,V8DF")) - (and (eq_attr "type" "ssemuladd") - (eq_attr "memory" "load"))) - 
"znver1-direct,znver4-load,znver1-fp0+znver1-fp1") - (define_insn_reservation "znver3_avx256_fma" 4 (and (eq_attr "cpu" "znver3") (and (eq_attr "mode" "V8SF,V4DF") @@ -1500,20 +990,6 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3") -(define_insn_reservation "znver4_sseavx_iadd" 1 - (and (and (eq_attr "cpu" "znver4") - (eq_attr "mode" "QI,HI,SI,DI,TI,OI,XI")) - (and (eq_attr "type" "sseiadd") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fpu") - -(define_insn_reservation "znver4_sseavx_iadd_load" 8 - (and (and (eq_attr "cpu" "znver4") - (eq_attr "mode" "QI,HI,SI,DI,TI,OI,XI")) - (and (eq_attr "type" "sseiadd") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fpu") - (define_insn_reservation "znver1_sseavx_iadd_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "DI,TI")) @@ -1577,33 +1053,6 @@ (eq_attr "memory" "load"))))) "znver1-double,znver1-load,znver1-fp3,znver1-ieu0") -(define_insn_reservation "znver4_ssecvtsfdf_si" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "SI") - (and (eq_attr "type" "sseicvt") - (eq_attr "memory" "none")))) - "znver1-double,znver1-fp2|znver1-fp3,znver4-fp-store0") - -(define_insn_reservation "znver4_ssecvtsfdf_si_load" 11 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "SI") - (and (eq_attr "type" "sseicvt") - (eq_attr "memory" "load")))) - "znver1-double,znver4-load,znver1-fp2|znver1-fp3,znver4-fp-store0") - -(define_insn_reservation "znver4_ssecvtsfdf_di" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "DI") - (and (eq_attr "type" "sseicvt") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fp2|znver1-fp3") - -(define_insn_reservation "znver4_ssecvtsfdf_di_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "DI") - (and (eq_attr "type" "sseicvt") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp2|znver1-fp3") ;; All other used ssecvt fp3 pipes ;; Check: Need to revisit this again. 
@@ -1620,24 +1069,12 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3") -(define_insn_reservation "znver4_ssecvt" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp2|znver1-fp3") - (define_insn_reservation "znver1_ssecvt_load" 11 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "ssecvt") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3") -(define_insn_reservation "znver4_ssecvt_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (eq_attr "memory" "load"))) - "znver1-direct,znver4-load,znver1-fp2|znver1-fp3") - ;; SSE div (define_insn_reservation "znver1_ssediv_ss_ps" 10 (and (ior (and (eq_attr "cpu" "znver1") @@ -1650,21 +1087,6 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3*10") -(define_insn_reservation "znver4_ssediv_ss_ps" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fp3*10") - -(define_insn_reservation "znver4_ssediv_ss_ps_evex" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "none"))))) - "znver1-direct,znver1-fp1*10") - (define_insn_reservation "znver1_ssediv_ss_ps_load" 17 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,SF")) @@ -1676,21 +1098,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3*10") -(define_insn_reservation "znver4_ssediv_ss_ps_load" 17 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp3*10") - -(define_insn_reservation "znver4_ssediv_ss_ps_evex_load" 17 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V16SF,V8SF,V4SF,SF") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "load"))))) - "znver1-direct,znver4-load,znver1-fp1*10") - (define_insn_reservation "znver1_ssediv_sd_pd" 13 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V2DF,DF")) @@ -1702,21 +1109,6 @@ (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3*13") -(define_insn_reservation "znver4_ssediv_sd_pd" 13 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF") - (eq_attr "memory" "none")))) - "znver1-direct,znver1-fp3*13") - -(define_insn_reservation "znver4_ssediv_sd_pd_evex" 13 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "none"))))) - "znver1-direct,znver1-fp1*13") - (define_insn_reservation "znver1_ssediv_sd_pd_load" 20 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V2DF,DF")) @@ -1728,21 +1120,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3*13") -(define_insn_reservation "znver4_ssediv_sd_pd_load" 20 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF") - (eq_attr "memory" "load")))) - "znver1-direct,znver4-load,znver1-fp3*13") - -(define_insn_reservation "znver4_ssediv_sd_pd_evex_load" 20 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") - (and (eq_attr "mode" "V8DF,V4DF,V2DF,DF") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "load"))))) - "znver1-direct,znver4-load,znver1-fp1*13") - (define_insn_reservation 
"znver1_ssediv_avx256_ps" 12 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V8SF") @@ -1776,19 +1153,12 @@ (eq_attr "mode" "V4SF,SF")) (and (eq_attr "cpu" "znver2") (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")) - (and (eq_attr "cpu" "znver3,znver4") + (and (eq_attr "cpu" "znver3") (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))) (and (eq_attr "type" "ssemul") (eq_attr "memory" "none"))) "znver1-direct,(znver1-fp0|znver1-fp1)*3") -(define_insn_reservation "znver4_ssemul_ss_ps_evex" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssemul") - (and (eq_attr "mode" "V8DF,V16SF") - (eq_attr "memory" "none")))) - "znver1-direct,(znver1-fp0+znver1-fp1)*3") - (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,SF")) @@ -1800,13 +1170,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") -(define_insn_reservation "znver4_ssemul_ss_ps_evex_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssemul") - (and (eq_attr "mode" "V8DF,V16SF") - (eq_attr "memory" "none")))) - "znver1-direct,znver4-load,(znver1-fp0+znver1-fp1)*3") - (define_insn_reservation "znver1_ssemul_avx256_ps" 3 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V8SF") @@ -1868,44 +1231,12 @@ (eq_attr "mode" "TI")) (and (eq_attr "cpu" "znver2") (eq_attr "mode" "TI,OI")) - (and (eq_attr "cpu" "znver3,znver4") + (and (eq_attr "cpu" "znver3") (eq_attr "mode" "TI,OI"))) (and (eq_attr "type" "sseimul") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0*3") -(define_insn_reservation "znver4_sseimul" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "TI,OI") - (and (eq_attr "type" "sseimul") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "none"))))) - "znver1-direct,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sseimul_evex" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "XI") - (and (eq_attr "type" "sseimul") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "none"))))) - "znver1-direct,znver1-fp0+znver1-fp1") - -(define_insn_reservation "znver4_sseimul_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "TI,OI") - (and (eq_attr "type" "sseimul") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "load"))))) - "znver1-direct,znver4-load,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sseimul_evex_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "XI") - (and (eq_attr "type" "sseimul") - (and (eq_attr "prefix" "evex") - (eq_attr "memory" "load"))))) - "znver1-direct,znver4-load,znver1-fp0+znver1-fp1") - (define_insn_reservation "znver1_sseimul_avx256" 4 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "OI") @@ -1951,66 +1282,12 @@ (eq_attr "mode" "SF,DF,V4SF,V2DF")) (and (eq_attr "cpu" "znver2") (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")) - (and (eq_attr "cpu" "znver3,znver4") + (and (eq_attr "cpu" "znver3") (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))) (and (eq_attr "type" "ssecmp") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1") -(define_insn_reservation "znver4_sse_cmp" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "SF,DF,V4SF,V2DF") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "none")))))) - "znver1-direct,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_cmp_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "SF,DF,V4SF,V2DF") - (and (eq_attr "type" "ssecmp") - (and (eq_attr 
"prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "load")))))) - "znver1-double,znver4-load,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_cmp_vex" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "V8SF,V4DF") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "none")))))) - "znver1-direct,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_cmp_vex_load" 11 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "V8SF,V4DF") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "load")))))) - "znver1-double,znver4-load,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_cmp_evex" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "V16SF,V8DF") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "none")))))) - "znver1-direct,znver1-fp0+znver1-fp1") - -(define_insn_reservation "znver4_sse_cmp_evex_load" 12 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "V16SF,V8DF") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "load")))))) - "znver1-double,znver4-load,znver1-fp0+znver1-fp1") - (define_insn_reservation "znver1_sse_cmp_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF")) @@ -2041,7 +1318,7 @@ (eq_attr "mode" "QI,HI,SI,DI,TI")) (and (eq_attr "cpu" "znver2") (eq_attr "mode" "QI,HI,SI,DI,TI,OI")) - (and (eq_attr "cpu" "znver3,znver4") + (and (eq_attr "cpu" "znver3") (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))) (and (eq_attr "type" "ssecmp") (eq_attr "memory" "none"))) @@ -2058,60 +1335,6 @@ (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp3") -(define_insn_reservation "znver4_sse_icmp" 3 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "QI,HI,SI,DI,TI") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "none")))))) - "znver1-direct,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_icmp_load" 10 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "QI,HI,SI,DI,TI") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "load")))))) - "znver1-double,znver4-load,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_icmp_vex" 4 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "OI") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "none")))))) - "znver1-direct,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_cmp_ivex_load" 11 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "OI") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "load")))))) - "znver1-double,znver4-load,znver1-fp0|znver1-fp1") - -(define_insn_reservation "znver4_sse_icmp_evex" 5 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "XI") - (and (eq_attr "type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "none")))))) - "znver1-direct,znver1-fp0+znver1-fp1") - -(define_insn_reservation "znver4_sse_icmp_evex_load" 12 - (and (eq_attr "cpu" "znver4") - (and (eq_attr "mode" "XI") - (and (eq_attr 
"type" "ssecmp") - (and (eq_attr "prefix" "evex") - (and (eq_attr "length_immediate" "1") - (eq_attr "memory" "load")))))) - "znver1-double,znver4-load,znver1-fp0+znver1-fp1") - (define_insn_reservation "znver1_sse_icmp_avx256" 1 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "OI") diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc index 8fcca18..9290a1f 100644 --- a/gcc/config/microblaze/microblaze.cc +++ b/gcc/config/microblaze/microblaze.cc @@ -1103,7 +1103,7 @@ microblaze_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (xinsn) == SYMBOL_REF) { - rtx reg; + rtx reg = NULL; if (microblaze_tls_symbol_p(xinsn)) { reg = microblaze_legitimize_tls_address (xinsn, NULL_RTX); @@ -1133,6 +1133,11 @@ microblaze_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, reg = pic_ref; } } + else + { + /* This should never happen. */ + gcc_unreachable (); + } return reg; } @@ -1474,7 +1479,7 @@ microblaze_address_insns (rtx x, machine_mode mode) case TLS_DTPREL: return 1; default : - abort(); + gcc_unreachable (); } default: break; @@ -2624,7 +2629,7 @@ print_operand_address (FILE * file, rtx addr) fputs ("@TLSDTPREL", file); break; default : - abort(); + gcc_unreachable (); break; } } @@ -3413,7 +3418,7 @@ microblaze_expand_move (machine_mode mode, rtx operands[]) } if (GET_CODE (op1) == PLUS && GET_CODE (XEXP (op1,1)) == CONST) { - rtx p0, p1, result, temp; + rtx p0, p1 = NULL, result, temp; p0 = XEXP (XEXP (op1,1), 0); @@ -3423,6 +3428,10 @@ microblaze_expand_move (machine_mode mode, rtx operands[]) p0 = XEXP (p0, 0); } + /* This should never happen. */ + if (p1 == NULL) + gcc_unreachable (); + if (GET_CODE (p0) == UNSPEC && GET_CODE (p1) == CONST_INT && flag_pic && TARGET_PIC_DATA_TEXT_REL) { @@ -3799,7 +3808,7 @@ get_branch_target (rtx branch) if (GET_CODE (call) == SET) call = SET_SRC (call); if (GET_CODE (call) != CALL) - abort (); + gcc_unreachable (); return XEXP (XEXP (call, 0), 0); } diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 387376b..699ea6c 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -20277,13 +20277,7 @@ mips_option_override (void) target_flags |= MASK_ODD_SPREG; } - if (!ISA_HAS_COMPACT_BRANCHES && mips_cb == MIPS_CB_ALWAYS) - { - error ("unsupported combination: %qs%s %s", - mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "", - "-mcompact-branches=always"); - } - else if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER) + if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER) { error ("unsupported combination: %qs%s %s", mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "", diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 74b6e11..69de74e 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -120,11 +120,9 @@ struct mips_cpu_info { #define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic) /* Compact branches must not be used if the user either selects the - 'never' policy or the 'optimal' policy on a core that lacks + 'never' policy or the 'optimal' / 'always' policy on a core that lacks compact branch instructions. 
*/ -#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER \ - || (mips_cb == MIPS_CB_OPTIMAL \ - && !ISA_HAS_COMPACT_BRANCHES)) +#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER || !ISA_HAS_COMPACT_BRANCHES) /* Compact branches may be used if the user either selects the 'always' policy or the 'optimal' policy on a core that supports @@ -134,10 +132,11 @@ struct mips_cpu_info { && ISA_HAS_COMPACT_BRANCHES)) /* Compact branches must always be generated if the user selects - the 'always' policy or the 'optimal' policy om a core that - lacks delay slot branch instructions. */ -#define TARGET_CB_ALWAYS (mips_cb == MIPS_CB_ALWAYS \ - || (mips_cb == MIPS_CB_OPTIMAL \ + the 'always' policy on a core support compact branches, + or the 'optimal' policy on a core that lacks delay slot branch instructions. */ +#define TARGET_CB_ALWAYS ((mips_cb == MIPS_CB_ALWAYS \ + && ISA_HAS_COMPACT_BRANCHES) \ + || (mips_cb == MIPS_CB_OPTIMAL \ && !ISA_HAS_DELAY_SLOTS)) /* Special handling for JRC that exists in microMIPSR3 as well as R6 @@ -677,6 +676,13 @@ struct mips_cpu_info { builtin_define ("__mips_no_lxc1_sxc1"); \ if (!ISA_HAS_UNFUSED_MADD4 && !ISA_HAS_FUSED_MADD4) \ builtin_define ("__mips_no_madd4"); \ + \ + if (TARGET_CB_NEVER) \ + builtin_define ("__mips_compact_branches_never"); \ + else if (TARGET_CB_ALWAYS) \ + builtin_define ("__mips_compact_branches_always"); \ + else \ + builtin_define ("__mips_compact_branches_optimal"); \ } \ while (0) @@ -909,7 +915,8 @@ struct mips_cpu_info { {"mips-plt", "%{!mplt:%{!mno-plt:-m%(VALUE)}}" }, \ {"synci", "%{!msynci:%{!mno-synci:-m%(VALUE)}}" }, \ {"lxc1-sxc1", "%{!mlxc1-sxc1:%{!mno-lxc1-sxc1:-m%(VALUE)}}" }, \ - {"madd4", "%{!mmadd4:%{!mno-madd4:-m%(VALUE)}}" } \ + {"madd4", "%{!mmadd4:%{!mno-madd4:-m%(VALUE)}}" }, \ + {"compact-branches", "%{!mcompact-branches=*:-mcompact-branches=%(VALUE)}" } \ /* A spec that infers the: -mnan=2008 setting from a -mips argument, @@ -3427,6 +3434,7 @@ struct GTY(()) machine_function { /* If we are *not* using multilibs and the default ABI is not ABI_32 we need to change these from /lib and /usr/lib. */ +#ifndef ENABLE_MULTIARCH #if MIPS_ABI_DEFAULT == ABI_N32 #define STANDARD_STARTFILE_PREFIX_1 "/lib32/" #define STANDARD_STARTFILE_PREFIX_2 "/usr/lib32/" @@ -3434,6 +3442,7 @@ struct GTY(()) machine_function { #define STANDARD_STARTFILE_PREFIX_1 "/lib64/" #define STANDARD_STARTFILE_PREFIX_2 "/usr/lib64/" #endif +#endif /* Load store bonding is not supported by micromips and fix_24k. The performance can be degraded for those targets. 
Hence, do not bond for diff --git a/gcc/config/mips/t-linux64 b/gcc/config/mips/t-linux64 index 2fdd8e0..37d176e 100644 --- a/gcc/config/mips/t-linux64 +++ b/gcc/config/mips/t-linux64 @@ -20,7 +20,26 @@ MULTILIB_OPTIONS = mabi=n32/mabi=32/mabi=64 MULTILIB_DIRNAMES = n32 32 64 MIPS_EL = $(if $(filter %el, $(firstword $(subst -, ,$(target)))),el) MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI, $(target_cpu_default)) $(filter soft, $(with_float))),soft) -MULTILIB_OSDIRNAMES = \ +ifeq (yes,$(enable_multiarch)) + ifneq (,$(findstring gnuabi64,$(target))) + MULTILIB_OSDIRNAMES = \ + ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ + ../libo32$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ + ../lib$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + else ifneq (,$(findstring gnuabin32,$(target))) + MULTILIB_OSDIRNAMES = \ + ../lib$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ + ../libo32$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ + ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + else + MULTILIB_OSDIRNAMES = \ + ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ + ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ + ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + endif +else + MULTILIB_OSDIRNAMES = \ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) +endif diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index 8997284..951dcc5 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -128,3 +128,25 @@ "POLY_INT" (and (match_code "const_poly_int") (match_test "known_eq (rtx_to_poly_int64 (op), BYTES_PER_RISCV_VECTOR)"))) + +(define_constraint "vu" + "A undefined vector value." + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_VUNDEF"))) + +(define_constraint "vi" + "A vector 5-bit signed immediate." + (and (match_code "const_vector") + (match_test "riscv_vector::const_vec_all_same_in_range_p (op, -16, 15)"))) + +(define_constraint "Wc0" + "@internal + A constraint that matches a vector of immediate all zeros." + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + +(define_constraint "Wc1" + "@internal + A constraint that matches a vector of immediate all ones." + (and (match_code "const_vector") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 5e149b3..c2ff41b 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -262,3 +262,26 @@ return true; }) + +;; Predicates for the V extension. 
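A short aside on the new "vi" constraint defined above (the V-extension predicate definitions continue immediately below): it matches a const_vector whose elements are all equal and lie in the 5-bit signed immediate range checked by const_vec_all_same_in_range_p (op, -16, 15), which is the operand form the later @pred_mov<mode> pattern maps to vmv.v.i. A minimal sketch of that range check, illustrative only and not code from the patch:

    #include <stdbool.h>

    /* A RISC-V V "vi" operand is a splat of one value that fits in a
       5-bit signed immediate, i.e. the range [-16, 15].  */
    static bool
    fits_simm5 (long value)
    {
      return value >= -16 && value <= 15;
    }

A splat of any value outside this range cannot be encoded as an immediate and has to go through a scalar register instead (vmv.v.x).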
+(define_special_predicate "vector_length_operand" + (ior (match_operand 0 "pmode_register_operand") + (match_operand 0 "const_csr_operand"))) + +(define_predicate "reg_or_mem_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "memory_operand"))) + +(define_predicate "vector_move_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_code "const_vector"))) + +(define_predicate "vector_mask_operand" + (ior (match_operand 0 "register_operand") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) + +(define_predicate "vector_merge_operand" + (ior (match_operand 0 "memory_operand") + (ior (match_operand 0 "register_operand") + (match_test "GET_CODE (op) == UNSPEC + && (XINT (op, 1) == UNSPEC_VUNDEF)")))) diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index ecb5e21..b84ad99 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -17,19 +17,46 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* This is a list of tune that implement RISC-V. + + Before using #include to read this file, define a macro: + + RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) + + The TUNE_NAME is the name of the micro-arch, represented as a string. + The PIPELINE_MODEL is the pipeline model of the micro-arch, represented as a + string, defined in riscv.md. + The TUNE_INFO is the detail cost model for this core, represented as an + identifier, reference to riscv.cc. */ + +#ifndef RISCV_TUNE +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) +#endif + +RISCV_TUNE("rocket", generic, rocket_tune_info) +RISCV_TUNE("sifive-3-series", generic, rocket_tune_info) +RISCV_TUNE("sifive-5-series", generic, rocket_tune_info) +RISCV_TUNE("sifive-7-series", generic, sifive_7_tune_info) +RISCV_TUNE("thead-c906", generic, thead_c906_tune_info) +RISCV_TUNE("size", generic, optimize_size_tune_info) + +#undef RISCV_TUNE + /* This is a list of cores that implement RISC-V. Before using #include to read this file, define a macro: - RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH, TUNE_INFO) + RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) The CORE_NAME is the name of the core, represented as a string. The ARCH is the default arch of the core, represented as a string, can be NULL if no default arch. The MICRO_ARCH is the name of the core for which scheduling decisions - will be made, represented as an identifier. - The TUNE_INFO is the detail cost model for this core, represented as an - identifier, reference to riscv-tunes.def. */ + will be made, represented as an identifier. */ + +#ifndef RISCV_CORE +#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) +#endif RISCV_CORE("sifive-e20", "rv32imc", "rocket") RISCV_CORE("sifive-e21", "rv32imac", "rocket") diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def index 95f69e8..556b5c5 100644 --- a/gcc/config/riscv/riscv-modes.def +++ b/gcc/config/riscv/riscv-modes.def @@ -37,21 +37,24 @@ FLOAT_MODE (TF, 16, ieee_quad_format); | VNx32BI | 1 | 2 | | VNx64BI | N/A | 1 | */ -VECTOR_BOOL_MODE (VNx1BI, 1, BI, 8); -VECTOR_BOOL_MODE (VNx2BI, 2, BI, 8); -VECTOR_BOOL_MODE (VNx4BI, 4, BI, 8); -VECTOR_BOOL_MODE (VNx8BI, 8, BI, 8); -VECTOR_BOOL_MODE (VNx16BI, 16, BI, 8); -VECTOR_BOOL_MODE (VNx32BI, 32, BI, 8); +/* For RVV modes, each boolean value occupies 1-bit. + 4th argument is specify the minmial possible size of the vector mode, + and will adjust to the right size by ADJUST_BYTESIZE. 
*/ +VECTOR_BOOL_MODE (VNx1BI, 1, BI, 1); +VECTOR_BOOL_MODE (VNx2BI, 2, BI, 1); +VECTOR_BOOL_MODE (VNx4BI, 4, BI, 1); +VECTOR_BOOL_MODE (VNx8BI, 8, BI, 1); +VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2); +VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4); VECTOR_BOOL_MODE (VNx64BI, 64, BI, 8); -ADJUST_NUNITS (VNx1BI, riscv_vector_chunks * 1); -ADJUST_NUNITS (VNx2BI, riscv_vector_chunks * 2); -ADJUST_NUNITS (VNx4BI, riscv_vector_chunks * 4); -ADJUST_NUNITS (VNx8BI, riscv_vector_chunks * 8); -ADJUST_NUNITS (VNx16BI, riscv_vector_chunks * 16); -ADJUST_NUNITS (VNx32BI, riscv_vector_chunks * 32); -ADJUST_NUNITS (VNx64BI, riscv_vector_chunks * 64); +ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1)); +ADJUST_NUNITS (VNx2BI, riscv_v_adjust_nunits (VNx2BImode, 2)); +ADJUST_NUNITS (VNx4BI, riscv_v_adjust_nunits (VNx4BImode, 4)); +ADJUST_NUNITS (VNx8BI, riscv_v_adjust_nunits (VNx8BImode, 8)); +ADJUST_NUNITS (VNx16BI, riscv_v_adjust_nunits (VNx16BImode, 16)); +ADJUST_NUNITS (VNx32BI, riscv_v_adjust_nunits (VNx32BImode, 32)); +ADJUST_NUNITS (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 64)); ADJUST_ALIGNMENT (VNx1BI, 1); ADJUST_ALIGNMENT (VNx2BI, 1); @@ -67,33 +70,33 @@ ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); -ADJUST_BYTESIZE (VNx64BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); +ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8)); /* | Mode | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 | - | | LMUL | SEW/LMUL | LMUL | SEW/LMUL | - | VNx1QI | MF4 | 32 | MF8 | 64 | - | VNx2QI | MF2 | 16 | MF4 | 32 | - | VNx4QI | M1 | 8 | MF2 | 16 | - | VNx8QI | M2 | 4 | M1 | 8 | - | VNx16QI | M4 | 2 | M2 | 4 | - | VNx32QI | M8 | 1 | M4 | 2 | - | VNx64QI | N/A | N/A | M8 | 1 | - | VNx1(HI|HF) | MF2 | 32 | MF4 | 64 | - | VNx2(HI|HF) | M1 | 16 | MF2 | 32 | - | VNx4(HI|HF) | M2 | 8 | M1 | 16 | - | VNx8(HI|HF) | M4 | 4 | M2 | 8 | - | VNx16(HI|HF)| M8 | 2 | M4 | 4 | - | VNx32(HI|HF)| N/A | N/A | M8 | 2 | - | VNx1(SI|SF) | M1 | 32 | MF2 | 64 | - | VNx2(SI|SF) | M2 | 16 | M1 | 32 | - | VNx4(SI|SF) | M4 | 8 | M2 | 16 | - | VNx8(SI|SF) | M8 | 4 | M4 | 8 | - | VNx16(SI|SF)| N/A | N/A | M8 | 4 | - | VNx1(DI|DF) | N/A | N/A | M1 | 64 | - | VNx2(DI|DF) | N/A | N/A | M2 | 32 | - | VNx4(DI|DF) | N/A | N/A | M4 | 16 | - | VNx8(DI|DF) | N/A | N/A | M8 | 8 | + | | LMUL | SEW/LMUL | LMUL | SEW/LMUL | + | VNx1QI | MF4 | 32 | MF8 | 64 | + | VNx2QI | MF2 | 16 | MF4 | 32 | + | VNx4QI | M1 | 8 | MF2 | 16 | + | VNx8QI | M2 | 4 | M1 | 8 | + | VNx16QI | M4 | 2 | M2 | 4 | + | VNx32QI | M8 | 1 | M4 | 2 | + | VNx64QI | N/A | N/A | M8 | 1 | + | VNx1(HI|HF) | MF2 | 32 | MF4 | 64 | + | VNx2(HI|HF) | M1 | 16 | MF2 | 32 | + | VNx4(HI|HF) | M2 | 8 | M1 | 16 | + | VNx8(HI|HF) | M4 | 4 | M2 | 8 | + | VNx16(HI|HF)| M8 | 2 | M4 | 4 | + | VNx32(HI|HF)| N/A | N/A | M8 | 2 | + | VNx1(SI|SF) | M1 | 32 | MF2 | 64 | + | VNx2(SI|SF) | M2 | 16 | M1 | 32 | + | VNx4(SI|SF) | M4 | 8 | M2 | 16 | + | VNx8(SI|SF) | M8 | 4 | M4 | 8 | + | VNx16(SI|SF)| N/A | N/A | M8 | 4 | + | VNx1(DI|DF) | N/A | N/A | M1 | 64 | + | VNx2(DI|DF) | N/A | N/A | M2 | 32 | + | VNx4(DI|DF) | N/A | N/A | M4 | 16 | + | VNx8(DI|DF) | N/A | N/A | M8 | 8 | */ /* Define RVV modes whose sizes are multiples of 64-bit chunks. 
*/ @@ -101,13 +104,13 @@ ADJUST_BYTESIZE (VNx64BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk); VECTOR_MODES_WITH_PREFIX (VNx, INT, 8 * NVECS, 0); \ VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8 * NVECS, 0); \ \ - ADJUST_NUNITS (VB##QI, riscv_vector_chunks * NVECS * 8); \ - ADJUST_NUNITS (VH##HI, riscv_vector_chunks * NVECS * 4); \ - ADJUST_NUNITS (VS##SI, riscv_vector_chunks * NVECS * 2); \ - ADJUST_NUNITS (VD##DI, riscv_vector_chunks * NVECS); \ - ADJUST_NUNITS (VH##HF, riscv_vector_chunks * NVECS * 4); \ - ADJUST_NUNITS (VS##SF, riscv_vector_chunks * NVECS * 2); \ - ADJUST_NUNITS (VD##DF, riscv_vector_chunks * NVECS); \ + ADJUST_NUNITS (VB##QI, riscv_v_adjust_nunits (VB##QI##mode, NVECS * 8)); \ + ADJUST_NUNITS (VH##HI, riscv_v_adjust_nunits (VH##HI##mode, NVECS * 4)); \ + ADJUST_NUNITS (VS##SI, riscv_v_adjust_nunits (VS##SI##mode, NVECS * 2)); \ + ADJUST_NUNITS (VD##DI, riscv_v_adjust_nunits (VD##DI##mode, NVECS)); \ + ADJUST_NUNITS (VH##HF, riscv_v_adjust_nunits (VH##HF##mode, NVECS * 4)); \ + ADJUST_NUNITS (VS##SF, riscv_v_adjust_nunits (VS##SF##mode, NVECS * 2)); \ + ADJUST_NUNITS (VD##DF, riscv_v_adjust_nunits (VD##DF##mode, NVECS)); \ \ ADJUST_ALIGNMENT (VB##QI, 1); \ ADJUST_ALIGNMENT (VH##HI, 2); \ @@ -128,9 +131,9 @@ RVV_MODES (8, VNx64, VNx32, VNx16, VNx8) VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 0); VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 4, 0); -ADJUST_NUNITS (VNx4QI, riscv_vector_chunks * 4); -ADJUST_NUNITS (VNx2HI, riscv_vector_chunks * 2); -ADJUST_NUNITS (VNx2HF, riscv_vector_chunks * 2); +ADJUST_NUNITS (VNx4QI, riscv_v_adjust_nunits (VNx4QImode, 4)); +ADJUST_NUNITS (VNx2HI, riscv_v_adjust_nunits (VNx2HImode, 2)); +ADJUST_NUNITS (VNx2HF, riscv_v_adjust_nunits (VNx2HFmode, 2)); ADJUST_ALIGNMENT (VNx4QI, 1); ADJUST_ALIGNMENT (VNx2HI, 2); ADJUST_ALIGNMENT (VNx2HF, 2); @@ -139,28 +142,28 @@ ADJUST_ALIGNMENT (VNx2HF, 2); So we use 'VECTOR_MODE_WITH_PREFIX' to define VNx1SImode and VNx1SFmode. */ VECTOR_MODE_WITH_PREFIX (VNx, INT, SI, 1, 0); VECTOR_MODE_WITH_PREFIX (VNx, FLOAT, SF, 1, 0); -ADJUST_NUNITS (VNx1SI, riscv_vector_chunks); -ADJUST_NUNITS (VNx1SF, riscv_vector_chunks); +ADJUST_NUNITS (VNx1SI, riscv_v_adjust_nunits (VNx1SImode, 1)); +ADJUST_NUNITS (VNx1SF, riscv_v_adjust_nunits (VNx1SFmode, 1)); ADJUST_ALIGNMENT (VNx1SI, 4); ADJUST_ALIGNMENT (VNx1SF, 4); VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 0); -ADJUST_NUNITS (VNx2QI, riscv_vector_chunks * 2); +ADJUST_NUNITS (VNx2QI, riscv_v_adjust_nunits (VNx2QImode, 2)); ADJUST_ALIGNMENT (VNx2QI, 1); /* 'VECTOR_MODES_WITH_PREFIX' does not allow ncomponents < 2. So we use 'VECTOR_MODE_WITH_PREFIX' to define VNx1HImode and VNx1HFmode. */ VECTOR_MODE_WITH_PREFIX (VNx, INT, HI, 1, 0); VECTOR_MODE_WITH_PREFIX (VNx, FLOAT, HF, 1, 0); -ADJUST_NUNITS (VNx1HI, riscv_vector_chunks); -ADJUST_NUNITS (VNx1HF, riscv_vector_chunks); +ADJUST_NUNITS (VNx1HI, riscv_v_adjust_nunits (VNx1HImode, 1)); +ADJUST_NUNITS (VNx1HF, riscv_v_adjust_nunits (VNx1HFmode, 1)); ADJUST_ALIGNMENT (VNx1HI, 2); ADJUST_ALIGNMENT (VNx1HF, 2); /* 'VECTOR_MODES_WITH_PREFIX' does not allow ncomponents < 2. So we use 'VECTOR_MODE_WITH_PREFIX' to define VNx1QImode. 
*/ VECTOR_MODE_WITH_PREFIX (VNx, INT, QI, 1, 0); -ADJUST_NUNITS (VNx1QI, riscv_vector_chunks); +ADJUST_NUNITS (VNx1QI, riscv_v_adjust_nunits (VNx1QImode, 1)); ADJUST_ALIGNMENT (VNx1QI, 1); /* TODO: According to RISC-V 'V' ISA spec, the maximun vector length can diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 55e0bc0..63ac56a 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -162,6 +162,12 @@ enum stack_protector_guard { #define MASK_ZMMUL (1 << 0) #define TARGET_ZMMUL ((riscv_zm_subext & MASK_ZMMUL) != 0) +#define MASK_SVINVAL (1 << 0) +#define MASK_SVNAPOT (1 << 1) + +#define TARGET_SVINVAL ((riscv_sv_subext & MASK_SVINVAL) != 0) +#define TARGET_SVNAPOT ((riscv_sv_subext & MASK_SVNAPOT) != 0) + /* Bit of riscv_zvl_flags will set contintuly, N-1 bit will set if N-bit is set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use popcount to caclulate the minimal VLEN. */ diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index f8c9932..5a718bb 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -75,7 +75,8 @@ extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); extern void riscv_reinit (void); -extern bool riscv_v_ext_enabled_vector_mode_p (machine_mode); +extern poly_uint64 riscv_regmode_natural_size (machine_mode); +extern bool riscv_v_ext_vector_mode_p (machine_mode); /* Routines implemented in riscv-c.cc. */ void riscv_cpu_cpp_builtins (cpp_reader *); @@ -126,6 +127,19 @@ extern bool verify_type_context (location_t, type_context_kind, const_tree, bool extern void handle_pragma_vector (void); extern tree builtin_decl (unsigned, bool); extern rtx expand_builtin (unsigned int, tree, rtx); +extern bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern bool legitimize_move (rtx, rtx, machine_mode); +enum tail_policy +{ + TAIL_UNDISTURBED = 0, + TAIL_AGNOSTIC = 1, +}; + +enum mask_policy +{ + MASK_UNDISTURBED = 0, + MASK_AGNOSTIC = 1, +}; } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc new file mode 100644 index 0000000..6615a5c --- /dev/null +++ b/gcc/config/riscv/riscv-v.cc @@ -0,0 +1,180 @@ +/* Subroutines used for code generation for RISC-V 'V' Extension for GNU + compiler. Copyright (C) 2022-2022 Free Software Foundation, Inc. Contributed + by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "backend.h" +#include "rtl.h" +#include "insn-config.h" +#include "insn-attr.h" +#include "recog.h" +#include "alias.h" +#include "tree.h" +#include "stringpool.h" +#include "attribs.h" +#include "explow.h" +#include "memmodel.h" +#include "emit-rtl.h" +#include "tm_p.h" +#include "target.h" +#include "expr.h" +#include "optabs.h" + +using namespace riscv_vector; + +namespace riscv_vector { + +template <int MAX_OPERANDS> class insn_expander +{ +public: + insn_expander () : m_opno (0) {} + void add_output_operand (rtx x, machine_mode mode) + { + create_output_operand (&m_ops[m_opno++], x, mode); + gcc_assert (m_opno <= MAX_OPERANDS); + } + void add_input_operand (rtx x, machine_mode mode) + { + create_input_operand (&m_ops[m_opno++], x, mode); + gcc_assert (m_opno <= MAX_OPERANDS); + } + void add_all_one_mask_operand (machine_mode mode) + { + add_input_operand (CONSTM1_RTX (mode), mode); + } + void add_vundef_operand (machine_mode mode) + { + add_input_operand (gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), + UNSPEC_VUNDEF), + mode); + } + void add_policy_operand (enum tail_policy vta, enum mask_policy vma) + { + rtx tail_policy_rtx = vta == TAIL_UNDISTURBED ? const0_rtx : const1_rtx; + rtx mask_policy_rtx = vma == MASK_UNDISTURBED ? const0_rtx : const1_rtx; + add_input_operand (tail_policy_rtx, Pmode); + add_input_operand (mask_policy_rtx, Pmode); + } + + void expand (enum insn_code icode, bool temporary_volatile_p = false) + { + if (temporary_volatile_p) + { + temporary_volatile_ok v (true); + expand_insn (icode, m_opno, m_ops); + } + else + expand_insn (icode, m_opno, m_ops); + } + +private: + int m_opno; + expand_operand m_ops[MAX_OPERANDS]; +}; + +/* Return true if X is a const_vector with all duplicate elements, which is in + the range between MINVAL and MAXVAL. */ +bool +const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) +{ + rtx elt; + return (const_vec_duplicate_p (x, &elt) && CONST_INT_P (elt) + && IN_RANGE (INTVAL (elt), minval, maxval)); +} + +/* Emit an RVV unmask && vl mov from SRC to DEST. */ +static void +emit_pred_move (rtx dest, rtx src, rtx vl, machine_mode mask_mode) +{ + insn_expander<7> e; + + machine_mode mode = GET_MODE (dest); + if (register_operand (src, mode) && register_operand (dest, mode)) + { + emit_move_insn (dest, src); + return; + } + + e.add_output_operand (dest, mode); + e.add_all_one_mask_operand (mask_mode); + /* For load operation, we create undef operand. + For store operation, we make it depend on the dest memory to + avoid potential bugs. */ + if (MEM_P (src)) + e.add_vundef_operand (mode); + else + e.add_input_operand (dest, mode); + + e.add_input_operand (src, mode); + e.add_input_operand (vl, Pmode); + + e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC); + + enum insn_code icode; + icode = code_for_pred_mov (mode); + e.expand (icode, true); +} + +/* Expand a pre-RA RVV data move from SRC to DEST. + It expands move for RVV fractional vector modes. */ +bool +legitimize_move (rtx dest, rtx src, machine_mode mask_mode) +{ + machine_mode mode = GET_MODE (dest); + /* For whole registers load/store or register-register move, + we don't need to specially handle them, just let them go + through "*mov<mode>" and then use the codegen directly. 
*/ + if ((known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) + && (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)) + || (register_operand (src, mode) && register_operand (dest, mode))) + { + /* Need to force register if mem <- !reg. */ + if (MEM_P (dest) && !REG_P (src)) + src = force_reg (mode, src); + return false; + } + + rtx vlmax = gen_reg_rtx (Pmode); + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + ? 8 + : GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + emit_insn (gen_vsetvl_no_side_effects ( + Pmode, vlmax, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode), + gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx)); + + if (!register_operand (src, mode) && !register_operand (dest, mode)) + { + rtx tmp = gen_reg_rtx (mode); + if (MEM_P (src)) + emit_pred_move (tmp, src, vlmax, mask_mode); + else + emit_move_insn (tmp, src); + src = tmp; + } + emit_pred_move (dest, src, vlmax, mask_mode); + return true; +} + +} // namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 8582c0c..713a756 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -72,18 +72,8 @@ public: e.add_input_operand (Pmode, gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode)); - /* LMUL. Define the bitmap rule as follows: - | 4 | 3 2 1 0 | - | fractional_p | factor | - */ - bool fractional_p = known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR); - unsigned int factor - = fractional_p ? exact_div (BYTES_PER_RISCV_VECTOR, GET_MODE_SIZE (mode)) - .to_constant () - : exact_div (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) - .to_constant (); - e.add_input_operand (Pmode, - gen_int_mode ((fractional_p << 4) | factor, Pmode)); + /* LMUL. */ + e.add_input_operand (Pmode, gen_int_mode ((unsigned int) mode, Pmode)); /* TA. */ e.add_input_operand (Pmode, gen_int_mode (1, Pmode)); @@ -94,8 +84,8 @@ public: } }; -static CONSTEXPR const vsetvl<false> vsetvl_obj; -static CONSTEXPR const vsetvl<true> vsetvlmax_obj; +static constexpr const vsetvl<false> vsetvl_obj; +static constexpr const vsetvl<true> vsetvlmax_obj; namespace bases { const function_base *const vsetvl = &vsetvl_obj; const function_base *const vsetvlmax = &vsetvlmax_obj; diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index 24fc1c0..14c5969 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -71,7 +71,7 @@ build_all (function_builder &b, const function_group_info &group) /* Declare the function shape NAME, pointing it to an instance of class <NAME>_def. */ #define SHAPE(DEF, VAR) \ - static CONSTEXPR const DEF##_def VAR##_obj; \ + static constexpr const DEF##_def VAR##_obj; \ namespace shapes { const function_shape *const VAR = &VAR##_obj; } /* Base class for for build. */ diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index dc41078..06a4a85 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -88,7 +88,7 @@ struct registered_function_hasher : nofree_ptr_hash<registered_function> }; /* Static information about each RVV type. */ -static CONSTEXPR const vector_type_info vector_types[] = { +static constexpr const vector_type_info vector_types[] = { #define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, ARGS...) 
\ {#NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME}, #include "riscv-vector-builtins.def" @@ -123,23 +123,23 @@ static const rvv_type_info i_ops[] = { #include "riscv-vector-builtins-types.def" {NUM_VECTOR_TYPES, 0}}; -static CONSTEXPR const rvv_arg_type_info rvv_arg_type_info_end +static constexpr const rvv_arg_type_info rvv_arg_type_info_end = rvv_arg_type_info (NUM_BASE_TYPES); /* A list of args for size_t func (void) function. */ -static CONSTEXPR const rvv_arg_type_info void_args[] +static constexpr const rvv_arg_type_info void_args[] = {rvv_arg_type_info (RVV_BASE_void), rvv_arg_type_info_end}; /* A list of args for size_t func (size_t) function. */ -static CONSTEXPR const rvv_arg_type_info size_args[] +static constexpr const rvv_arg_type_info size_args[] = {rvv_arg_type_info (RVV_BASE_size), rvv_arg_type_info_end}; /* A list of none preds that will be registered for intrinsic functions. */ -static CONSTEXPR const predication_type_index none_preds[] +static constexpr const predication_type_index none_preds[] = {PRED_TYPE_none, NUM_PRED_TYPES}; /* A static operand information for size_t func (void) function registration. */ -static CONSTEXPR const rvv_op_info i_none_size_void_ops +static constexpr const rvv_op_info i_none_size_void_ops = {i_ops, /* Types */ OP_TYPE_none, /* Suffix */ rvv_arg_type_info (RVV_BASE_size), /* Return type */ @@ -147,7 +147,7 @@ static CONSTEXPR const rvv_op_info i_none_size_void_ops /* A static operand information for size_t func (size_t) function registration. */ -static CONSTEXPR const rvv_op_info i_none_size_size_ops +static constexpr const rvv_op_info i_none_size_size_ops = {i_ops, /* Types */ OP_TYPE_none, /* Suffix */ rvv_arg_type_info (RVV_BASE_size), /* Return type */ @@ -202,7 +202,7 @@ rvv_switcher::rvv_switcher () memcpy (m_old_have_regs_of_mode, have_regs_of_mode, sizeof (have_regs_of_mode)); for (int i = 0; i < NUM_MACHINE_MODES; ++i) - if (riscv_v_ext_enabled_vector_mode_p ((machine_mode) i)) + if (riscv_v_ext_vector_mode_p ((machine_mode) i)) have_regs_of_mode[i] = true; } @@ -271,7 +271,7 @@ register_builtin_type (vector_type_index type, tree eltype, machine_mode mode) builtin_types[type].scalar = eltype; builtin_types[type].scalar_ptr = build_pointer_type (eltype); builtin_types[type].scalar_const_ptr = build_const_pointer (eltype); - if (!riscv_v_ext_enabled_vector_mode_p (mode)) + if (!riscv_v_ext_vector_mode_p (mode)) return; tree vectype = build_vector_type_for_mode (eltype, mode); diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 425da12..e5636e2 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -171,7 +171,7 @@ struct rvv_builtin_suffixes /* RVV Builtin argument information. */ struct rvv_arg_type_info { - CONSTEXPR rvv_arg_type_info (rvv_base_type base_type_in) + constexpr rvv_arg_type_info (rvv_base_type base_type_in) : base_type (base_type_in) {} enum rvv_base_type base_type; diff --git a/gcc/config/riscv/riscv-vector-switch.def b/gcc/config/riscv/riscv-vector-switch.def index cacfccb..ee8ebd5 100644 --- a/gcc/config/riscv/riscv-vector-switch.def +++ b/gcc/config/riscv/riscv-vector-switch.def @@ -155,10 +155,6 @@ ENTRY (VNx4DF, TARGET_VECTOR_FP64) ENTRY (VNx2DF, TARGET_VECTOR_FP64) ENTRY (VNx1DF, TARGET_VECTOR_FP64) -/* SEW = 128. Disable all of them. 
*/ -ENTRY (VNx2TI, false) -ENTRY (VNx2TF, false) - #undef TARGET_VECTOR_FP32 #undef TARGET_VECTOR_FP64 #undef ENTRY diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 90a3904..3d02954 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -396,12 +396,9 @@ static const unsigned gpr_save_reg_order[] = { /* A table describing all the processors GCC knows about. */ static const struct riscv_tune_info riscv_tune_info_table[] = { - { "rocket", generic, &rocket_tune_info }, - { "sifive-3-series", generic, &rocket_tune_info }, - { "sifive-5-series", generic, &rocket_tune_info }, - { "sifive-7-series", sifive_7, &sifive_7_tune_info }, - { "thead-c906", generic, &thead_c906_tune_info }, - { "size", generic, &optimize_size_tune_info }, +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \ + { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO}, +#include "riscv-cores.def" }; void riscv_frame_info::reset(void) @@ -944,30 +941,12 @@ riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode, return true; } -/* Return true if mode is the RVV mode. */ - -static bool -riscv_v_ext_vector_mode_p (machine_mode mode) -{ -#define ENTRY(MODE, REQUIREMENT) \ - case MODE##mode: \ - return true; - switch (mode) - { -#include "riscv-vector-switch.def" - default: - return false; - } - - return false; -} - /* Return true if mode is the RVV enabled mode. For example: 'VNx1DI' mode is disabled if MIN_VLEN == 32. 'VNx1SI' mode is enabled if MIN_VLEN == 32. */ bool -riscv_v_ext_enabled_vector_mode_p (machine_mode mode) +riscv_v_ext_vector_mode_p (machine_mode mode) { #define ENTRY(MODE, REQUIREMENT) \ case MODE##mode: \ @@ -982,6 +961,17 @@ riscv_v_ext_enabled_vector_mode_p (machine_mode mode) return false; } +/* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct + NUNITS size for corresponding machine_mode. */ + +poly_int64 +riscv_v_adjust_nunits (machine_mode mode, int scale) +{ + if (riscv_v_ext_vector_mode_p (mode)) + return riscv_vector_chunks * scale; + return scale; +} + /* Return true if X is a valid address for machine mode MODE. If it is, fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */ @@ -1958,6 +1948,18 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) { if (CONST_POLY_INT_P (src)) { + /* + Handle: + (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156) + (const_int 96 [0x60])) [0 S1 A8]) + (const_poly_int:QI [8, 8])) + "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil)) + */ + if (MEM_P (dest)) + { + emit_move_insn (dest, force_reg (mode, src)); + return true; + } poly_int64 value = rtx_to_poly_int64 (src); if (!value.is_constant () && !TARGET_VECTOR) { @@ -4136,14 +4138,42 @@ riscv_print_operand (FILE *file, rtx op, int letter) switch (letter) { case 'm': { - if (code == CONST_INT) + if (riscv_v_ext_vector_mode_p (mode)) { - /* LMUL. Define the bitmap rule as follows: - | 4 | 3 2 1 0 | - | fractional_p | factor | - */ - bool fractional_p = (UINTVAL (op) >> 4) & 0x1; - unsigned int factor = UINTVAL (op) & 0xf; + /* Calculate lmul according to mode and print the value. */ + poly_int64 size = GET_MODE_SIZE (mode); + unsigned int lmul; + if (known_lt (size, BYTES_PER_RISCV_VECTOR)) + lmul = 1; + else + lmul = exact_div (size, BYTES_PER_RISCV_VECTOR).to_constant (); + asm_fprintf (file, "%d", lmul); + } + else if (code == CONST_INT) + { + /* The value in the operand is the unsigned int value + converted from (enum machine_mode). 
+ This RTX is generated as follows: + + machine_mode mode = XXXmode; + operand = gen_int_mode ((unsigned int)mode, Pmode); + + So we convert it back into machine_mode and then calculate + the LMUL according to GET_MODE_SIZE. */ + + machine_mode rvv_mode = (machine_mode) UINTVAL (op); + /* For rvv mask modes, we can not calculate LMUL simpily according + to BYTES_PER_RISCV_VECTOR. When rvv_mode = VNx4BImode. + Set SEW = 8, LMUL = 1 by default if TARGET_MIN_VLEN == 32. + Set SEW = 8, LMUL = 1 / 2 by default if TARGET_MIN_VLEN > 32. */ + bool bool_p = GET_MODE_CLASS (rvv_mode) == MODE_VECTOR_BOOL; + poly_int64 m1_size = BYTES_PER_RISCV_VECTOR; + poly_int64 rvv_size + = bool_p ? GET_MODE_NUNITS (rvv_mode) : GET_MODE_SIZE (rvv_mode); + bool fractional_p = known_lt (rvv_size, BYTES_PER_RISCV_VECTOR); + unsigned int factor + = fractional_p ? exact_div (m1_size, rvv_size).to_constant () + : exact_div (rvv_size, m1_size).to_constant (); asm_fprintf (file, "%s%d", fractional_p ? "mf" : "m", factor); } else @@ -4151,7 +4181,15 @@ riscv_print_operand (FILE *file, rtx op, int letter) break; } case 'p': { - if (code == CONST_INT) + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + /* Print for RVV mask operand. + If op is reg, print ",v0.t". + Otherwise, don't print anything. */ + if (code == REG) + fprintf (file, ",%s.t", reg_names[REGNO (op)]); + } + else if (code == CONST_INT) { /* Tail && Mask policy. */ bool agnostic_p = UINTVAL (op) & 0x1; @@ -4987,8 +5025,8 @@ riscv_expand_epilogue (int style) rtx insn; /* We need to add memory barrier to prevent read from deallocated stack. */ - bool need_barrier_p - = known_ne (get_frame_size (), cfun->machine->frame.arg_pointer_offset); + bool need_barrier_p = known_ne (get_frame_size () + + cfun->machine->frame.arg_pointer_offset, 0); if (cfun->machine->naked_p) { @@ -6375,7 +6413,7 @@ static bool riscv_vector_mode_supported_p (machine_mode mode) { if (TARGET_VECTOR) - return riscv_v_ext_enabled_vector_mode_p (mode); + return riscv_v_ext_vector_mode_p (mode); return false; } @@ -6407,6 +6445,21 @@ riscv_vector_alignment (const_tree type) return wi::umin (min_size, 128).to_uhwi (); } +/* Implement REGMODE_NATURAL_SIZE. */ + +poly_uint64 +riscv_regmode_natural_size (machine_mode mode) +{ + /* The natural size for RVV data modes is one RVV data vector, + and similarly for predicates. We can't independently modify + anything smaller than that. */ + /* ??? For now, only do this for variable-width RVV registers. + Doing it for constant-sized registers breaks lower-subreg.c. */ + if (!riscv_vector_chunks.is_constant () && riscv_v_ext_vector_mode_p (mode)) + return BYTES_PER_RISCV_VECTOR; + return UNITS_PER_WORD; +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index acae68e..1385f0a 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -516,7 +516,7 @@ enum reg_class { 0x00000000, 0x00000000, 0x00000000, 0x00000001 }, /* V0_REGS */ \ { 0x00000000, 0x00000000, 0x00000000, 0xfffffffe }, /* VNoV0_REGS */ \ { 0x00000000, 0x00000000, 0x00000000, 0xffffffff }, /* V_REGS */ \ - { 0xffffffff, 0xffffffff, 0x00000003, 0x00000000 } /* ALL_REGS */ \ + { 0xffffffff, 0xffffffff, 0x0000000f, 0xffffffff } /* ALL_REGS */ \ } /* A C expression whose value is a register class containing hard @@ -1019,6 +1019,7 @@ extern bool riscv_slow_unaligned_access_p; extern unsigned riscv_stack_boundary; extern unsigned riscv_bytes_per_vector_chunk; extern poly_uint16 riscv_vector_chunks; +extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int); /* The number of bits and bytes in a RVV vector. */ #define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8)) #define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk)) @@ -1080,4 +1081,6 @@ extern void riscv_remove_unneeded_save_restore_calls (void); #define REGISTER_TARGET_PRAGMAS() riscv_register_pragmas () +#define REGMODE_NATURAL_SIZE(MODE) riscv_regmode_natural_size (MODE) + #endif /* ! GCC_RISCV_H */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 9384ced..ae907a7 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -152,7 +152,14 @@ (const_string "unknown")) ;; Main data type used by the insn -(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF" +(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF, + VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI, + VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI, + VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI, + VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI, + VNx1DI,VNx2DI,VNx4DI,VNx8DI, + VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF, + VNx1DF,VNx2DF,VNx4DF,VNx8DF" (const_string "unknown")) ;; True if the main data type is twice the size of a word. diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 8923a11..9493117 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -224,6 +224,9 @@ int riscv_zf_subext TargetVariable int riscv_zm_subext +TargetVariable +int riscv_sv_subext + Enum Name(isa_spec_class) Type(enum riscv_isa_spec_class) Supported ISA specs (for use with the -misa-spec= option): diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 8f67676..7997db3 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -63,6 +63,10 @@ riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc $(COMPILE) $< $(POSTCOMPILE) +riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc + $(COMPILE) $< + $(POSTCOMPILE) + PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \ diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md new file mode 100644 index 0000000..627e2f7 --- /dev/null +++ b/gcc/config/riscv/vector-iterators.md @@ -0,0 +1,58 @@ +;; Iterators for RISC-V 'V' Extension for GNU compiler. +;; Copyright (C) 2022-2022 Free Software Foundation, Inc. +;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_mode_iterator V [ + VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") + VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") + VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") + VNx1DI VNx2DI VNx4DI (VNx8DI "TARGET_MIN_VLEN > 32") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32") + (VNx4SF "TARGET_VECTOR_ELEN_FP_32") + (VNx8SF "TARGET_VECTOR_ELEN_FP_32") + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx2DF "TARGET_VECTOR_ELEN_FP_64") + (VNx4DF "TARGET_VECTOR_ELEN_FP_64") + (VNx8DF "TARGET_VECTOR_ELEN_FP_64") +]) + +(define_mode_iterator VB [ + VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI + (VNx64BI "TARGET_MIN_VLEN > 32") +]) + +(define_mode_attr VM [ + (VNx1QI "VNx1BI") (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI") (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") + (VNx1HI "VNx1BI") (VNx2HI "VNx2BI") (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI") + (VNx1SI "VNx1BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI") (VNx16SI "VNx16BI") + (VNx1DI "VNx1BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI") (VNx8DI "VNx8BI") + (VNx1SF "VNx1BI") (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI") + (VNx1DF "VNx1BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI") +]) + +(define_mode_attr sew [ + (VNx1QI "8") (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") + (VNx1HI "16") (VNx2HI "16") (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") + (VNx1SI "32") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") + (VNx1DI "64") (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") + (VNx1SF "32") (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") + (VNx1DF "64") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") +]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 82ce902..451ed23 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -26,11 +26,72 @@ ;; - Auto-vectorization (TBD) ;; - Combine optimization (TBD) +(include "vector-iterators.md") + (define_c_enum "unspec" [ UNSPEC_VSETVL + UNSPEC_VUNDEF + UNSPEC_VPREDICATE ]) ;; ----------------------------------------------------------------- +;; ---- Miscellaneous Operations +;; ----------------------------------------------------------------- + +(define_insn "vundefined<mode>" + [(set (match_operand:V 0 "register_operand" "=vr") + (unspec:V [(const_int 0)] UNSPEC_VUNDEF))] + "TARGET_VECTOR" + "") + +;; ----------------------------------------------------------------- +;; ---- Moves Operations +;; ----------------------------------------------------------------- + +(define_expand "mov<mode>" + [(set (match_operand:V 0 "reg_or_mem_operand") + (match_operand:V 1 "vector_move_operand"))] + "TARGET_VECTOR" +{ + if (riscv_vector::legitimize_move 
(operands[0], operands[1], <VM>mode)) + DONE; +}) + +;; This pattern is used for code-gen for whole register load/stores. +;; Also applicable for all register moves. +;; Fractional vector modes load/store are not allowed to match this pattern. +;; Mask modes load/store are not allowed to match this pattern. +(define_insn "*mov<mode>" + [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr") + (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))] + "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode) + && register_operand (operands[1], <MODE>mode)) + || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))" + "@ + vl%m1re<sew>.v\t%0,%1 + vs%m1r.v\t%1,%0 + vmv%m1r.v\t%0,%1" + [(set_attr "type" "vldr,vstr,vmov") + (set_attr "mode" "<MODE>")]) + +(define_expand "mov<mode>" + [(set (match_operand:VB 0 "reg_or_mem_operand") + (match_operand:VB 1 "vector_move_operand"))] + "TARGET_VECTOR" +{ + if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode)) + DONE; +}) + +(define_insn "*mov<mode>" + [(set (match_operand:VB 0 "register_operand" "=vr") + (match_operand:VB 1 "register_operand" "vr"))] + "TARGET_VECTOR" + "vmv1r.v\t%0,%1" + [(set_attr "type" "vmov") + (set_attr "mode" "<MODE>")]) + +;; ----------------------------------------------------------------- ;; ---- 6. Configuration-Setting Instructions ;; ----------------------------------------------------------------- ;; Includes: @@ -50,13 +111,98 @@ ;; operands[3]: LMUL ;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic) ;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic) + +;; We define 2 types of "vsetvl*" instruction patterns: + +;; - "@vsetvl<mode>" is a parallel format which has side effects. + +;; - "@vsetvl<mode>_no_side_effects" has no side effects. + +;; - "@vsetvl<mode>" is used by "vsetvl" intrinsics and "insert-vsetvl" PASS. + +;; - "@vsetvl<mode>_no_side_effects" is used by GCC standard patterns. + +;; - "@vsetvl<mode>" includes VL/VTYPE global registers status (define set) +;; and each RVV instruction includes VL/VTYPE global registers status (use) +;; so that we can guarantee each RVV instruction can execute with correct +;; VL/VTYPE global registers status after "insert-vsetvl" PASS. + +;; - "@vsetvl<mode>_no_side_effects" has no side effects and excludes VL/VTYPE +;; global registers status (define set). It's only used by GCC standard pattern +;; expansion. For example: "mov<mode>" pattern for fractional vector modes which +;; need to set VL/VTYPE. Then we could manually call this pattern to gain benefits +;; from the optimization of each GCC internal PASS. + +;; 1. void foo (float *in, float *out) +;; { +;; vfloat32mf2_t v = *(vfloat32mf2_t*)in; +;; *(vfloat32mf2_t*)out = v; +;; } +;; We could eliminate the second "vsetvl" by calling "@vsetvl<mode>_no_side_effects". +;; +;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects": +;; vsetvli a4,zero,e32,mf2,ta,ma ;; vsetvli a4,zero,e32,mf2,ta,ma +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) +;; vsetvli a4,zero,e32,mf2,ta,ma ;; -- +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) +;; ret ;; ret + +;; 2. 
void foo (int8_t *in, int8_t *out, int M) +;; { +;; for (int i = 0; i < M; i++){ +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); +;; *(vint8mf2_t*)(out + i) = v; +;; } +;; } +;; +;; Hoist "vsetvl" instruction in LICM: +;; "@vsetvl<mode>": ;; "@vsetvl<mode>_no_side_effects": +;; - ;; vsetvli a4,zero,e32,mf2,ta,ma +;; LOOP: ;; LOOP: +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - +;; vle32.v v24,(a0) ;; vle32.v v24,(a0) +;; vsetvli a4,zero,e32,mf2,ta,ma ;; - +;; vse32.v v24,(a1) ;; vse32.v v24,(a1) + +;; However, it may produce wrong codegen if we exclude VL/VTYPE in "vsevl<mode>". +;; 3. void foo (int8_t *in, int8_t *out, int32_t *in2, int32_t *out2, int M) +;; { +;; for (int i = 0; i < M; i++){ +;; vint8mf2_t v = *(vint8mf2_t*)(in + i); +;; vint32mf2_t v2 = *(vint32mf2_t*)(in + i + i); +;; *(vint8mf2_t*)(out + i) = v; +;; *(vint32mf2_t*)(out + i + i) = v2; +;; } +;; } +;; +;; vsetvli a6,zero,e8,mf2,ta,ma +;; vsetvli a2,zero,e32,mf2,ta,ma +;; LOOP: +;; vle8.v v25,(a0) +;; vle32.v v24,(a5) +;; addi a0,a0,1 +;; vse8.v v25,(a1) +;; vse32.v v24,(a3) +;; +;; Both vle8.v and vle32.v are using the wrong VL/VTYPE status. +;; We leave it to "insert-vsetvl" PASS to correct this situation. + +;; The "insert-vsetvl" PASS mechanism: +;; 1. Before "insert-vsetvl" PASS, only RVV instructions are generated +;; by GCC standard pattern expansion has the corresponding "vsetvl". +;; We exploit each GCC internal optimization pass to optimize the "vsetvl". +;; 2. Correct the VL/VTYPE status for each GCC standard pattern RVV instructions. +;; Insert vsetvl for each RVV instructions that has no VL/VTYPE status if necessary. +;; For example: RVV intrinsics. +;; 3. Optimize "vsetvl" instructions. + (define_insn "@vsetvl<mode>" - [(set (match_operand:P 0 "register_operand" "=r,r") - (unspec:P [(match_operand:P 1 "csr_operand" "r,K") - (match_operand 2 "const_int_operand" "i,i") - (match_operand 3 "const_int_operand" "i,i") - (match_operand 4 "const_int_operand" "i,i") - (match_operand 5 "const_int_operand" "i,i")] UNSPEC_VSETVL)) + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "csr_operand" "rK") + (match_operand 2 "const_int_operand" "i") + (match_operand 3 "const_int_operand" "i") + (match_operand 4 "const_int_operand" "i") + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL)) (set (reg:SI VL_REGNUM) (unspec:SI [(match_dup 1) (match_dup 2) @@ -70,3 +216,124 @@ "vset%i1vli\t%0,%1,e%2,%m3,t%p4,m%p5" [(set_attr "type" "vsetvl") (set_attr "mode" "<MODE>")]) + +;; We keep it as no side effects before reload_completed. +;; In this case, we can gain benefits from different GCC +;; internal PASS such as cprop, fwprop, combine,...etc. + +;; Then recover it for "insert-vsetvl" and "sched2" PASS +;; in order to get correct codegen. 
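To make the vsetvl operands discussed above concrete: the SEW, LMUL and tail/mask policy operands ultimately describe the vtype value that a vsetvli instruction writes. The following is a minimal sketch assuming the RVV 1.0 vtype layout; it is not code from this patch:

    #include <stdint.h>

    /* vlmul: 0=m1 1=m2 2=m4 3=m8 5=mf8 6=mf4 7=mf2; vsew: 0=e8 1=e16 2=e32 3=e64.
       vta/vma: 0 = undisturbed, 1 = agnostic (matching tail_policy/mask_policy
       in riscv-protos.h above).  Layout per the RVV 1.0 spec:
       vtype[2:0] = vlmul, vtype[5:3] = vsew, vtype[6] = vta, vtype[7] = vma.  */
    static uint32_t
    encode_vtype (uint32_t vlmul, uint32_t vsew, uint32_t vta, uint32_t vma)
    {
      return (vlmul & 0x7) | ((vsew & 0x7) << 3)
             | ((vta & 0x1) << 6) | ((vma & 0x1) << 7);
    }

Under that layout, the "vsetvli a4,zero,e32,mf2,ta,ma" shown in the examples above corresponds to encode_vtype (7, 2, 1, 1).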
+(define_insn_and_split "@vsetvl<mode>_no_side_effects" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "csr_operand" "rK") + (match_operand 2 "const_int_operand" "i") + (match_operand 3 "const_int_operand" "i") + (match_operand 4 "const_int_operand" "i") + (match_operand 5 "const_int_operand" "i")] UNSPEC_VSETVL))] + "TARGET_VECTOR" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (unspec:P [(match_dup 1) (match_dup 2) (match_dup 3) + (match_dup 4) (match_dup 5)] UNSPEC_VSETVL)) + (set (reg:SI VL_REGNUM) + (unspec:SI [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_VSETVL)) + (set (reg:SI VTYPE_REGNUM) + (unspec:SI [(match_dup 2) (match_dup 3) (match_dup 4) + (match_dup 5)] UNSPEC_VSETVL))])] + "" + [(set_attr "type" "vsetvl") + (set_attr "mode" "<MODE>")]) + +;; RVV machine description matching format +;; (define_insn "" +;; [(set (match_operand:MODE 0) +;; (if_then_else:MODE +;; (unspec:<MODE:VM> +;; [(match_operand:<VM> 1 "vector_mask_operand") +;; (match_operand N + 4 "vector_length_operand") +;; (match_operand N + 5 "const_int_operand") +;; (match_operand N + 6 "const_int_operand") +;; (reg:SI VL_REGNUM) +;; (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) +;; (instruction operation:MODE +;; (match_operand 3 +;; (match_operand 4 +;; (match_operand 5 +;; ................ +;; (match_operand N + 3) +;; (match_operand:MODE 2 "vector_reg_or_const0_operand")))] +;; +;; (unspec:[........] UNSPEC_VPREDICATE) is a predicate wrapper. +;; Include mask predicate && length predicate && vector policy. + +;; ------------------------------------------------------------------------------- +;; ---- Predicated Mov +;; ------------------------------------------------------------------------------- +;; Includes: +;; - 7.4. Vector Unit-Stride Instructions +;; - 11.16 Vector Integer Move Instructions +;; - 13.16 Vector Floating-Point Move Instruction +;; - 15.1 Vector Mask-Register Logical Instructions +;; ------------------------------------------------------------------------------- + +;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f. +;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operand. +;; For vse.v, we don't need merge operand, so it should always match "vu". +;; constraint alternative 0 ~ 1 match vle.v. +;; constraint alternative 2 match vse.v. +;; constraint alternative 3 match vmv.v.v. +;; constraint alternative 4 match vmv.v.i. +;; For vmv.v.i, we allow 2 following cases: +;; 1. (const_vector:VNx1QI repeat [ +;; (const_int:QI N)]), -15 <= N < 16. +;; 2. (const_vector:VNx1SF repeat [ +;; (const_double:SF 0.0 [0x0.0p+0])]). +(define_insn "@pred_mov<mode>" + [(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr") + (if_then_else:V + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0") + (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))] + "TARGET_VECTOR" + "@ + vle<sew>.v\t%0,%3%p1 + vle<sew>.v\t%0,%3%p1 + vse<sew>.v\t%3,%0%p1 + vmv.v.v\t%0,%3 + vmv.v.i\t%0,v%3" + [(set_attr "type" "vlde,vlde,vste,vimov,vimov") + (set_attr "mode" "<MODE>")]) + +;; vlm.v/vsm.v/vmclr.m/vmset.m. +;; constraint alternative 0 match vlm.v. 
+;; constraint alternative 2 match vsm.v. +;; constraint alternative 3 match vmclr.m. +;; constraint alternative 4 match vmset.m. +(define_insn "@pred_mov<mode>" + [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr") + (if_then_else:VB + (unspec:VB + [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1") + (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))] + "TARGET_VECTOR" + "@ + vlm.v\t%0,%3 + vsm.v\t%3,%0 + vmclr.m\t%0 + vmset.m\t%0" + [(set_attr "type" "vldm,vstm,vmalu,vmalu") + (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index 59d901a..cd8ee8c 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -414,9 +414,9 @@ static void do_isel (rtx dest, rtx cmp, rtx src_t, rtx src_f, rtx cr) { if (GET_MODE (dest) == DImode) - emit_insn (gen_isel_signed_di (dest, cmp, src_t, src_f, cr)); + emit_insn (gen_isel_cc_di (dest, cmp, src_t, src_f, cr)); else - emit_insn (gen_isel_signed_si (dest, cmp, src_t, src_f, cr)); + emit_insn (gen_isel_cc_si (dest, cmp, src_t, src_f, cr)); } /* Emit a subtract of the proper mode for DEST. diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index d2743f7..a85d7630 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -9759,8 +9759,11 @@ rs6000_init_stack_protect_guard (void) static bool rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { - if (GET_CODE (x) == HIGH - && GET_CODE (XEXP (x, 0)) == UNSPEC) + /* If GET_CODE (x) is HIGH, the 'X' represets the high part of a symbol_ref. + It can not be put into a constant pool. e.g. + (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..) + (high:DI (symbol_ref:DI ("var")..)). */ + if (GET_CODE (x) == HIGH) return true; /* A TLS symbol in the TOC cannot contain a sum. */ @@ -16341,8 +16344,8 @@ rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) signedp = GET_MODE (cr) == CCmode; isel_func = (mode == SImode - ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si) - : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di)); + ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si) + : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di)); switch (cond_code) { diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index ad5a4cf..3bae303 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5428,13 +5428,12 @@ ;; leave out the mode in operand 4 and use one pattern, but reload can ;; change the mode underneath our feet and then gets confused trying ;; to reload the value. 
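Before the pattern changes themselves, a hypothetical C-level illustration (not code from the patch) of the kind of select and comparison these isel/setbc patterns cover without branches:

    /* On CPUs where GCC enables isel (and setbc on Power10), code like this
       can be expanded through the isel and setbc patterns adjusted below
       rather than through conditional branches.  */
    long
    select_max (long a, long b)
    {
      return a > b ? a : b;   /* conditional move: compare + isel  */
    }

    int
    is_equal (long a, long b)
    {
      return a == b;          /* boolean from a CR bit: compare + setbc  */
    }

The changes that follow rename these patterns from the <un>signed suffix to the CC/CCUNS mode of the condition-register operand via the new CCANY iterator.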
-(define_mode_iterator CCEITHER [CC CCUNS]) -(define_mode_attr un [(CC "") (CCUNS "un")]) -(define_insn "isel_<un>signed_<GPR:mode>" +(define_mode_iterator CCANY [CC CCUNS]) +(define_insn "isel_<CCANY:mode>_<GPR:mode>" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") (if_then_else:GPR (match_operator 1 "scc_comparison_operator" - [(match_operand:CCEITHER 4 "cc_reg_operand" "y,y") + [(match_operand:CCANY 4 "cc_reg_operand" "y,y") (const_int 0)]) (match_operand:GPR 2 "reg_or_zero_operand" "O,b") (match_operand:GPR 3 "gpc_reg_operand" "r,r")))] @@ -5446,11 +5445,11 @@ ;; isel can handle reversed comparisons so long as the operands are ;; registers. -(define_insn "*isel_reversed_<un>signed_<GPR:mode>" +(define_insn "*isel_reversed_<CCANY:mode>_<GPR:mode>" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r") (if_then_else:GPR (match_operator 1 "scc_rev_comparison_operator" - [(match_operand:CCEITHER 4 "cc_reg_operand" "y,y") + [(match_operand:CCANY 4 "cc_reg_operand" "y,y") (const_int 0)]) (match_operand:GPR 2 "gpc_reg_operand" "r,r") (match_operand:GPR 3 "reg_or_zero_operand" "O,b")))] @@ -5462,38 +5461,38 @@ [(set_attr "type" "isel")]) ; Set Boolean Condition (Reverse) -(define_insn "setbc_<un>signed_<GPR:mode>" +(define_insn "setbc_<CCANY:mode>_<GPR:mode>" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (match_operator:GPR 1 "scc_comparison_operator" - [(match_operand:CCEITHER 2 "cc_reg_operand" "y") + [(match_operand:CCANY 2 "cc_reg_operand" "y") (const_int 0)]))] "TARGET_POWER10" "setbc %0,%j1" [(set_attr "type" "isel")]) -(define_insn "*setbcr_<un>signed_<GPR:mode>" +(define_insn "*setbcr_<CCANY:mode>_<GPR:mode>" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (match_operator:GPR 1 "scc_rev_comparison_operator" - [(match_operand:CCEITHER 2 "cc_reg_operand" "y") + [(match_operand:CCANY 2 "cc_reg_operand" "y") (const_int 0)]))] "TARGET_POWER10" "setbcr %0,%j1" [(set_attr "type" "isel")]) ; Set Negative Boolean Condition (Reverse) -(define_insn "*setnbc_<un>signed_<GPR:mode>" +(define_insn "*setnbc_<CCANY:mode>_<GPR:mode>" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (neg:GPR (match_operator:GPR 1 "scc_comparison_operator" - [(match_operand:CCEITHER 2 "cc_reg_operand" "y") + [(match_operand:CCANY 2 "cc_reg_operand" "y") (const_int 0)])))] "TARGET_POWER10" "setnbc %0,%j1" [(set_attr "type" "isel")]) -(define_insn "*setnbcr_<un>signed_<GPR:mode>" +(define_insn "*setnbcr_<CCANY:mode>_<GPR:mode>" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (neg:GPR (match_operator:GPR 1 "scc_rev_comparison_operator" - [(match_operand:CCEITHER 2 "cc_reg_operand" "y") + [(match_operand:CCANY 2 "cc_reg_operand" "y") (const_int 0)])))] "TARGET_POWER10" "setnbcr %0,%j1" @@ -12644,7 +12643,7 @@ rtx compare = gen_rtx_COMPARE (CCmode, operands[1], operands[2]); emit_insn (gen_rtx_SET (cc, compare)); rtx eq = gen_rtx_fmt_ee (EQ, <MODE>mode, cc, const0_rtx); - emit_insn (gen_setbc_signed_<mode> (operands[0], eq, cc)); + emit_insn (gen_setbc_cc_<mode> (operands[0], eq, cc)); DONE; } @@ -12700,7 +12699,7 @@ rtx compare = gen_rtx_COMPARE (CCmode, operands[1], operands[2]); emit_insn (gen_rtx_SET (cc, compare)); rtx ne = gen_rtx_fmt_ee (NE, <MODE>mode, cc, const0_rtx); - emit_insn (gen_setbc_signed_<mode> (operands[0], ne, cc)); + emit_insn (gen_setbc_cc_<mode> (operands[0], ne, cc)); DONE; } diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 950eb5a..94a98c2 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -3471,15 +3471,14 @@ 
xtensa_expand_epilogue (bool sibcall_p) if (xtensa_call_save_reg(regno)) { rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); - rtx reg; offset -= UNITS_PER_WORD; - emit_move_insn (reg = gen_rtx_REG (SImode, regno), + emit_move_insn (gen_rtx_REG (SImode, regno), gen_frame_mem (SImode, x)); - if (regno == A0_REG && sibcall_p) - emit_use (reg); } } + if (sibcall_p) + emit_use (gen_rtx_REG (SImode, A0_REG)); if (cfun->machine->current_frame_size > 0) { @@ -4970,6 +4969,13 @@ xtensa_conditional_register_usage (void) /* Remove hard FP register from the preferred reload registers set. */ CLEAR_HARD_REG_BIT (reg_class_contents[(int)RL_REGS], HARD_FRAME_POINTER_REGNUM); + + /* Register A0 holds the return address upon entry to a function + for the CALL0 ABI, but unlike the windowed register ABI, it is + not reserved for this purpose and may hold other values after + the return address has been saved. */ + if (!TARGET_WINDOWED_ABI) + fixed_regs[A0_REG] = 0; } /* Map hard register number to register class */ diff --git a/gcc/configure b/gcc/configure index c6def4c..89e00b7 100755 --- a/gcc/configure +++ b/gcc/configure @@ -7842,6 +7842,10 @@ if test x${enable_multiarch} = xauto; then enable_multiarch=no fi fi +if test x${enable_multiarch} = xyes; then + $as_echo "#define ENABLE_MULTIARCH 1" >>confdefs.h + +fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for multiarch configuration" >&5 $as_echo_n "checking for multiarch configuration... " >&6; } @@ -19714,7 +19718,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19717 "configure" +#line 19721 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -19820,7 +19824,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19823 "configure" +#line 19827 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/gcc/configure.ac b/gcc/configure.ac index 45bf756..eb92a37 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -887,6 +887,9 @@ if test x${enable_multiarch} = xauto; then enable_multiarch=no fi fi +if test x${enable_multiarch} = xyes; then + AC_DEFINE(ENABLE_MULTIARCH, 1) +fi AC_MSG_CHECKING(for multiarch configuration) AC_SUBST(enable_multiarch) AC_MSG_RESULT($enable_multiarch$ma_msg_suffix) diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 830324c..550515c 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,123 @@ +2022-10-25 Nathan Sidwell <nathan@acm.org> + + * parser.cc (synthesize_implicit_template_parm): Fix thinko about + mark the new parm DECL_VIRTUAL_P. Avoid unneccessary tree_last call. + +2022-10-25 Patrick Palka <ppalka@redhat.com> + + * constraint.cc (resolve_function_concept_overload): Explicitly + pass complain=tf_none to coerce_template_parms. + (resolve_concept_check): Likewise. + (normalize_concept_check): Likewise. + * cp-tree.h (coerce_template_parms): Declare the main overload + and default its last parameter to true. Remove wrapper overloads. + * pt.cc (determine_specialization): Adjust calls to + coerce_template_parms and coerce_innermost_template_parms after + removing their last parameter. + (coerce_template_args_for_ttp): Likewise. + (coerce_ttp_args_for_tta): Likewise. + (coerce_template_template_parms): Likewise. + (coerce_template_parms): Remove use_default_args parameter and + adjust function comment. Document default argument. Remove + wrapper overloads. No longer static. 
+ (coerce_innermost_template_parms): Remove use_default_args + parameter. Default require_all_args to true. + (lookup_template_class): As with determine_specialization. + (finish_template_variable): Likewise. + (tsubst_decl): Likewise. + (instantiate_alias_template): Likewise. + (fn_type_unification): Likewise. + (resolve_overloaded_unification): Likewise. + (resolve_nondeduced_context): Likewise. + (get_partial_spec_bindings): Likewise. + +2022-10-25 Jason Merrill <jason@redhat.com> + + * constexpr.cc (find_failing_clause_r): Re-add the call to + contextual_conv_bool. + +2022-10-25 Patrick Palka <ppalka@redhat.com> + + PR c++/106848 + PR c++/102600 + * module.cc (trees_out::core_vals): Stream TYPE_MAX_VALUE and + TYPE_MIN_VALUE of ENUMERAL_TYPE. + (trees_in::core_vals): Likewise. + (trees_out::write_enum_def): Don't stream them here. + (trees_in::read_enum_def): Likewise. + +2022-10-25 Jason Merrill <jason@redhat.com> + + * constexpr.cc (class constexpr_global_ctx): Add modifiable field, + get_value, get_value_ptr, put_value, remove_value, flush_modifiable + member functions. + (class modifiable_tracker): New. + (cxx_eval_internal_function): Use it. + (diagnose_failing_condition): Strip CLEANUP_POINT_EXPR. + +2022-10-25 Jason Merrill <jason@redhat.com> + + * constexpr.cc (fold_operand): New function. + (find_failing_clause_r): Add const. + (find_failing_clause): Add const. + (diagnose_failing_condition): Add ctx parameter. + (cxx_eval_internal_function): Pass it. + * semantics.cc (diagnose_failing_condition): Move to constexpr.cc. + * cp-tree.h: Adjust. + +2022-10-24 Jason Merrill <jason@redhat.com> + + * cp-gimplify.cc (fold_builtin_source_location) + * vtable-class-hierarchy.cc (register_all_pairs): Simplify calls to + build_string_literal. + (build_string_from_id): Remove. + +2022-10-24 Marek Polacek <polacek@redhat.com> + + PR c++/107276 + * typeck.cc (treat_lvalue_as_rvalue_p): Check the return value of move. + +2022-10-24 Jakub Jelinek <jakub@redhat.com> + + PR c++/107358 + * typeck.cc (cp_build_binary_op): Pass operands before excess precision + promotions to scalar_to_vector call. + +2022-10-24 Jakub Jelinek <jakub@redhat.com> + + PR c++/105774 + * constexpr.cc (cxx_eval_increment_expression): For signed types + that promote to int, evaluate PLUS_EXPR or MINUS_EXPR in int type. + +2022-10-24 Arsen Arsenović <arsen@aarsen.me> + + * cp-tree.h (DECL_MAIN_P): Move most logic, besides the hosted + check, from here... + (DECL_MAIN_ANY_P): ... to here, so that it can be reused ... + (DECL_MAIN_FREESTANDING_P): ... here, with an additional + constraint on (hosted OR return type == int) + * decl.cc (finish_function): Use DECL_MAIN_FREESTANDING_P + instead of DECL_MAIN_P, to loosen the hosted requirement, but + check noreturn, before adding implicit returns. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR c++/107295 + * constexpr.cc (reduced_constant_expression_p) <case CONSTRUCTOR>: + Return false for VECTOR_TYPE CONSTRUCTORs even without + CONSTRUCTOR_NO_CLEARING set on them. + (cxx_eval_bare_aggregate): If constant but !changed, fold before + returning VECTOR_TYPE_P CONSTRUCTOR. + (cxx_eval_constant_expression) <case CONSTRUCTOR>: Don't fold + TREE_CONSTANT CONSTRUCTOR, just return it. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR c++/106863 + * semantics.cc (finish_compound_literal): For void{}, if + processing_template_decl return a COMPOUND_LITERAL_P + CONSTRUCTOR rather than void_node. 
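[Editor's note, not part of the patch: a minimal illustration of the PR c++/105774 entry above. This is a hypothetical testcase, not taken from the commit or its testsuite, and it assumes GCC's usual modulo-2^N narrowing conversion (also guaranteed since C++20).]

/* Incrementing a signed char that holds SCHAR_MAX used to be rejected in
   constexpr as signed overflow; with the arithmetic now done in int after
   the integer promotion, only the final narrowing wraps, so this is
   accepted.  */
constexpr signed char next (signed char c)
{
  ++c;                     /* evaluated as (int) c + 1, then narrowed back  */
  return c;
}
static_assert (next (127) == -128, "wraps on narrowing, no overflow");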
+ 2022-10-20 Patrick Palka <ppalka@redhat.com> * pt.cc (lookup_and_finish_template_variable): Don't diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 0366396..15b4f2c 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -1092,10 +1092,11 @@ enum constexpr_switch_state { cxx_eval_outermost_constant_expr invocation. VALUES is a map of values of variables initialized within the expression. */ -struct constexpr_global_ctx { +class constexpr_global_ctx { /* Values for any temporaries or local variables within the constant-expression. */ hash_map<tree,tree> values; +public: /* Number of cxx_eval_constant_expression calls (except skipped ones, on simple constants or location wrappers) encountered during current cxx_eval_outermost_constant_expr call. */ @@ -1105,11 +1106,61 @@ struct constexpr_global_ctx { auto_vec<tree, 16> heap_vars; /* Cleanups that need to be evaluated at the end of CLEANUP_POINT_EXPR. */ vec<tree> *cleanups; + /* If non-null, only allow modification of existing values of the variables + in this set. Set by modifiable_tracker, below. */ + hash_set<tree> *modifiable; /* Number of heap VAR_DECL deallocations. */ unsigned heap_dealloc_count; /* Constructor. */ constexpr_global_ctx () - : constexpr_ops_count (0), cleanups (NULL), heap_dealloc_count (0) {} + : constexpr_ops_count (0), cleanups (NULL), modifiable (nullptr), + heap_dealloc_count (0) {} + + tree get_value (tree t) + { + if (tree *p = values.get (t)) + return *p; + return NULL_TREE; + } + tree *get_value_ptr (tree t) + { + if (modifiable && !modifiable->contains (t)) + return nullptr; + return values.get (t); + } + void put_value (tree t, tree v) + { + bool already_in_map = values.put (t, v); + if (!already_in_map && modifiable) + modifiable->add (t); + } + void remove_value (tree t) { values.remove (t); } +}; + +/* Helper class for constexpr_global_ctx. In some cases we want to avoid + side-effects from evaluation of a particular subexpression of a + constant-expression. In such cases we use modifiable_tracker to prevent + modification of variables created outside of that subexpression. + + ??? We could change the hash_set to a hash_map, allow and track external + modifications, and roll them back in the destructor. It's not clear to me + that this would be worthwhile. */ + +class modifiable_tracker +{ + hash_set<tree> set; + constexpr_global_ctx *global; +public: + modifiable_tracker (constexpr_global_ctx *g): global(g) + { + global->modifiable = &set; + } + ~modifiable_tracker () + { + for (tree t: set) + global->remove_value (t); + global->modifiable = nullptr; + } }; /* The constexpr expansion context. CALL is the current function @@ -1653,7 +1704,7 @@ addr_of_non_const_var (tree *tp, int *walk_subtrees, void *data) return var; constexpr_global_ctx *global = (constexpr_global_ctx *) data; - if (global->values.get (var)) + if (global->get_value (var)) return var; } if (TYPE_P (*tp)) @@ -1799,6 +1850,24 @@ cx_error_context (void) return r; } +/* E is an operand of a failed assertion, fold it either with or without + constexpr context. */ + +static tree +fold_operand (tree e, const constexpr_ctx *ctx) +{ + if (ctx) + { + bool new_non_constant_p = false, new_overflow_p = false; + e = cxx_eval_constant_expression (ctx, e, vc_prvalue, + &new_non_constant_p, + &new_overflow_p); + } + else + e = fold_non_dependent_expr (e, tf_none, /*manifestly_const_eval=*/true); + return e; +} + /* If we have a condition in conjunctive normal form (CNF), find the first failing clause. 
In other words, given an expression like @@ -1807,7 +1876,7 @@ cx_error_context (void) return the first 'false'. EXPR is the expression. */ static tree -find_failing_clause_r (constexpr_ctx *ctx, tree expr) +find_failing_clause_r (const constexpr_ctx *ctx, tree expr) { if (TREE_CODE (expr) == TRUTH_ANDIF_EXPR) { @@ -1819,15 +1888,7 @@ find_failing_clause_r (constexpr_ctx *ctx, tree expr) return e; } tree e = contextual_conv_bool (expr, tf_none); - if (ctx) - { - bool new_non_constant_p = false, new_overflow_p = false; - e = cxx_eval_constant_expression (ctx, e, vc_prvalue, - &new_non_constant_p, - &new_overflow_p); - } - else - e = fold_non_dependent_expr (e, tf_none, /*manifestly_const_eval=*/true); + e = fold_operand (e, ctx); if (integer_zerop (e)) /* This is the failing clause. */ return expr; @@ -1837,7 +1898,7 @@ find_failing_clause_r (constexpr_ctx *ctx, tree expr) /* Wrapper for find_failing_clause_r. */ tree -find_failing_clause (constexpr_ctx *ctx, tree expr) +find_failing_clause (const constexpr_ctx *ctx, tree expr) { if (TREE_CODE (expr) == TRUTH_ANDIF_EXPR) if (tree e = find_failing_clause_r (ctx, expr)) @@ -1845,6 +1906,36 @@ find_failing_clause (constexpr_ctx *ctx, tree expr) return expr; } +/* Emit additional diagnostics for failing condition BAD. + Used by finish_static_assert and IFN_ASSUME constexpr diagnostics. + If SHOW_EXPR_P is true, print the condition (because it was + instantiation-dependent). */ + +void +diagnose_failing_condition (tree bad, location_t cloc, bool show_expr_p, + const constexpr_ctx *ctx /* = nullptr */) +{ + /* Nobody wants to see the artificial (bool) cast. */ + bad = tree_strip_nop_conversions (bad); + if (TREE_CODE (bad) == CLEANUP_POINT_EXPR) + bad = TREE_OPERAND (bad, 0); + + /* Actually explain the failure if this is a concept check or a + requires-expression. */ + if (concept_check_p (bad) || TREE_CODE (bad) == REQUIRES_EXPR) + diagnose_constraints (cloc, bad, NULL_TREE); + else if (COMPARISON_CLASS_P (bad) + && ARITHMETIC_TYPE_P (TREE_TYPE (TREE_OPERAND (bad, 0)))) + { + tree op0 = fold_operand (TREE_OPERAND (bad, 0), ctx); + tree op1 = fold_operand (TREE_OPERAND (bad, 1), ctx); + tree cond = build2 (TREE_CODE (bad), boolean_type_node, op0, op1); + inform (cloc, "the comparison reduces to %qE", cond); + } + else if (show_expr_p) + inform (cloc, "%qE evaluates to false", bad); +} + /* Evaluate a call T to a GCC internal function when possible and return the evaluated result or, under the control of CTX, give an error, set NON_CONSTANT_P, and return the unevaluated call T otherwise. */ @@ -1865,18 +1956,14 @@ cxx_eval_internal_function (const constexpr_ctx *ctx, tree t, return void_node; case IFN_ASSUME: - /* For now, restrict constexpr evaluation of [[assume (cond)]] - only to the cases which don't have side-effects. Evaluating - it even when it does would mean we'd need to somehow undo - all the side-effects e.g. in ctx->global->values. */ - if (!TREE_SIDE_EFFECTS (CALL_EXPR_ARG (t, 0)) - /* And it needs to be a potential constant expression. */ - && potential_rvalue_constant_expression (CALL_EXPR_ARG (t, 0))) + if (potential_rvalue_constant_expression (CALL_EXPR_ARG (t, 0))) { constexpr_ctx new_ctx = *ctx; new_ctx.quiet = true; tree arg = CALL_EXPR_ARG (t, 0); bool new_non_constant_p = false, new_overflow_p = false; + /* Avoid modification of existing values. 
*/ + modifiable_tracker ms (new_ctx.global); arg = cxx_eval_constant_expression (&new_ctx, arg, vc_prvalue, &new_non_constant_p, &new_overflow_p); @@ -1897,7 +1984,7 @@ cxx_eval_internal_function (const constexpr_ctx *ctx, tree t, /* Report the error. */ error_at (cloc, "failed %<assume%> attribute assumption"); - diagnose_failing_condition (bad, cloc, false); + diagnose_failing_condition (bad, cloc, false, &new_ctx); } *non_constant_p = true; @@ -2576,7 +2663,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, // See PR98988 and PR99031. varpool_node::finalize_decl (var); ctx->global->heap_vars.safe_push (var); - ctx->global->values.put (var, NULL_TREE); + ctx->global->put_value (var, NULL_TREE); return fold_convert (ptr_type_node, build_address (var)); } else @@ -2604,7 +2691,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, return t; } DECL_NAME (var) = heap_deleted_identifier; - ctx->global->values.remove (var); + ctx->global->remove_value (var); ctx->global->heap_dealloc_count++; return void_node; } @@ -2626,7 +2713,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, return t; } DECL_NAME (var) = heap_deleted_identifier; - ctx->global->values.remove (var); + ctx->global->remove_value (var); ctx->global->heap_dealloc_count++; return void_node; } @@ -2689,7 +2776,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, new_ctx.object = AGGR_INIT_EXPR_SLOT (t); tree ctor = new_ctx.ctor = build_constructor (DECL_CONTEXT (fun), NULL); CONSTRUCTOR_NO_CLEARING (ctor) = true; - ctx->global->values.put (new_ctx.object, ctor); + ctx->global->put_value (new_ctx.object, ctor); ctx = &new_ctx; } @@ -2912,12 +2999,12 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, if (TREE_CODE (arg) == CONSTRUCTOR) vec_safe_push (ctors, arg); } - ctx->global->values.put (remapped, arg); + ctx->global->put_value (remapped, arg); remapped = DECL_CHAIN (remapped); } /* Add the RESULT_DECL to the values map, too. */ gcc_assert (!DECL_BY_REFERENCE (res)); - ctx->global->values.put (res, NULL_TREE); + ctx->global->put_value (res, NULL_TREE); /* Track the callee's evaluated SAVE_EXPRs and TARGET_EXPRs so that we can forget their values after the call. */ @@ -2972,7 +3059,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, result = void_node; else { - result = *ctx->global->values.get (res); + result = ctx->global->get_value (res); if (result == NULL_TREE && !*non_constant_p && !DECL_DESTRUCTOR_P (fun)) { @@ -2994,15 +3081,15 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, /* Forget the saved values of the callee's SAVE_EXPRs and TARGET_EXPRs. */ for (tree save_expr : save_exprs) - ctx->global->values.remove (save_expr); + ctx->global->remove_value (save_expr); /* Remove the parms/result from the values map. Is it worth bothering to do this when the map itself is only live for one constexpr evaluation? If so, maybe also clear out other vars from call, maybe in BIND_EXPR handling? */ - ctx->global->values.remove (res); + ctx->global->remove_value (res); for (tree parm = parms; parm; parm = TREE_CHAIN (parm)) - ctx->global->values.remove (parm); + ctx->global->remove_value (parm); /* Free any parameter CONSTRUCTORs we aren't returning directly. */ while (!ctors->is_empty ()) @@ -3104,12 +3191,12 @@ reduced_constant_expression_p (tree t) case CONSTRUCTOR: /* And we need to handle PTRMEM_CST wrapped in a CONSTRUCTOR. */ tree field; + if (TREE_CODE (TREE_TYPE (t)) == VECTOR_TYPE) + /* An initialized vector would have a VECTOR_CST. 
*/ + return false; if (CONSTRUCTOR_NO_CLEARING (t)) { - if (TREE_CODE (TREE_TYPE (t)) == VECTOR_TYPE) - /* An initialized vector would have a VECTOR_CST. */ - return false; - else if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) { /* There must be a valid constant initializer at every array index. */ @@ -4839,7 +4926,7 @@ verify_ctor_sanity (const constexpr_ctx *ctx, tree type) (TREE_TYPE (type), TREE_TYPE (otype))))); } gcc_assert (!ctx->object || !DECL_P (ctx->object) - || *(ctx->global->values.get (ctx->object)) == ctx->ctor); + || ctx->global->get_value (ctx->object) == ctx->ctor); } /* Subroutine of cxx_eval_constant_expression. @@ -4956,8 +5043,14 @@ cxx_eval_bare_aggregate (const constexpr_ctx *ctx, tree t, TREE_SIDE_EFFECTS (ctx->ctor) = side_effects_p; } } - if (*non_constant_p || !changed) + if (*non_constant_p) return t; + if (!changed) + { + if (VECTOR_TYPE_P (type)) + t = fold (t); + return t; + } t = ctx->ctor; if (!t) t = build_constructor (type, NULL); @@ -5155,7 +5248,7 @@ cxx_eval_vec_init (const constexpr_ctx *ctx, tree t, new_ctx.object = VEC_INIT_EXPR_SLOT (t); tree ctor = new_ctx.ctor = build_constructor (atype, NULL); CONSTRUCTOR_NO_CLEARING (ctor) = true; - ctx->global->values.put (new_ctx.object, ctor); + ctx->global->put_value (new_ctx.object, ctor); ctx = &new_ctx; } init = expand_vec_init_expr (ctx->object, t, complain); @@ -5841,7 +5934,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, we're initializing. */ tree *valp; if (DECL_P (object)) - valp = ctx->global->values.get (object); + valp = ctx->global->get_value_ptr (object); else valp = NULL; if (!valp) @@ -6073,7 +6166,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, /* The hash table might have moved since the get earlier, and the initializer might have mutated the underlying CONSTRUCTORs, so we must recompute VALP. */ - valp = ctx->global->values.get (object); + valp = ctx->global->get_value_ptr (object); for (unsigned i = 0; i < vec_safe_length (indexes); i++) { ctors[i] = valp; @@ -6228,6 +6321,18 @@ cxx_eval_increment_expression (const constexpr_ctx *ctx, tree t, offset = fold_build1 (NEGATE_EXPR, TREE_TYPE (offset), offset); mod = fold_build2 (POINTER_PLUS_EXPR, type, val, offset); } + else if (c_promoting_integer_type_p (type) + && !TYPE_UNSIGNED (type) + && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) + { + offset = fold_convert (integer_type_node, offset); + mod = fold_convert (integer_type_node, val); + tree t = fold_build2 (inc ? PLUS_EXPR : MINUS_EXPR, integer_type_node, + mod, offset); + mod = fold_convert (type, t); + if (TREE_OVERFLOW_P (mod) && !TREE_OVERFLOW_P (t)) + TREE_OVERFLOW (mod) = false; + } else mod = fold_build2 (inc ? PLUS_EXPR : MINUS_EXPR, type, val, offset); if (!ptr) @@ -6491,7 +6596,7 @@ cxx_eval_loop_expr (const constexpr_ctx *ctx, tree t, /* Forget saved values of SAVE_EXPRs and TARGET_EXPRs. */ for (tree save_expr : save_exprs) - ctx->global->values.remove (save_expr); + ctx->global->remove_value (save_expr); save_exprs.truncate (0); if (++count >= constexpr_loop_limit) @@ -6513,7 +6618,7 @@ cxx_eval_loop_expr (const constexpr_ctx *ctx, tree t, /* Forget saved values of SAVE_EXPRs and TARGET_EXPRs. 
*/ for (tree save_expr : save_exprs) - ctx->global->values.remove (save_expr); + ctx->global->remove_value (save_expr); return NULL_TREE; } @@ -6810,8 +6915,8 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, /* We ask for an rvalue for the RESULT_DECL when indirecting through an invisible reference, or in named return value optimization. */ - if (tree *p = ctx->global->values.get (t)) - return *p; + if (tree v = ctx->global->get_value (t)) + return v; else { if (!ctx->quiet) @@ -6854,10 +6959,9 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, else if (t == ctx->object) return ctx->ctor; if (VAR_P (t)) - if (tree *p = ctx->global->values.get (t)) - if (*p != NULL_TREE) + if (tree v = ctx->global->get_value (t)) { - r = *p; + r = v; break; } if (ctx->manifestly_const_eval) @@ -6900,8 +7004,8 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, case PARM_DECL: if (lval && !TYPE_REF_P (TREE_TYPE (t))) /* glvalue use. */; - else if (tree *p = ctx->global->values.get (r)) - r = *p; + else if (tree v = ctx->global->get_value (r)) + r = v; else if (lval) /* Defer in case this is only used for its type. */; else if (COMPLETE_TYPE_P (TREE_TYPE (t)) @@ -6960,7 +7064,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, new_ctx.object = r; new_ctx.ctor = build_constructor (TREE_TYPE (r), NULL); CONSTRUCTOR_NO_CLEARING (new_ctx.ctor) = true; - ctx->global->values.put (r, new_ctx.ctor); + ctx->global->put_value (r, new_ctx.ctor); ctx = &new_ctx; } @@ -6975,12 +7079,12 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, if (CLASS_TYPE_P (TREE_TYPE (r)) && CP_TYPE_CONST_P (TREE_TYPE (r))) TREE_READONLY (init) = true; - ctx->global->values.put (r, init); + ctx->global->put_value (r, init); } else if (ctx == &new_ctx) /* We gave it a CONSTRUCTOR above. */; else - ctx->global->values.put (r, NULL_TREE); + ctx->global->put_value (r, NULL_TREE); } break; @@ -7003,11 +7107,11 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, gcc_checking_assert (!TARGET_EXPR_DIRECT_INIT_P (t)); /* Avoid evaluating a TARGET_EXPR more than once. */ tree slot = TARGET_EXPR_SLOT (t); - if (tree *p = ctx->global->values.get (slot)) + if (tree v = ctx->global->get_value (slot)) { if (lval) return slot; - r = *p; + r = v; break; } if ((AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))) @@ -7023,7 +7127,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, new_ctx.ctor = build_constructor (type, NULL); CONSTRUCTOR_NO_CLEARING (new_ctx.ctor) = true; new_ctx.object = slot; - ctx->global->values.put (new_ctx.object, new_ctx.ctor); + ctx->global->put_value (new_ctx.object, new_ctx.ctor); ctx = &new_ctx; } /* Pass vc_prvalue because this indicates @@ -7037,7 +7141,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, if (TARGET_EXPR_CLEANUP (t) && !CLEANUP_EH_ONLY (t)) ctx->global->cleanups->safe_push (TARGET_EXPR_CLEANUP (t)); r = unshare_constructor (r); - ctx->global->values.put (slot, r); + ctx->global->put_value (slot, r); if (ctx->save_exprs) ctx->save_exprs->safe_push (slot); if (lval) @@ -7080,15 +7184,15 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, case SAVE_EXPR: /* Avoid evaluating a SAVE_EXPR more than once. 
*/ - if (tree *p = ctx->global->values.get (t)) - r = *p; + if (tree v = ctx->global->get_value (t)) + r = v; else { r = cxx_eval_constant_expression (ctx, TREE_OPERAND (t, 0), vc_prvalue, non_constant_p, overflow_p); if (*non_constant_p) break; - ctx->global->values.put (t, r); + ctx->global->put_value (t, r); if (ctx->save_exprs) ctx->save_exprs->safe_push (t); } @@ -7387,11 +7491,10 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, case CONSTRUCTOR: if (TREE_CONSTANT (t) && reduced_constant_expression_p (t)) { - /* Don't re-process a constant CONSTRUCTOR, but do fold it to - VECTOR_CST if applicable. */ + /* Don't re-process a constant CONSTRUCTOR. */ verify_constructor_flags (t); if (TREE_CONSTANT (t)) - return fold (t); + return t; } r = cxx_eval_bare_aggregate (ctx, t, lval, non_constant_p, overflow_p); @@ -8024,7 +8127,7 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, gcc_assert (same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (object))); if (object && DECL_P (object)) - global_ctx.values.put (object, ctx.ctor); + global_ctx.put_value (object, ctx.ctor); if (TREE_CODE (r) == TARGET_EXPR) /* Avoid creating another CONSTRUCTOR when we expand the TARGET_EXPR. */ diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 74898ca..5e6a3bc 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -323,7 +323,7 @@ resolve_function_concept_overload (tree ovl, tree args) /* Remember the candidate if we can deduce a substitution. */ ++processing_template_decl; tree parms = TREE_VALUE (DECL_TEMPLATE_PARMS (tmpl)); - if (tree subst = coerce_template_parms (parms, args, tmpl)) + if (tree subst = coerce_template_parms (parms, args, tmpl, tf_none)) { if (subst == error_mark_node) ++nerrs; @@ -404,7 +404,7 @@ resolve_concept_check (tree check) tree args = TREE_OPERAND (id, 1); tree parms = INNERMOST_TEMPLATE_PARMS (DECL_TEMPLATE_PARMS (tmpl)); ++processing_template_decl; - tree result = coerce_template_parms (parms, args, tmpl); + tree result = coerce_template_parms (parms, args, tmpl, tf_none); --processing_template_decl; if (result == error_mark_node) return error_mark_node; @@ -726,7 +726,7 @@ normalize_concept_check (tree check, tree args, norm_info info) /* Turn on template processing; coercing non-type template arguments will automatically assume they're non-dependent. 
*/ ++processing_template_decl; - tree subst = coerce_template_parms (parms, targs, tmpl); + tree subst = coerce_template_parms (parms, targs, tmpl, tf_none); --processing_template_decl; if (subst == error_mark_node) return error_mark_node; diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index 28c3398..cc8bfad 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -3378,10 +3378,10 @@ fold_builtin_source_location (location_t loc) if (const char *fname = LOCATION_FILE (loc)) { fname = remap_macro_filename (fname); - val = build_string_literal (strlen (fname) + 1, fname); + val = build_string_literal (fname); } else - val = build_string_literal (1, ""); + val = build_string_literal (""); } else if (strcmp (n, "_M_function_name") == 0) { @@ -3390,7 +3390,7 @@ fold_builtin_source_location (location_t loc) if (current_function_decl) name = cxx_printable_name (current_function_decl, 2); - val = build_string_literal (strlen (name) + 1, name); + val = build_string_literal (name); } else if (strcmp (n, "_M_line") == 0) val = build_int_cst (TREE_TYPE (field), LOCATION_LINE (loc)); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 60a2510..867096b 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -772,11 +772,20 @@ typedef struct ptrmem_cst * ptrmem_cst_t; /* Returns nonzero iff NODE is a declaration for the global function `main'. */ -#define DECL_MAIN_P(NODE) \ +#define DECL_MAIN_ANY_P(NODE) \ (DECL_EXTERN_C_FUNCTION_P (NODE) \ && DECL_NAME (NODE) != NULL_TREE \ - && MAIN_NAME_P (DECL_NAME (NODE)) \ - && flag_hosted) + && MAIN_NAME_P (DECL_NAME (NODE))) + +/* Nonzero iff NODE is a declaration for `int main', or we are hosted. */ +#define DECL_MAIN_FREESTANDING_P(NODE) \ + (DECL_MAIN_ANY_P(NODE) \ + && (flag_hosted \ + || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (NODE))) \ + == integer_type_node)) + +/* Nonzero iff NODE is a declaration for `main', and we are hosted. */ +#define DECL_MAIN_P(NODE) (DECL_MAIN_ANY_P(NODE) && flag_hosted) /* Lookup walker marking. 
*/ #define LOOKUP_SEEN_P(NODE) TREE_VISITED (NODE) @@ -7459,8 +7468,8 @@ extern tree get_function_template_decl (const_tree); extern tree resolve_nondeduced_context (tree, tsubst_flags_t); extern tree resolve_nondeduced_context_or_error (tree, tsubst_flags_t); extern hashval_t iterative_hash_template_arg (tree arg, hashval_t val); -extern tree coerce_template_parms (tree, tree, tree); -extern tree coerce_template_parms (tree, tree, tree, tsubst_flags_t); +extern tree coerce_template_parms (tree, tree, tree, tsubst_flags_t, + bool = true); extern tree canonicalize_type_argument (tree, tsubst_flags_t); extern void register_local_specialization (tree, tree); extern tree retrieve_local_specialization (tree); @@ -7749,7 +7758,6 @@ extern tree build_transaction_expr (location_t, tree, int, tree); extern bool cxx_omp_create_clause_info (tree, tree, bool, bool, bool, bool); extern tree baselink_for_fns (tree); -extern void diagnose_failing_condition (tree, location_t, bool); extern void finish_static_assert (tree, tree, location_t, bool, bool); extern tree finish_decltype_type (tree, bool, tsubst_flags_t); @@ -8488,7 +8496,9 @@ extern void clear_cv_and_fold_caches (void); extern tree unshare_constructor (tree CXX_MEM_STAT_INFO); extern bool decl_implicit_constexpr_p (tree); struct constexpr_ctx; -extern tree find_failing_clause (constexpr_ctx *ctx, tree); +extern tree find_failing_clause (const constexpr_ctx *ctx, tree); +extern void diagnose_failing_condition (tree, location_t, bool, + const constexpr_ctx * = nullptr); extern bool replace_decl (tree *, tree, tree); /* An RAII sentinel used to restrict constexpr evaluation so that it diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 85b892c..bc085f8 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -17852,7 +17852,8 @@ finish_function (bool inline_p) if (!DECL_CLONED_FUNCTION_P (fndecl)) { /* Make it so that `main' always returns 0 by default. */ - if (DECL_MAIN_P (current_function_decl)) + if (DECL_MAIN_FREESTANDING_P (current_function_decl) + && !TREE_THIS_VOLATILE (current_function_decl)) finish_return_stmt (integer_zero_node); if (use_eh_spec_block (current_function_decl)) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 73971e7..9957df5 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -6017,9 +6017,17 @@ trees_out::core_vals (tree t) if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON)) { + if (code == ENUMERAL_TYPE) + { + /* These fields get set even for opaque enums that lack a + definition, so we stream them directly for each ENUMERAL_TYPE. + We stream TYPE_VALUES as part of the definition. */ + WT (t->type_non_common.maxval); + WT (t->type_non_common.minval); + } /* Records and unions hold FIELDS, VFIELD & BINFO on these things. */ - if (!RECORD_OR_UNION_CODE_P (code) && code != ENUMERAL_TYPE) + else if (!RECORD_OR_UNION_CODE_P (code)) { // FIXME: These are from tpl_parm_value's 'type' writing. // Perhaps it should just be doing them directly? @@ -6530,9 +6538,17 @@ trees_in::core_vals (tree t) if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON)) { + if (code == ENUMERAL_TYPE) + { + /* These fields get set even for opaque enums that lack a + definition, so we stream them directly for each ENUMERAL_TYPE. + We stream TYPE_VALUES as part of the definition. */ + RT (t->type_non_common.maxval); + RT (t->type_non_common.minval); + } /* Records and unions hold FIELDS, VFIELD & BINFO on these things. 
*/ - if (!RECORD_OR_UNION_CODE_P (code) && code != ENUMERAL_TYPE) + else if (!RECORD_OR_UNION_CODE_P (code)) { /* This is not clobbering TYPE_CACHED_VALUES, because this is a type that doesn't have any. */ @@ -12217,8 +12233,8 @@ trees_out::write_enum_def (tree decl) tree type = TREE_TYPE (decl); tree_node (TYPE_VALUES (type)); - tree_node (TYPE_MIN_VALUE (type)); - tree_node (TYPE_MAX_VALUE (type)); + /* Note that we stream TYPE_MIN/MAX_VALUE directly as part of the + ENUMERAL_TYPE. */ } void @@ -12242,8 +12258,6 @@ trees_in::read_enum_def (tree defn, tree maybe_template) { tree type = TREE_TYPE (defn); tree values = tree_node (); - tree min = tree_node (); - tree max = tree_node (); if (get_overrun ()) return false; @@ -12254,8 +12268,8 @@ trees_in::read_enum_def (tree defn, tree maybe_template) if (installing) { TYPE_VALUES (type) = values; - TYPE_MIN_VALUE (type) = min; - TYPE_MAX_VALUE (type) = max; + /* Note that we stream TYPE_MIN/MAX_VALUE directly as part of the + ENUMERAL_TYPE. */ rest_of_type_compilation (type, DECL_NAMESPACE_SCOPE_P (defn)); } @@ -12269,22 +12283,17 @@ trees_in::read_enum_def (tree defn, tree maybe_template) tree new_decl = TREE_VALUE (values); if (DECL_NAME (known_decl) != DECL_NAME (new_decl)) - goto bad; + break; new_decl = maybe_duplicate (new_decl); if (!cp_tree_equal (DECL_INITIAL (known_decl), DECL_INITIAL (new_decl))) - goto bad; + break; } if (known || values) - goto bad; - - if (!cp_tree_equal (TYPE_MIN_VALUE (type), min) - || !cp_tree_equal (TYPE_MAX_VALUE (type), max)) { - bad:; error_at (DECL_SOURCE_LOCATION (maybe_dup), "definition of %qD does not match", maybe_dup); inform (DECL_SOURCE_LOCATION (defn), diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index a39c5f0..e685f19 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -48996,12 +48996,11 @@ synthesize_implicit_template_parm (cp_parser *parser, tree constr) tree proto = constr ? DECL_INITIAL (constr) : NULL_TREE; tree synth_id = make_generic_type_name (); - tree synth_tmpl_parm; bool non_type = false; /* Synthesize the type template parameter. */ gcc_assert(!proto || TREE_CODE (proto) == TYPE_DECL); - synth_tmpl_parm = finish_template_type_parm (class_type_node, synth_id); + tree synth_tmpl_parm = finish_template_type_parm (class_type_node, synth_id); if (become_template) current_template_parms = tree_cons (size_int (current_template_depth + 1), @@ -49016,22 +49015,27 @@ synthesize_implicit_template_parm (cp_parser *parser, tree constr) node, /*non_type=*/non_type, /*param_pack=*/false); + // Process_template_parm returns the list of parms, and + // parser->implicit_template_parms holds the final node of the parm + // list. We really want to manipulate the newly appended element. + gcc_checking_assert (!parser->implicit_template_parms + || parser->implicit_template_parms == new_parm); + if (parser->implicit_template_parms) + new_parm = TREE_CHAIN (new_parm); + gcc_checking_assert (!TREE_CHAIN (new_parm)); + + // Record the last implicit parm node + parser->implicit_template_parms = new_parm; /* Mark the synthetic declaration "virtual". This is used when comparing template-heads to determine if whether an abbreviated function template is equivalent to an explicit template. - Note that DECL_ARTIFICIAL is used elsewhere for template parameters. */ + Note that DECL_ARTIFICIAL is used elsewhere for template + parameters. */ if (TREE_VALUE (new_parm) != error_mark_node) DECL_VIRTUAL_P (TREE_VALUE (new_parm)) = true; - // Chain the new parameter to the list of implicit parameters. 
- if (parser->implicit_template_parms) - parser->implicit_template_parms - = TREE_CHAIN (parser->implicit_template_parms); - else - parser->implicit_template_parms = new_parm; - tree new_decl = get_local_decls (); if (non_type) /* Return the TEMPLATE_PARM_INDEX, not the PARM_DECL. */ @@ -49059,7 +49063,7 @@ synthesize_implicit_template_parm (cp_parser *parser, tree constr) /* If the new parameter was constrained, we need to add that to the constraints in the template parameter list. */ - if (tree req = TEMPLATE_PARM_CONSTRAINTS (tree_last (new_parm))) + if (tree req = TEMPLATE_PARM_CONSTRAINTS (new_parm)) { tree reqs = TEMPLATE_PARMS_CONSTRAINTS (current_template_parms); reqs = combine_constraint_expressions (reqs, req); diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 1289aab..51bfbbc 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -148,10 +148,8 @@ static void add_pending_template (tree); static tree reopen_tinst_level (struct tinst_level *); static tree tsubst_initializer_list (tree, tree); static tree get_partial_spec_bindings (tree, tree, tree); -static tree coerce_template_parms (tree, tree, tree, tsubst_flags_t, - bool, bool); static tree coerce_innermost_template_parms (tree, tree, tree, tsubst_flags_t, - bool, bool); + bool = true); static void tsubst_enum (tree, tree, tree); static bool check_instantiated_args (tree, tree, tsubst_flags_t); static int check_non_deducible_conversion (tree, tree, unification_kind_t, int, @@ -2172,8 +2170,7 @@ determine_specialization (tree template_id, { tree parms = INNERMOST_TEMPLATE_PARMS (DECL_TEMPLATE_PARMS (fns)); targs = coerce_template_parms (parms, explicit_targs, fns, - tf_warning_or_error, - /*req_all*/true, /*use_defarg*/true); + tf_warning_or_error); if (targs != error_mark_node && constraints_satisfied_p (fns, targs)) templates = tree_cons (targs, fns, templates); @@ -7833,10 +7830,7 @@ coerce_template_args_for_ttp (tree templ, tree arglist, arglist = add_to_template_args (outer, arglist); tree parmlist = DECL_INNERMOST_TEMPLATE_PARMS (templ); - return coerce_template_parms (parmlist, arglist, templ, - complain, - /*require_all_args=*/true, - /*use_default_args=*/true); + return coerce_template_parms (parmlist, arglist, templ, complain); } /* A cache of template template parameters with match-all default @@ -7910,9 +7904,7 @@ coerce_ttp_args_for_tta (tree& arg, tree pargs, tsubst_flags_t complain) { tree aparms = INNERMOST_TEMPLATE_PARMS (DECL_TEMPLATE_PARMS (arg_tmpl)); - pargs = coerce_template_parms (aparms, pargs, arg_tmpl, complain, - /*require_all*/true, - /*use_default*/true); + pargs = coerce_template_parms (aparms, pargs, arg_tmpl, complain); } --processing_template_decl; return pargs; @@ -8079,8 +8071,7 @@ coerce_template_template_parms (tree parm_parms_full, pargs = add_to_template_args (outer_args, pargs); } - pargs = coerce_template_parms (arg_parms, pargs, NULL_TREE, tf_none, - /*require_all*/true, /*use_default*/true); + pargs = coerce_template_parms (arg_parms, pargs, NULL_TREE, tf_none); if (pargs != error_mark_node) { tree targs = make_tree_vec (nargs); @@ -8840,19 +8831,16 @@ pack_expansion_args_count (tree args) warning messages are issued under control of COMPLAIN. If REQUIRE_ALL_ARGS is false, argument deduction will be performed - for arguments not specified in ARGS. Otherwise, if - USE_DEFAULT_ARGS is true, default arguments will be used to fill in - unspecified arguments. If REQUIRE_ALL_ARGS is true, but - USE_DEFAULT_ARGS is false, then all arguments must be specified in - ARGS. 
*/ + for arguments not specified in ARGS. If REQUIRE_ALL_ARGS is true, + arguments not specified in ARGS must have default arguments which + we'll use to fill in ARGS. */ -static tree +tree coerce_template_parms (tree parms, tree args, tree in_decl, tsubst_flags_t complain, - bool require_all_args, - bool use_default_args) + bool require_all_args /* = true */) { int nparms, nargs, parm_idx, arg_idx, lost = 0; tree orig_inner_args; @@ -8913,9 +8901,8 @@ coerce_template_parms (tree parms, || (nargs < nparms - variadic_p && require_all_args && !variadic_args_p - && (!use_default_args - || (TREE_VEC_ELT (parms, nargs) != error_mark_node - && !TREE_PURPOSE (TREE_VEC_ELT (parms, nargs)))))) + && (TREE_VEC_ELT (parms, nargs) != error_mark_node + && !TREE_PURPOSE (TREE_VEC_ELT (parms, nargs))))) { bad_nargs: if (complain & tf_error) @@ -9183,30 +9170,6 @@ coerce_template_parms (tree parms, return new_inner_args; } -/* Convert all template arguments to their appropriate types, and - return a vector containing the innermost resulting template - arguments. If any error occurs, return error_mark_node. Error and - warning messages are not issued. - - Note that no function argument deduction is performed, and default - arguments are used to fill in unspecified arguments. */ -tree -coerce_template_parms (tree parms, tree args, tree in_decl) -{ - return coerce_template_parms (parms, args, in_decl, tf_none, true, true); -} - -/* Convert all template arguments to their appropriate type, and - instantiate default arguments as needed. This returns a vector - containing the innermost resulting template arguments, or - error_mark_node if unsuccessful. */ -tree -coerce_template_parms (tree parms, tree args, tree in_decl, - tsubst_flags_t complain) -{ - return coerce_template_parms (parms, args, in_decl, complain, true, true); -} - /* Like coerce_template_parms. If PARMS represents all template parameters levels, this function returns a vector of vectors representing all the resulting argument levels. Note that in this @@ -9219,11 +9182,10 @@ coerce_template_parms (tree parms, tree args, tree in_decl, static tree coerce_innermost_template_parms (tree parms, - tree args, - tree in_decl, - tsubst_flags_t complain, - bool require_all_args, - bool use_default_args) + tree args, + tree in_decl, + tsubst_flags_t complain, + bool require_all_args /* = true */) { int parms_depth = TMPL_PARMS_DEPTH (parms); int args_depth = TMPL_ARGS_DEPTH (args); @@ -9243,8 +9205,7 @@ coerce_innermost_template_parms (tree parms, if (cur_depth == args_depth) l = coerce_template_parms (TREE_VALUE (level), args, in_decl, complain, - require_all_args, - use_default_args); + require_all_args); else l = TMPL_ARGS_LEVEL (args, cur_depth); @@ -9257,8 +9218,7 @@ coerce_innermost_template_parms (tree parms, else coerced_args = coerce_template_parms (INNERMOST_TEMPLATE_PARMS (parms), args, in_decl, complain, - require_all_args, - use_default_args); + require_all_args); return coerced_args; } @@ -9953,9 +9913,7 @@ lookup_template_class (tree d1, tree arglist, tree in_decl, tree context, actually tsubst'd into the definition to create the instantiation. */ arglist = coerce_innermost_template_parms (parmlist, arglist, gen_tmpl, - complain, - /*require_all_args=*/true, - /*use_default_args=*/true); + complain); if (arglist == error_mark_node) /* We were unable to bind the arguments. 
*/ @@ -10371,9 +10329,7 @@ finish_template_variable (tree var, tsubst_flags_t complain) tree arglist = TREE_OPERAND (var, 1); tree parms = DECL_TEMPLATE_PARMS (templ); - arglist = coerce_innermost_template_parms (parms, arglist, templ, complain, - /*req_all*/true, - /*use_default*/true); + arglist = coerce_innermost_template_parms (parms, arglist, templ, complain); if (arglist == error_mark_node) return error_mark_node; @@ -15022,8 +14978,7 @@ tsubst_decl (tree t, tree args, tsubst_flags_t complain) the template. */ argvec = (coerce_innermost_template_parms (DECL_TEMPLATE_PARMS (gen_tmpl), - argvec, t, complain, - /*all*/true, /*defarg*/true)); + argvec, t, complain)); if (argvec == error_mark_node) RETURN (error_mark_node); hash = spec_hasher::hash (gen_tmpl, argvec); @@ -21956,11 +21911,8 @@ instantiate_alias_template (tree tmpl, tree args, tsubst_flags_t complain) if (tmpl == error_mark_node || args == error_mark_node) return error_mark_node; - args = - coerce_innermost_template_parms (DECL_TEMPLATE_PARMS (tmpl), - args, tmpl, complain, - /*require_all_args=*/true, - /*use_default_args=*/true); + args = coerce_innermost_template_parms (DECL_TEMPLATE_PARMS (tmpl), + args, tmpl, complain); /* FIXME check for satisfaction in check_instantiated_args. */ if (flag_concepts @@ -22210,8 +22162,7 @@ fn_type_unification (tree fn, explicit_targs = (coerce_template_parms (tparms, explicit_targs, fn, complain|tf_partial, - /*require_all_args=*/false, - /*use_default_args=*/false)); + /*require_all_args=*/false)); if (explicit_targs == error_mark_node) goto fail; @@ -23304,9 +23255,7 @@ resolve_overloaded_unification (tree tparms, continue; subargs = coerce_template_parms (DECL_INNERMOST_TEMPLATE_PARMS (fn), - expl_subargs, NULL_TREE, tf_none, - /*require_all_args=*/true, - /*use_default_args=*/true); + expl_subargs, NULL_TREE, tf_none); if (subargs != error_mark_node && !any_dependent_template_arguments_p (subargs)) { @@ -23450,9 +23399,7 @@ resolve_nondeduced_context (tree orig_expr, tsubst_flags_t complain) continue; subargs = coerce_template_parms (DECL_INNERMOST_TEMPLATE_PARMS (fn), - expl_subargs, NULL_TREE, tf_none, - /*require_all_args=*/true, - /*use_default_args=*/true); + expl_subargs, NULL_TREE, tf_none); if (subargs != error_mark_node && !any_dependent_template_arguments_p (subargs)) { @@ -25581,7 +25528,7 @@ get_partial_spec_bindings (tree tmpl, tree spec_tmpl, tree args) if (spec_args != error_mark_node) spec_args = coerce_template_parms (DECL_INNERMOST_TEMPLATE_PARMS (tmpl), INNERMOST_TEMPLATE_ARGS (spec_args), - tmpl, tf_none, false, false); + tmpl, tf_none, false); pop_tinst_level (); diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 82f9dd8..36aa9c4 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -11183,33 +11183,6 @@ init_cp_semantics (void) } -/* Emit additional diagnostics for failing condition BAD. - Used by finish_static_assert and IFN_ASSUME constexpr diagnostics. - If SHOW_EXPR_P is true, print the condition (because it was - instantiation-dependent). */ - -void -diagnose_failing_condition (tree bad, location_t cloc, bool show_expr_p) -{ - /* Nobody wants to see the artificial (bool) cast. */ - bad = tree_strip_nop_conversions (bad); - - /* Actually explain the failure if this is a concept check or a - requires-expression. 
*/ - if (concept_check_p (bad) || TREE_CODE (bad) == REQUIRES_EXPR) - diagnose_constraints (cloc, bad, NULL_TREE); - else if (COMPARISON_CLASS_P (bad) - && ARITHMETIC_TYPE_P (TREE_TYPE (TREE_OPERAND (bad, 0)))) - { - tree op0 = fold_non_dependent_expr (TREE_OPERAND (bad, 0)); - tree op1 = fold_non_dependent_expr (TREE_OPERAND (bad, 1)); - tree cond = build2 (TREE_CODE (bad), boolean_type_node, op0, op1); - inform (cloc, "the comparison reduces to %qE", cond); - } - else if (show_expr_p) - inform (cloc, "%qE evaluates to false", bad); -} - /* Build a STATIC_ASSERT for a static assertion with the condition CONDITION and the message text MESSAGE. LOCATION is the location of the static assertion in the source code. When MEMBER_P, this diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 16e7d85..ab6979b 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -5191,6 +5191,8 @@ cp_build_binary_op (const op_location_t &location, orig_type0 = type0 = TREE_TYPE (op0); orig_type1 = type1 = TREE_TYPE (op1); + tree non_ep_op0 = op0; + tree non_ep_op1 = op1; /* The expression codes of the data types of the arguments tell us whether the arguments are integers, floating, pointers, etc. */ @@ -5303,8 +5305,9 @@ cp_build_binary_op (const op_location_t &location, if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE) || (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE)) { - enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1, - complain & tf_error); + enum stv_conv convert_flag + = scalar_to_vector (location, code, non_ep_op0, non_ep_op1, + complain & tf_error); switch (convert_flag) { @@ -10726,7 +10729,12 @@ treat_lvalue_as_rvalue_p (tree expr, bool return_p) if (DECL_CONTEXT (retval) != current_function_decl) return NULL_TREE; if (return_p) - return set_implicit_rvalue_p (move (expr)); + { + expr = move (expr); + if (expr == error_mark_node) + return NULL_TREE; + return set_implicit_rvalue_p (expr); + } /* if the operand of a throw-expression is a (possibly parenthesized) id-expression that names an implicitly movable entity whose scope does not diff --git a/gcc/cp/vtable-class-hierarchy.cc b/gcc/cp/vtable-class-hierarchy.cc index cc1df1e..1e180ea 100644 --- a/gcc/cp/vtable-class-hierarchy.cc +++ b/gcc/cp/vtable-class-hierarchy.cc @@ -467,19 +467,6 @@ check_and_record_registered_pairs (tree vtable_decl, tree vptr_address, return !inserted_something; } -/* Given an IDENTIFIER_NODE, build and return a string literal based on it. */ - -static tree -build_string_from_id (tree identifier) -{ - int len; - - gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE); - - len = IDENTIFIER_LENGTH (identifier); - return build_string_literal (len + 1, IDENTIFIER_POINTER (identifier)); -} - /* A class may contain secondary vtables in it, for various reasons. 
This function goes through the decl chain of a class record looking for any fields that point to secondary vtables, and adding calls to @@ -920,7 +907,7 @@ register_all_pairs (tree body) if (flag_vtv_debug) - str1 = build_string_from_id (DECL_NAME (base_ptr_var_decl)); + str1 = build_string_literal (DECL_NAME (base_ptr_var_decl)); new_type = build_pointer_type (TREE_TYPE (base_ptr_var_decl)); arg1 = build1 (ADDR_EXPR, new_type, base_ptr_var_decl); @@ -953,7 +940,7 @@ register_all_pairs (tree body) if (vtable_decl) { vtable_should_be_output = TREE_ASM_WRITTEN (vtable_decl); - str2 = build_string_from_id (DECL_NAME (vtable_decl)); + str2 = build_string_literal (DECL_NAME (vtable_decl)); } if (vtable_decl && vtable_should_be_output) @@ -1009,8 +996,7 @@ register_all_pairs (tree body) arg2 = build_key_buffer_arg (base_ptr_var_decl); if (str2 == NULL_TREE) - str2 = build_string_literal (strlen ("unknown") + 1, - "unknown"); + str2 = build_string_literal ("unknown"); if (flag_vtv_debug) output_set_info (current->class_info->class_type, diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc index fc28d16..7110db4 100644 --- a/gcc/diagnostic-format-sarif.cc +++ b/gcc/diagnostic-format-sarif.cc @@ -595,7 +595,7 @@ sarif_builder::make_location_object (const diagnostic_event &event) json::object * sarif_builder::maybe_make_physical_location_object (location_t loc) { - if (loc <= BUILTINS_LOCATION) + if (loc <= BUILTINS_LOCATION || LOCATION_FILE (loc) == NULL) return NULL; json::object *phys_loc_obj = new json::object (); diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 112f83d..bc63a53 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -1523,6 +1523,25 @@ systems that support conditional traps). Division by zero checks use the break instruction. @end table +@item --with-compact-branches=@var{policy} +Specify how the compiler should generate branch instructions. +This option is only supported on the MIPS target. +The possibilities for @var{type} are: +@table @code +@item optimal +Cause a delay slot branch to be used if one is available in the +current ISA and the delay slot is successfully filled. If the delay slot +is not filled, a compact branch will be chosen if one is available. +@item never +Ensures that compact branch instructions will never be generated. +@item always +Ensures that a compact branch instruction will be generated if available. +If a compact branch instruction is not available, +a delay slot form of the branch will be used instead. +This option is supported from MIPS Release 6 onwards. +For pre-R6/microMIPS/MIPS16, this option is just same as never/optimal. +@end table + @c If you make --with-llsc the default for additional targets, @c update the --with-llsc description in the MIPS section below. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4db4bcb..ed79440 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10421,6 +10421,7 @@ of the following functions for working with file descriptors: @item @code{close} @item @code{creat} @item @code{dup}, @code{dup2} and @code{dup3} +@item @code{pipe}, and @code{pipe2} @item @code{read} @item @code{write} @end itemize @@ -27048,11 +27049,13 @@ The @option{-mcompact-branches=never} option ensures that compact branch instructions will never be generated. The @option{-mcompact-branches=always} option ensures that a compact -branch instruction will be generated if available. 
If a compact branch -instruction is not available, a delay slot form of the branch will be -used instead. +branch instruction will be generated if available for MIPS Release 6 onwards. +If a compact branch instruction is not available (or pre-R6), +a delay slot form of the branch will be used instead. -This option is supported from MIPS Release 6 onwards. +If it is used for MIPS16/microMIPS targets, it will be just ignored now. +The behaviour for MIPS16/microMIPS may change in future, +since they do have some compact branch instructions. The @option{-mcompact-branches=optimal} option will cause a delay slot branch to be used if one is available in the current ISA and the delay diff --git a/gcc/expr.cc b/gcc/expr.cc index efe387e..9145193 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7905,8 +7905,7 @@ get_inner_reference (tree exp, poly_int64_pod *pbitsize, /* For vector fields re-check the target flags, as DECL_MODE could have been set with different target flags than the current function has. */ - if (mode == BLKmode - && VECTOR_TYPE_P (TREE_TYPE (field)) + if (VECTOR_TYPE_P (TREE_TYPE (field)) && VECTOR_MODE_P (TYPE_MODE_RAW (TREE_TYPE (field)))) mode = TYPE_MODE (TREE_TYPE (field)); } diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index ab1810e..f764113 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,13 @@ +2022-10-21 José Rui Faustino de Sousa <jrfsousa@gmail.com> + + PR fortran/100097 + PR fortran/100098 + * trans-array.cc (gfc_trans_class_array): New function to + initialize class descriptor's TKR information. + * trans-array.h (gfc_trans_class_array): Add function prototype. + * trans-decl.cc (gfc_trans_deferred_vars): Add calls to the new + function for both pointers and allocatables. + 2022-10-20 Harald Anlauf <anlauf@gmx.de> Steven G. Kargl <kargl@gcc.gnu.org> diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 795ce14..514cb05 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -11125,6 +11125,52 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo *loop, } +/* Initialize class descriptor's TKR infomation. */ + +void +gfc_trans_class_array (gfc_symbol * sym, gfc_wrapped_block * block) +{ + tree type, etype; + tree tmp; + tree descriptor; + stmtblock_t init; + locus loc; + int rank; + + /* Make sure the frontend gets these right. */ + gcc_assert (sym->ts.type == BT_CLASS && CLASS_DATA (sym) + && (CLASS_DATA (sym)->attr.class_pointer + || CLASS_DATA (sym)->attr.allocatable)); + + gcc_assert (VAR_P (sym->backend_decl) + || TREE_CODE (sym->backend_decl) == PARM_DECL); + + if (sym->attr.dummy) + return; + + descriptor = gfc_class_data_get (sym->backend_decl); + type = TREE_TYPE (descriptor); + + if (type == NULL || !GFC_DESCRIPTOR_TYPE_P (type)) + return; + + gfc_save_backend_locus (&loc); + gfc_set_backend_locus (&sym->declared_at); + gfc_init_block (&init); + + rank = CLASS_DATA (sym)->as ? (CLASS_DATA (sym)->as->rank) : (0); + gcc_assert (rank>=0); + tmp = gfc_conv_descriptor_dtype (descriptor); + etype = gfc_get_element_type (type); + tmp = fold_build2_loc (input_location, MODIFY_EXPR, TREE_TYPE (tmp), tmp, + gfc_get_dtype_rank_type (rank, etype)); + gfc_add_expr_to_block (&init, tmp); + + gfc_add_init_cleanup (block, gfc_finish_block (&init), NULL_TREE); + gfc_restore_backend_locus (&loc); +} + + /* NULLIFY an allocatable/pointer array on function entry, free it on exit. Do likewise, recursively if necessary, with the allocatable components of derived types. 
This function is also called for assumed-rank arrays, which diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index 04fee61..cd2b3d9 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -69,6 +69,8 @@ tree gfc_check_pdt_dummy (gfc_symbol *, tree, int, gfc_actual_arglist *); tree gfc_alloc_allocatable_for_assignment (gfc_loopinfo*, gfc_expr*, gfc_expr*); +/* Add initialization for class descriptors */ +void gfc_trans_class_array (gfc_symbol *, gfc_wrapped_block *); /* Add initialization for deferred arrays. */ void gfc_trans_deferred_array (gfc_symbol *, gfc_wrapped_block *); /* Generate an initializer for a static pointer or allocatable array. */ diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index 4b570c3..63515b9 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -4835,7 +4835,7 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block) else if ((!sym->attr.dummy || sym->ts.deferred) && (sym->ts.type == BT_CLASS && CLASS_DATA (sym)->attr.class_pointer)) - continue; + gfc_trans_class_array (sym, block); else if ((!sym->attr.dummy || sym->ts.deferred) && (sym->attr.allocatable || (sym->attr.pointer && sym->attr.result) @@ -4919,6 +4919,10 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block) tmp = NULL_TREE; } + /* Initialize descriptor's TKR information. */ + if (sym->ts.type == BT_CLASS) + gfc_trans_class_array (sym, block); + /* Deallocate when leaving the scope. Nullifying is not needed. */ if (!sym->attr.result && !sym->attr.dummy && !sym->attr.pointer diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 42a996d..f06ce3c 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -1771,14 +1771,12 @@ gimple_add_init_for_auto_var (tree decl, tree decl_name = NULL_TREE; if (DECL_NAME (decl)) - decl_name = build_string_literal (IDENTIFIER_LENGTH (DECL_NAME (decl)) + 1, - IDENTIFIER_POINTER (DECL_NAME (decl))); + decl_name = build_string_literal (DECL_NAME (decl)); else { char *decl_name_anonymous = xasprintf ("D.%u", DECL_UID (decl)); - decl_name = build_string_literal (strlen (decl_name_anonymous) + 1, - decl_name_anonymous); + decl_name = build_string_literal (decl_name_anonymous); free (decl_name_anonymous); } @@ -3570,7 +3568,7 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) return GS_OK; } /* If not optimizing, ignore the assumptions. */ - if (!optimize) + if (!optimize || seen_error ()) { *expr_p = NULL_TREE; return GS_ALL_DONE; @@ -3586,7 +3584,7 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) a separate function easily. */ tree guard = create_tmp_var (boolean_type_node); *expr_p = build2 (MODIFY_EXPR, void_type_node, guard, - CALL_EXPR_ARG (*expr_p, 0)); + gimple_boolify (CALL_EXPR_ARG (*expr_p, 0))); *expr_p = build3 (BIND_EXPR, void_type_node, NULL, *expr_p, NULL); push_gimplify_context (); gimple_seq body = NULL; @@ -4272,7 +4270,7 @@ gimple_boolify (tree expr) default: if (COMPARISON_CLASS_P (expr)) { - /* There expressions always prduce boolean results. */ + /* These expressions always produce boolean results. */ if (TREE_CODE (type) != BOOLEAN_TYPE) TREE_TYPE (expr) = boolean_type_node; return expr; diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc index 9a76470..b12bcc1 100644 --- a/gcc/lto-wrapper.cc +++ b/gcc/lto-wrapper.cc @@ -2010,8 +2010,8 @@ cont: truncate them as soon as we have processed it. This reduces temporary disk-space usage. */ if (! 
save_temps) - fprintf (mstream, "\t@-touch -r %s %s.tem > /dev/null 2>&1 " - "&& mv %s.tem %s\n", + fprintf (mstream, "\t@-touch -r \"%s\" \"%s.tem\" > /dev/null " + "2>&1 && mv \"%s.tem\" \"%s\"\n", input_name, input_name, input_name, input_name); } else diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc index 5dc0bf1..c636a17 100644 --- a/gcc/omp-expand.cc +++ b/gcc/omp-expand.cc @@ -10054,13 +10054,8 @@ expand_omp_target (struct omp_region *region) /* Handle the case that an inner ancestor:1 target is called by an outer target region. */ - if (!is_ancestor) - cgraph_node::get (child_fn)->calls_declare_variant_alt - |= cgraph_node::get (cfun->decl)->calls_declare_variant_alt; - else /* Duplicate function to create empty nonhost variant. */ + if (is_ancestor) { - /* Enable pass_omp_device_lower pass. */ - cgraph_node::get (cfun->decl)->calls_declare_variant_alt = 1; cgraph_node *fn2_node; child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn), FUNCTION_DECL, @@ -10074,7 +10069,7 @@ expand_omp_target (struct omp_region *region) TREE_PUBLIC (child_fn2) = 0; DECL_UNINLINABLE (child_fn2) = 1; DECL_EXTERNAL (child_fn2) = 0; - DECL_CONTEXT (child_fn2) = NULL_TREE; + DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn); DECL_INITIAL (child_fn2) = make_node (BLOCK); BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2; DECL_ATTRIBUTES (child_fn) @@ -10098,6 +10093,10 @@ expand_omp_target (struct omp_region *region) fn2_node->force_output = 1; node->offloadable = 0; + /* Enable pass_omp_device_lower pass. */ + fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn)); + fn2_node->calls_declare_variant_alt = 1; + t = build_decl (DECL_SOURCE_LOCATION (child_fn), RESULT_DECL, NULL_TREE, void_type_node); DECL_ARTIFICIAL (t) = 1; diff --git a/gcc/profile.cc b/gcc/profile.cc index 96121d6..1527a04 100644 --- a/gcc/profile.cc +++ b/gcc/profile.cc @@ -1457,11 +1457,13 @@ branch_prob (bool thunk) if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb) { location_t loc = DECL_SOURCE_LOCATION (current_function_decl); - gcc_checking_assert (!RESERVED_LOCATION_P (loc)); - seen_locations.add (loc); - expanded_location curr_location = expand_location (loc); - output_location (&streamed_locations, curr_location.file, - MAX (1, curr_location.line), &offset, bb); + if (!RESERVED_LOCATION_P (loc)) + { + seen_locations.add (loc); + expanded_location curr_location = expand_location (loc); + output_location (&streamed_locations, curr_location.file, + MAX (1, curr_location.line), &offset, bb); + } } for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 8777bc70..04208c8 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1269,6 +1269,15 @@ foperator_abs::op1_range (frange &r, tree type, positives.update_nan (/*sign=*/false); positives.intersect (lhs); r = positives; + // Add -NAN if relevant. + if (r.maybe_isnan ()) + { + frange neg_nan; + neg_nan.set_nan (type, true); + r.union_ (neg_nan); + } + if (r.known_isnan ()) + return true; // Then add the negative of each pair: // ABS(op1) = [5,20] would yield op1 => [-20,-5][5,20]. r.union_ (frange (type, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a201268..09c4108 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,237 @@ +2022-10-25 Patrick Palka <ppalka@redhat.com> + + PR c++/106848 + PR c++/102600 + * g++.dg/modules/enum-9_a.H: New test. + * g++.dg/modules/enum-9_b.C: New test. + * g++.dg/modules/enum-10_a.H: New test. 
+ * g++.dg/modules/enum-10_b.C: New test. + * g++.dg/modules/enum-11_a.H: New test. + * g++.dg/modules/enum-11_b.C: New test. + +2022-10-25 H.J. Lu <hjl.tools@gmail.com> + + PR target/107304 + * gcc.target/i386/pr107304.c: New test. + +2022-10-25 Jason Merrill <jason@redhat.com> + + * g++.dg/cpp23/attr-assume9.C: New test. + * g++.dg/cpp23/attr-assume10.C: New test. + +2022-10-25 Jason Merrill <jason@redhat.com> + + * g++.dg/cpp23/attr-assume2.C: Expect constant values. + +2022-10-25 Richard Biener <rguenther@suse.de> + + PR tree-optimization/107176 + PR tree-optimization/66375 + PR tree-optimization/42512 + * gcc.dg/torture/pr107176.c: New testcase. + +2022-10-25 Eric Botcazou <ebotcazou@adacore.com> + + * gnat.dg/specs/coverage1.ads: New test. + * gnat.dg/specs/variant_part.ads: Minor tweak. + * gnat.dg/specs/weak1.ads: Add dg directive. + +2022-10-25 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/107368 + * gcc.dg/attr-assume-5.c: New test. + +2022-10-25 YunQiang Su <yunqiang.su@cipunited.com> + + * gcc.target/mips/compact-branches-1.c: add isa_rev>=6. + * gcc.target/mips/mips.exp: don't add -mipsXXr6 option for + -mcompact-branches=always. It is usable for pre-R6 now. + * gcc.target/mips/compact-branches-8.c: New test. + * gcc.target/mips/compact-branches-9.c: New test. + +2022-10-25 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/107369 + * gcc.dg/attr-assume-4.c: New test. + * g++.dg/cpp23/attr-assume8.C: New test. + +2022-10-25 Richard Biener <rguenther@suse.de> + + PR tree-optimization/100756 + * gcc.dg/vect/pr100756.c: New testcase. + +2022-10-25 Kewen Lin <linkw@linux.ibm.com> + + * lib/target-supports.exp (check_effective_target_vect_long_long): Add + support for powerpc*-*-*. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/107349 + * gcc.dg/analyzer/stdarg-1-ms_abi.c (pr107349): New. + * gcc.dg/analyzer/stdarg-1-sysv_abi.c (pr107349): New. + * gcc.dg/analyzer/stdarg-1.c (pr107349): New. + +2022-10-24 Martin Liska <mliska@suse.cz> + + PR analyzer/107366 + * gcc.dg/analyzer/sarif-pr107366.c: New test. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/107345 + * gcc.dg/analyzer/pr107345.c: New test. + +2022-10-24 David Malcolm <dmalcolm@redhat.com> + + PR analyzer/106300 + * gcc.dg/analyzer/pipe-1.c: New test. + * gcc.dg/analyzer/pipe-glibc.c: New test. + * gcc.dg/analyzer/pipe-manpages.c: New test. + * gcc.dg/analyzer/pipe2-1.c: New test. + +2022-10-24 Marek Polacek <polacek@redhat.com> + + PR c++/107276 + * g++.dg/cpp2a/decomp4.C: New test. + +2022-10-24 Jakub Jelinek <jakub@redhat.com> + + PR c++/107358 + * c-c++-common/pr107358.c: New test. + * g++.dg/cpp1y/pr68180.C: Remove -fexcess-precision=fast from + dg-options. + +2022-10-24 Wilco Dijkstra <wdijkstr@arm.com> + + PR target/106583 + * gcc.target/aarch64/pr106583.c: Add new test. + +2022-10-24 Jakub Jelinek <jakub@redhat.com> + + PR c++/105774 + * g++.dg/cpp1y/constexpr-105774.C: New test. + +2022-10-24 Arsen Arsenović <arsen@aarsen.me> + + * gcc.dg/noreturn-4.c: Removed. + * g++.dg/freestanding-main.C: New test. + * g++.dg/freestanding-nonint-main.C: New test. + * gcc.dg/freestanding-main.c: New test. + * gcc.dg/freestanding-nonint-main.c: New test. + +2022-10-24 Aldy Hernandez <aldyh@redhat.com> + + PR tree-optimization/107355 + * gcc.dg/tree-ssa/pr107355.c: New test. + +2022-10-24 Tobias Burnus <tobias@codesourcery.com> + + PR middle-end/107236 + * gfortran.dg/gomp/target-device-ancestor-6.f90: New test. 
+ +2022-10-21 José Rui Faustino de Sousa <jrfsousa@gmail.com> + + PR fortran/100097 + PR fortran/100098 + * gfortran.dg/PR100097.f90: New test. + * gfortran.dg/PR100098.f90: New test. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR target/107322 + * gcc.target/i386/pr107322.c: New test. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR c++/107295 + * g++.dg/ext/vector42.C: New test. + +2022-10-21 Tejas Joshi <TejasSanjay.Joshi@amd.com> + + * gcc.target/i386/funcspec-56.inc: Handle new march. + * g++.target/i386/mv29.C: Likewise. + +2022-10-21 Thomas Schwinge <thomas@codesourcery.com> + + PR tree-optimization/107195 + * gcc.dg/tree-ssa/pr107195-3.c: New. + +2022-10-21 Richard Biener <rguenther@suse.de> + + PR tree-optimization/107323 + * gcc.dg/tree-ssa/pr107323.c: New testcase. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/54346 + * gcc.dg/pr54346.c: Add -Wno-psabi to dg-options. + +2022-10-21 Jakub Jelinek <jakub@redhat.com> + + PR c++/106863 + * g++.dg/cpp0x/dr2351-2.C: New test. + +2022-10-21 Ju-Zhe Zhong <juzhe.zhong@rivai.ai> + + * gcc.target/riscv/rvv/base/vsetvl-1.c: New test. + +2022-10-21 Haochen Jiang <haochen.jiang@intel.com> + + * gcc.target/i386/vnniint8-auto-vectorize-1.c: New test. + * gcc.target/i386/vnniint8-auto-vectorize-2.c: Ditto. + +2022-10-21 Kong Lingling <lingling.kong@intel.com> + Hongyu Wang <hongyu.wang@intel.com> + Haochen Jiang <haochen.jiang@intel.com> + + * g++.dg/other/i386-2.C: Add -mavxvnniint8. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/avx-check.h: Add avxvnniint8 check. + * gcc.target/i386/sse-12.c: Add -mavxvnniint8. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/funcspec-56.inc: Add new target attribute. + * lib/target-supports.exp + (check_effective_target_avxvnniint8): New. + * gcc.target/i386/avxvnniint8-1.c: Ditto. + * gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto. + * gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto. + * gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto. + * gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto. + * gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto. + * gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto. + +2022-10-21 Hongyu Wang <hongyu.wang@intel.com> + + * gcc.target/i386/avx-check.h: Add avxifma check. + * gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Remane.. + * gcc.target/i386/avx512ifma-vpmaddhuq-1a.c: To this. + * gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto. + * gcc.target/i386/avx512ifma-vpmaddluq-1a.c: Ditto. + * gcc.target/i386/avx512ifma-vpmaddhuq-1b.c: New Test. + * gcc.target/i386/avx512ifma-vpmaddluq-1b.c: Ditto. + * gcc.target/i386/avx-ifma-1.c: Ditto. + * gcc.target/i386/avx-ifma-2.c: Ditto. + * gcc.target/i386/avx-ifma-3.c: Ditto. + * gcc.target/i386/avx-ifma-4.c: Ditto. + * gcc.target/i386/avx-ifma-5.c: Ditto. + * gcc.target/i386/avx-ifma-6.c: Ditto. + * gcc.target/i386/avx-ifma-vpmaddhuq-2.c: Ditto. + * gcc.target/i386/avx-ifma-vpmaddluq-2.c: Ditto. + * gcc.target/i386/sse-12.c: Add -mavxifma. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * g++.dg/other/i386-2.C: Ditto. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/funcspec-56.inc: Add new target attribute. + * lib/target-supports.exp + (check_effective_target_avxifma): New. + 2022-10-20 Harald Anlauf <anlauf@gmx.de> Steven G. 
Kargl <kargl@gcc.gnu.org> diff --git a/gcc/testsuite/c-c++-common/pr107358.c b/gcc/testsuite/c-c++-common/pr107358.c new file mode 100644 index 0000000..4ab75e0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr107358.c @@ -0,0 +1,30 @@ +/* PR c++/107358 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fexcess-precision=standard" } */ + +typedef float __attribute__((vector_size (4 * sizeof (float)))) A; +typedef double __attribute__((vector_size (2 * sizeof (double)))) B; + +void +foo (A *x) +{ + *x = *x - 124.225514990f; +} + +void +bar (A *x, float y) +{ + *x = *x - y; +} + +void +baz (B *x) +{ + *x = *x + 124.225514990f; +} + +void +qux (B *x, double y) +{ + *x = *x + y; +} diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C new file mode 100644 index 0000000..8ca6fce --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-105774.C @@ -0,0 +1,15 @@ +// PR c++/105774 +// { dg-do compile { target c++14 } } + +constexpr signed char +foo () +{ +#if __SCHAR_MAX__ < __INT_MAX__ + signed char x = __SCHAR_MAX__; +#else + signed char x = 0; +#endif + return ++x; +} + +constexpr auto a = foo (); diff --git a/gcc/testsuite/g++.dg/cpp1y/pr68180.C b/gcc/testsuite/g++.dg/cpp1y/pr68180.C index 64d613e..9e6e5e9 100644 --- a/gcc/testsuite/g++.dg/cpp1y/pr68180.C +++ b/gcc/testsuite/g++.dg/cpp1y/pr68180.C @@ -1,6 +1,6 @@ // PR c++/68180 // { dg-do compile { target c++14 } } -// { dg-additional-options "-Wno-psabi -fexcess-precision=fast" } +// { dg-additional-options "-Wno-psabi" } typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t; constexpr float32x4_t fill(float x) { diff --git a/gcc/testsuite/g++.dg/cpp23/attr-assume10.C b/gcc/testsuite/g++.dg/cpp23/attr-assume10.C new file mode 100644 index 0000000..475555a --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp23/attr-assume10.C @@ -0,0 +1,22 @@ +// Test that s.i is not modified by the assume. 
+// { dg-do compile { target c++17 } } + +struct string +{ + const char *p; + int i; + constexpr string (const char *p): p(p), i(0) { } + constexpr int length () { ++i; return __builtin_strlen (p); } +}; + +constexpr int f() +{ + string s ("foobar"); + [[assume (s.length () > 0)]]; + if (s.i != 0) __builtin_abort(); + int len = s.length (); + if (s.i != 1) __builtin_abort(); + return len; +} + +static_assert (f()); diff --git a/gcc/testsuite/g++.dg/cpp23/attr-assume2.C b/gcc/testsuite/g++.dg/cpp23/attr-assume2.C index 9e54c14..4dd1221 100644 --- a/gcc/testsuite/g++.dg/cpp23/attr-assume2.C +++ b/gcc/testsuite/g++.dg/cpp23/attr-assume2.C @@ -26,7 +26,7 @@ f2 (int x) { #if __cpp_constexpr >= 201304L [[assume (x == 42)]]; // { dg-error "failed 'assume' attribute assumption" "" { target c++14 } } -#endif // { dg-message "the comparison reduces to '\\\(x == 42\\\)'" "" { target c++14 } .-1 } +#endif // { dg-message "the comparison reduces to '\\\(44 == 42\\\)'" "" { target c++14 } .-1 } return x; } @@ -76,7 +76,7 @@ f7 (int x, int y, int z, int w) { #if __cpp_constexpr >= 201304L [[assume (x == 42 && y == 43 && z == 44 && w == 45)]]; // { dg-error "failed 'assume' attribute assumption" "" { target c++14 } } -#endif // { dg-message "the comparison reduces to '\\\(z == 44\\\)'" "" { target c++14 } .-1 } +#endif // { dg-message "the comparison reduces to '\\\(45 == 44\\\)'" "" { target c++14 } .-1 } return x; } diff --git a/gcc/testsuite/g++.dg/cpp23/attr-assume8.C b/gcc/testsuite/g++.dg/cpp23/attr-assume8.C new file mode 100644 index 0000000..3c7a62f --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp23/attr-assume8.C @@ -0,0 +1,12 @@ +// PR tree-optimization/107369 +// { dg-do compile { target c++11 } } +// { dg-options "-O1" } + +void +foo (int x) +{ + if (x == 1) + goto l1; // { dg-message "from here" } + + [[assume (({ l1:; 1; }))]]; // { dg-error "jump to label 'l1'" } +} // { dg-message "enters statement expression" "" { target *-*-* } .-1 } diff --git a/gcc/testsuite/g++.dg/cpp23/attr-assume9.C b/gcc/testsuite/g++.dg/cpp23/attr-assume9.C new file mode 100644 index 0000000..cbd6815 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp23/attr-assume9.C @@ -0,0 +1,19 @@ +// Diagnose failed assumptions involving a function call. 
+// { dg-do compile { target c++17 } } + +struct string +{ + const char *p; + constexpr string (const char *p): p(p) { } + constexpr int length () { return __builtin_strlen (p); } +}; + +constexpr int f() +{ + string s ("foobar"); + [[assume (s.length () == 0)]]; // { dg-error "assume" } + // { dg-message "6 == 0" "" { target *-*-* } .-1 } + return s.length (); +} + +static_assert (f()); // { dg-error "non-constant" } diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp4.C b/gcc/testsuite/g++.dg/cpp2a/decomp4.C new file mode 100644 index 0000000..d1b0c90 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/decomp4.C @@ -0,0 +1,8 @@ +// PR c++/107276 +// { dg-do compile { target c++20 } } + +auto f(auto x) { + auto [y] = x; // { dg-error "cannot decompose" } + return y; +} +int i = f(0); diff --git a/gcc/testsuite/g++.dg/ext/vector42.C b/gcc/testsuite/g++.dg/ext/vector42.C new file mode 100644 index 0000000..e781097 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/vector42.C @@ -0,0 +1,12 @@ +// PR c++/107295 +// { dg-do compile { target c++11 } } + +template <typename T> struct A { + typedef T __attribute__((vector_size (sizeof (int)))) V; +}; +template <int, typename T> using B = typename A<T>::V; +template <typename T> using V = B<4, T>; +using F = V<float>; +constexpr F a = F () + 0.0f; +constexpr F b = F () + (float) 0.0; +constexpr F c = F () + (float) 0.0L; diff --git a/gcc/testsuite/g++.dg/freestanding-main.C b/gcc/testsuite/g++.dg/freestanding-main.C new file mode 100644 index 0000000..3718cc4 --- /dev/null +++ b/gcc/testsuite/g++.dg/freestanding-main.C @@ -0,0 +1,5 @@ +/* Make sure we don't get a missing return warning on freestanding. */ +/* { dg-do compile } */ +/* { dg-options "-ffreestanding -Wreturn-type" } */ + +int main() {} diff --git a/gcc/testsuite/g++.dg/freestanding-nonint-main.C b/gcc/testsuite/g++.dg/freestanding-nonint-main.C new file mode 100644 index 0000000..a8571cc --- /dev/null +++ b/gcc/testsuite/g++.dg/freestanding-nonint-main.C @@ -0,0 +1,5 @@ +/* Check that we get the right warning for nonint main in freestanding. 
*/ +/* { dg-do compile } */ +/* { dg-options "-ffreestanding -Wreturn-type" } */ + +const char *main() {} /* { dg-warning "-Wreturn-type" } */ diff --git a/gcc/testsuite/g++.dg/modules/enum-10_a.H b/gcc/testsuite/g++.dg/modules/enum-10_a.H new file mode 100644 index 0000000..fb7d10a --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/enum-10_a.H @@ -0,0 +1,5 @@ +// PR c++/106848 +// { dg-additional-options -fmodule-header } +// { dg-module-cmi {} } + +typedef enum memory_order { memory_order_seq_cst } memory_order; diff --git a/gcc/testsuite/g++.dg/modules/enum-10_b.C b/gcc/testsuite/g++.dg/modules/enum-10_b.C new file mode 100644 index 0000000..76dc315 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/enum-10_b.C @@ -0,0 +1,6 @@ +// PR c++/106848 +// { dg-additional-options "-fmodules-ts -g" } + +import "enum-10_a.H"; + +memory_order x = memory_order_seq_cst; diff --git a/gcc/testsuite/g++.dg/modules/enum-11_a.H b/gcc/testsuite/g++.dg/modules/enum-11_a.H new file mode 100644 index 0000000..1aecabf --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/enum-11_a.H @@ -0,0 +1,5 @@ +// PR c++/102600 +// { dg-additional-options -fmodule-header } +// { dg-module-cmi {} } + +enum class byte : unsigned char { }; diff --git a/gcc/testsuite/g++.dg/modules/enum-11_b.C b/gcc/testsuite/g++.dg/modules/enum-11_b.C new file mode 100644 index 0000000..4d77cab --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/enum-11_b.C @@ -0,0 +1,8 @@ +// PR c++/102600 +// { dg-additional-options -fmodules-ts } + +import "enum-11_a.H"; + +void push(byte) {} +void write(char v) { push(static_cast<byte>(v)); } +int main() { write(char{}); } diff --git a/gcc/testsuite/g++.dg/modules/enum-9_a.H b/gcc/testsuite/g++.dg/modules/enum-9_a.H new file mode 100644 index 0000000..0dd4a0f --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/enum-9_a.H @@ -0,0 +1,13 @@ +// PR c++/106848 +// { dg-additional-options -fmodule-header } +// { dg-module-cmi {} } + +template<typename _T1> +struct pair { + using type = void(*)(const _T1&); +}; + +struct _ScannerBase { + enum _TokenT { _S_token_anychar }; + pair<_TokenT> _M_token_tbl; +}; diff --git a/gcc/testsuite/g++.dg/modules/enum-9_b.C b/gcc/testsuite/g++.dg/modules/enum-9_b.C new file mode 100644 index 0000000..95e2812 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/enum-9_b.C @@ -0,0 +1,6 @@ +// PR c++/106848 +// { dg-additional-options "-fmodules-ts -g" } + +import "enum-9_a.H"; + +_ScannerBase s; diff --git a/gcc/testsuite/gcc.dg/analyzer/pipe-1.c b/gcc/testsuite/gcc.dg/analyzer/pipe-1.c new file mode 100644 index 0000000..6b95442 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pipe-1.c @@ -0,0 +1,38 @@ +#include "analyzer-decls.h" + +extern int pipe(int pipefd[2]); +extern int close(int fd); + +void +test_leak (void) +{ + int fds[2]; + if (pipe (fds) == -1) /* { dg-message "when 'pipe' succeeds" } */ + /* { dg-message "opened here as read-write" "sm msg" { target *-*-* } .-1 }} */ + return; +} /* { dg-line leak } */ +/* { dg-warning "leak of file descriptor 'fds\\\[0\\\]'" "leak of 0" { target *-*-* } leak } */ +/* { dg-warning "leak of file descriptor 'fds\\\[1\\\]'" "leak of 1" { target *-*-* } leak } */ +/* { dg-message "'fds\\\[0\\\]' leaks here" "final msg 0" { target *-*-* } leak }} */ +/* { dg-message "'fds\\\[1\\\]' leaks here" "final msg 1" { target *-*-* } leak }} */ + +void +test_close (void) +{ + int fds[2]; + if (pipe (fds) == -1) + return; + __analyzer_describe (0, fds[0]); /* { dg-warning "CONJURED" } */ + __analyzer_describe (0, fds[1]); /* { dg-warning "CONJURED" } */ + close 
(fds[0]); + close (fds[1]); +} + +void +test_unchecked (void) +{ + int fds[2]; + pipe (fds); /* { dg-message "when 'pipe' fails" } */ + close (fds[0]); /* { dg-warning "use of uninitialized value 'fds\\\[0\\\]'" } */ + close (fds[1]); /* { dg-warning "use of uninitialized value 'fds\\\[1\\\]'" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c b/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c new file mode 100644 index 0000000..a8546ea --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c @@ -0,0 +1,71 @@ +/* Example of pipe usage from glibc manual. */ + +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> + +/* Read characters from the pipe and echo them to stdout. */ + +void +read_from_pipe (int file) +{ + FILE *stream; + int c; + stream = fdopen (file, "r"); + while ((c = fgetc (stream)) != EOF) + putchar (c); + fclose (stream); +} + +/* Write some random text to the pipe. */ + +void +write_to_pipe (int file) +{ + FILE *stream; + stream = fdopen (file, "w"); + fprintf (stream, "hello, world!\n"); + fprintf (stream, "goodbye, world!\n"); + fclose (stream); +} + +int +main (void) +{ + pid_t pid; + int mypipe[2]; + + /* Create the pipe. */ + if (pipe (mypipe)) + { + fprintf (stderr, "Pipe failed.\n"); + return EXIT_FAILURE; + } + + + /* Create the child process. */ + pid = fork (); + if (pid == (pid_t) 0) + { + /* This is the child process. + Close other end first. */ + close (mypipe[1]); + read_from_pipe (mypipe[0]); + return EXIT_SUCCESS; + } + else if (pid < (pid_t) 0) + { + /* The fork failed. */ + fprintf (stderr, "Fork failed.\n"); + return EXIT_FAILURE; + } + else + { + /* This is the parent process. + Close other end first. */ + close (mypipe[0]); + write_to_pipe (mypipe[1]); + return EXIT_SUCCESS; + } +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c b/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c new file mode 100644 index 0000000..6b9ae4d --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c @@ -0,0 +1,76 @@ +/* Example of "pipe" from release 5.13 of the Linux man-pages project. + +Copyright (C) 2005, 2008, Michael Kerrisk <mtk.manpages@gmail.com> +(A few fragments remain from an earlier (1992) version by +Drew Eckhardt <drew@cs.colorado.edu>.) + +Permission is granted to make and distribute verbatim copies of this +manual provided the copyright notice and this permission notice are +preserved on all copies. + +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the +entire resulting derived work is distributed under the terms of a +permission notice identical to this one. + +Since the Linux kernel and libraries are constantly changing, this +manual page may be incorrect or out-of-date. The author(s) assume no +responsibility for errors or omissions, or for damages resulting from +the use of the information contained herein. The author(s) may not +have taken the same level of care in the production of this manual, +which is licensed free of charge, as they might when working +professionally. + +Formatted or processed versions of this manual, if unaccompanied by +the source, must acknowledge the copyright and authors of this work. 
+ + */ + +#include <sys/types.h> +#include <sys/wait.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +int +main(int argc, char *argv[]) +{ + int pipefd[2]; + pid_t cpid; + char buf; + + if (argc != 2) { + fprintf(stderr, "Usage: %s <string>\n", argv[0]); + exit(EXIT_FAILURE); + } + + if (pipe(pipefd) == -1) { + perror("pipe"); + exit(EXIT_FAILURE); + } + + cpid = fork(); + if (cpid == -1) { + perror("fork"); + exit(EXIT_FAILURE); + } + + if (cpid == 0) { /* Child reads from pipe */ + close(pipefd[1]); /* Close unused write end */ + + while (read(pipefd[0], &buf, 1) > 0) + write(STDOUT_FILENO, &buf, 1); + + write(STDOUT_FILENO, "\n", 1); + close(pipefd[0]); + _exit(EXIT_SUCCESS); + + } else { /* Parent writes argv[1] to pipe */ + close(pipefd[0]); /* Close unused read end */ + write(pipefd[1], argv[1], strlen(argv[1])); + close(pipefd[1]); /* Reader will see EOF */ + wait(NULL); /* Wait for child */ + exit(EXIT_SUCCESS); + } +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pipe2-1.c b/gcc/testsuite/gcc.dg/analyzer/pipe2-1.c new file mode 100644 index 0000000..d7afc9c --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pipe2-1.c @@ -0,0 +1,38 @@ +#include "analyzer-decls.h" + +extern int pipe2(int pipefd[2], int flags); +extern int close(int fd); + +void +test_leak (void) +{ + int fds[2]; + if (pipe2 (fds, 0) == -1) /* { dg-message "when 'pipe2' succeeds" } */ + /* { dg-message "opened here as read-write" "sm msg" { target *-*-* } .-1 }} */ + return; +} /* { dg-line leak } */ +/* { dg-warning "leak of file descriptor 'fds\\\[0\\\]'" "leak of 0" { target *-*-* } leak } */ +/* { dg-warning "leak of file descriptor 'fds\\\[1\\\]'" "leak of 1" { target *-*-* } leak } */ +/* { dg-message "'fds\\\[0\\\]' leaks here" "final msg 0" { target *-*-* } leak }} */ +/* { dg-message "'fds\\\[1\\\]' leaks here" "final msg 1" { target *-*-* } leak }} */ + +void +test_close (void) +{ + int fds[2]; + if (pipe2 (fds, 0) == -1) + return; + __analyzer_describe (0, fds[0]); /* { dg-warning "CONJURED" } */ + __analyzer_describe (0, fds[1]); /* { dg-warning "CONJURED" } */ + close (fds[0]); + close (fds[1]); +} + +void +test_unchecked (void) +{ + int fds[2]; + pipe2 (fds, 0); /* { dg-message "when 'pipe2' fails" } */ + close (fds[0]); /* { dg-warning "use of uninitialized value 'fds\\\[0\\\]'" } */ + close (fds[1]); /* { dg-warning "use of uninitialized value 'fds\\\[1\\\]'" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pr107345.c b/gcc/testsuite/gcc.dg/analyzer/pr107345.c new file mode 100644 index 0000000..540596d --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr107345.c @@ -0,0 +1,17 @@ +/* Ensure the analyzer treats (NULL == &e) as being false for this case, + where the logic is sufficiently complicated to not be optimized away. 
*/ + +#include <stdio.h> + +int main() { + int e = 10086; + int *f = &e; + int g = 0; + int *h[2][1]; + h[1][0] = f; + if (g == (h[1][0])) { /* { dg-warning "comparison between pointer and integer" } */ + unsigned int *i = 0; + } + printf("NPD_FLAG: %d\n ", *f); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c b/gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c new file mode 100644 index 0000000..997cf56 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/sarif-pr107366.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fdiagnostics-format=sarif-file" } */ + +typedef enum { + HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO +} hwloc_topology_diff_obj_attr_type_t; +enum { HWLOC_TOPOLOGY_DIFF_OBJ_ATTR } hwloc_apply_diff_one_diff_0_0; + +void +hwloc_apply_diff_one() { + switch (hwloc_apply_diff_one_diff_0_0) + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: { + hwloc_topology_diff_obj_attr_type_t obj_attr_2_0_0; + switch (obj_attr_2_0_0) + case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: { + unsigned ii = 0; + } + } +} + diff --git a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c index b0143a7..e55f10d 100644 --- a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c +++ b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-ms_abi.c @@ -435,3 +435,9 @@ void test_va_arg_after_return (void) __analyzer_called_by_test_va_arg_after_return (42, 1066); i = __builtin_va_arg (global_ap, int); /* { dg-warning "dereferencing pointer 'global_ap' to within stale stack frame" } */ } + +void pr107349 (void) +{ + __builtin_ms_va_list x,y; + __builtin_ms_va_copy(x,y); /* { dg-warning "use of uninitialized value 'y'" } */ +} /* { dg-warning "missing call to 'va_end'" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c index 1dc97ea3..fb49b35 100644 --- a/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c +++ b/gcc/testsuite/gcc.dg/analyzer/stdarg-1-sysv_abi.c @@ -435,3 +435,9 @@ void test_va_arg_after_return (void) __analyzer_called_by_test_va_arg_after_return (42, 1066); i = __builtin_va_arg (global_ap, int); /* { dg-warning "dereferencing pointer 'global_ap' to within stale stack frame" } */ } + +void pr107349 (void) +{ + __builtin_sysv_va_list x,y; + __builtin_sysv_va_copy(x,y); /* { dg-warning "use of uninitialized value 'y'" } */ +} /* { dg-warning "missing call to 'va_end'" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c b/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c index f23d28c5..f2766a50 100644 --- a/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/stdarg-1.c @@ -431,3 +431,9 @@ void test_va_arg_after_return (void) __analyzer_called_by_test_va_arg_after_return (42, 1066); i = __builtin_va_arg (global_ap, int); /* { dg-warning "dereferencing pointer 'global_ap' to within stale stack frame" } */ } + +void pr107349 (void) +{ + __builtin_va_list x,y; + __builtin_va_copy(x,y); /* { dg-warning "use of uninitialized value 'y'" } */ +} /* { dg-warning "missing call to 'va_end'" } */ diff --git a/gcc/testsuite/gcc.dg/attr-assume-4.c b/gcc/testsuite/gcc.dg/attr-assume-4.c new file mode 100644 index 0000000..e8a2b8d --- /dev/null +++ b/gcc/testsuite/gcc.dg/attr-assume-4.c @@ -0,0 +1,12 @@ +/* PR tree-optimization/107369 */ +/* { dg-do compile } */ +/* { dg-options "-std=c2x -O1" } */ + +void +foo (int x) +{ + if (x == 1) + goto l1; /* { dg-error "jump into statement expression" } */ + + [[gnu::assume (({ l1:; 1; }))]]; /* { dg-message "label 'l1' defined here" } */ +} 
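The assume-attribute tests above and below (attr-assume-4.c here, attr-assume-5.c next, and the g++.dg/cpp23/attr-assume*.C tests earlier) all exercise the same mechanism. As a reading aid only, and not part of any patch in this series, a minimal usage sketch follows; the function and parameter names are invented for illustration, and it assumes a compiler that accepts C23 attribute syntax, e.g. with -std=c2x.

/* The condition of an assume attribute is not evaluated at run time; it
   only tells the optimizer that it may take the condition to be true at
   this point.  If the condition would in fact be false, the behavior is
   undefined.  */
int
scale_by_block (int total, int block)
{
  [[gnu::assume (block > 0 && block <= 64)]];  /* C23 spelling; C++23 also provides [[assume (...)]].  */
  /* Given the assumption, the compiler may drop the block == 0 path.  */
  return total / block;
}

For conditions that are not simple (for example the s.length () call in attr-assume9.C), the gimplify.cc hunk earlier in this series suggests the condition is gimplified into a separate guarded body rather than being evaluated inline.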
diff --git a/gcc/testsuite/gcc.dg/attr-assume-5.c b/gcc/testsuite/gcc.dg/attr-assume-5.c new file mode 100644 index 0000000..8aa0f36 --- /dev/null +++ b/gcc/testsuite/gcc.dg/attr-assume-5.c @@ -0,0 +1,10 @@ +/* PR tree-optimization/107368 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +double +f4 (double x) +{ + [[gnu::assume (x && x > 0.0)]]; + return x; +} diff --git a/gcc/testsuite/gcc.dg/freestanding-main.c b/gcc/testsuite/gcc.dg/freestanding-main.c new file mode 100644 index 0000000..3718cc4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/freestanding-main.c @@ -0,0 +1,5 @@ +/* Make sure we don't get a missing return warning on freestanding. */ +/* { dg-do compile } */ +/* { dg-options "-ffreestanding -Wreturn-type" } */ + +int main() {} diff --git a/gcc/testsuite/gcc.dg/freestanding-nonint-main.c b/gcc/testsuite/gcc.dg/freestanding-nonint-main.c new file mode 100644 index 0000000..d839334 --- /dev/null +++ b/gcc/testsuite/gcc.dg/freestanding-nonint-main.c @@ -0,0 +1,5 @@ +/* Check that we get the right warning for nonint main in freestanding. */ +/* { dg-do compile } */ +/* { dg-options "-ffreestanding -Wreturn-type" } */ + +const char *main(void) {} /* { dg-warning "-Wreturn-type" } */ diff --git a/gcc/testsuite/gcc.dg/noreturn-4.c b/gcc/testsuite/gcc.dg/noreturn-4.c deleted file mode 100644 index 6fe1447..0000000 --- a/gcc/testsuite/gcc.dg/noreturn-4.c +++ /dev/null @@ -1,10 +0,0 @@ -/* Check for "noreturn" warning in main. */ -/* { dg-do compile } */ -/* { dg-options "-O2 -Wmissing-noreturn -ffreestanding" } */ -extern void exit (int) __attribute__ ((__noreturn__)); - -int -main (void) /* { dg-warning "function might be candidate for attribute 'noreturn'" "warn for main" } */ -{ - exit (0); -} diff --git a/gcc/testsuite/gcc.dg/torture/pr107176.c b/gcc/testsuite/gcc.dg/torture/pr107176.c new file mode 100644 index 0000000..c4f7b6d --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr107176.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ + +__INT32_TYPE__ a; +__INT64_TYPE__ b; +static inline __INT64_TYPE__ c(__UINT32_TYPE__ d) +{ + return d; +} +static inline void e(__INT32_TYPE__ d) +{ + a = d; +} +int main() +{ + b = 0; + for (; b < 1; b = c(b - 90) + 90 + 1) + ; + e(b >> 2); + if (a != 1073741824) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c b/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c new file mode 100644 index 0000000..4079634 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107355.c @@ -0,0 +1,13 @@ +// { dg-do compile } +// { dg-options "-O2 -fno-guess-branch-probability -fsanitize=float-cast-overflow --param=max-jump-thread-duplication-stmts=240" } + +float f; + +void +foo (double d) +{ + (char) f; + long l = __builtin_fabs (d); + (char) f; + (long) d; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr100756.c b/gcc/testsuite/gcc.dg/vect/pr100756.c new file mode 100644 index 0000000..c1362f2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr100756.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +int +foo (int * restrict a, int n) +{ + int i, result = 0; + + a = __builtin_assume_aligned (a, __BIGGEST_ALIGNMENT__); + for (i = 0; i < n * 4; i++) + result += a[i]; + return result; +} + +/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c new file mode 100644 index 0000000..0f93158 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c @@ -0,0 +1,41 @@ +/* { dg-do assemble 
} */ +/* { dg-options "-O2 --save-temps" } */ + +long f1 (void) +{ + return 0x7efefefefefefeff; +} + +long f2 (void) +{ + return 0x12345678aaaaaaaa; +} + +long f3 (void) +{ + return 0x1234cccccccc5678; +} + +long f4 (void) +{ + return 0x7777123456787777; +} + +long f5 (void) +{ + return 0x5555555512345678; +} + +long f6 (void) +{ + return 0x1234bbbb5678bbbb; +} + +long f7 (void) +{ + return 0x4444123444445678; +} + + +/* { dg-final { scan-assembler-times {\tmovk\t} 14 } } */ +/* { dg-final { scan-assembler-times {\tmov\t} 7 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr107304.c b/gcc/testsuite/gcc.target/i386/pr107304.c new file mode 100644 index 0000000..24d6879 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107304.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -march=tigerlake" } */ + +#include <stdint.h> + +typedef union { + uint8_t v __attribute__((aligned(256))) __attribute__ ((vector_size(64 * sizeof(uint8_t)))); + uint8_t i[64] __attribute__((aligned(256))); +} stress_vec_u8_64_t; + +typedef struct { + struct { + stress_vec_u8_64_t s; + stress_vec_u8_64_t o; + stress_vec_u8_64_t mask1; + stress_vec_u8_64_t mask2; + } u8_64; +} stress_vec_data_t; + +__attribute__((target_clones("arch=alderlake", "default"))) +void +stress_vecshuf_u8_64(stress_vec_data_t *data) +{ + stress_vec_u8_64_t *__restrict s; + stress_vec_u8_64_t *__restrict mask1; + stress_vec_u8_64_t *__restrict mask2; + register int i; + + s = &data->u8_64.s; + mask1 = &data->u8_64.mask1; + mask2 = &data->u8_64.mask2; + + for (i = 0; i < 256; i++) { /* was i < 65536 */ + stress_vec_u8_64_t tmp; + + tmp.v = __builtin_shuffle(s->v, mask1->v); + s->v = __builtin_shuffle(tmp.v, mask2->v); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr107322.c b/gcc/testsuite/gcc.target/i386/pr107322.c new file mode 100644 index 0000000..d3d11fe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107322.c @@ -0,0 +1,33 @@ +/* PR target/107322 */ +/* { dg-do compile } */ +/* { dg-options "-fexcess-precision=16 -O -msse2 -mfpmath=sse" } */ + +int i, j; +float k, l; +__bf16 f; + +void +foo (void) +{ + i *= 0 >= f; +} + +void +bar (void) +{ + i *= 0 <= f; +} + +void +baz (int x, int y) +{ + i = 0 >= f ? x : y; + j = 0 <= f ? x + 2 : y + 3; +} + +void +qux (float x, float y) +{ + k = 0 >= f ? x : y; + l = 0 <= f ? 
x + 2 : y + 3; +} diff --git a/gcc/testsuite/gcc.target/mips/compact-branches-1.c b/gcc/testsuite/gcc.target/mips/compact-branches-1.c index 9c7365e..6b8e197 100644 --- a/gcc/testsuite/gcc.target/mips/compact-branches-1.c +++ b/gcc/testsuite/gcc.target/mips/compact-branches-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-mcompact-branches=always -mno-micromips" } */ +/* { dg-options "-mcompact-branches=always -mno-micromips isa_rev>=6" } */ int glob; void diff --git a/gcc/testsuite/gcc.target/mips/compact-branches-8.c b/gcc/testsuite/gcc.target/mips/compact-branches-8.c new file mode 100644 index 0000000..1290ced --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/compact-branches-8.c @@ -0,0 +1,10 @@ +/* { dg-options "-mno-abicalls -mcompact-branches=always isa_rev<=5" } */ +void bar (int); + +void +foo () +{ + bar (1); +} + +/* { dg-final { scan-assembler "\t(j|jal)\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/compact-branches-9.c b/gcc/testsuite/gcc.target/mips/compact-branches-9.c new file mode 100644 index 0000000..4b23bf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/compact-branches-9.c @@ -0,0 +1,10 @@ +/* { dg-options "-mno-abicalls -fno-PIC -mcompact-branches=always isa_rev>=6" } */ +void bar (int); + +void +foo () +{ + bar (1); +} + +/* { dg-final { scan-assembler "\t(bc|balc)\t" } } */ diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp index 42a0dbb..c89626a 100644 --- a/gcc/testsuite/gcc.target/mips/mips.exp +++ b/gcc/testsuite/gcc.target/mips/mips.exp @@ -1167,10 +1167,8 @@ proc mips-dg-options { args } { # We need a revision 6 or better ISA for: # # - When the LSA instruction is required - # - When only using compact branches if { $isa_rev < 6 - && ([mips_have_test_option_p options "HAS_LSA"] - || [mips_have_test_option_p options "-mcompact-branches=always"]) } { + && ([mips_have_test_option_p options "HAS_LSA"]) } { if { $gp_size == 32 } { mips_make_test_option options "-mips32r6" } else { diff --git a/gcc/testsuite/gcc.target/powerpc/pr106460.c b/gcc/testsuite/gcc.target/powerpc/pr106460.c new file mode 100644 index 0000000..aae4b01 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr106460.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-O1 -mdejagnu-cpu=power10" } */ + +/* (high:DI (symbol_ref:DI ("var_48")..))) should not cause ICE. 
*/ +extern short var_48; +void +foo (double *r) +{ + if (var_48) + *r = 1234.5678; +} + diff --git a/gcc/testsuite/gcc.target/riscv/arch-18.c b/gcc/testsuite/gcc.target/riscv/arch-18.c new file mode 100644 index 0000000..bb04536 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-18.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32gcvh -mabi=ilp32 -mcmodel=medlow" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-5.c b/gcc/testsuite/gcc.target/riscv/arch-5.c index 2a0f3b7..b945a64 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-5.c +++ b/gcc/testsuite/gcc.target/riscv/arch-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32isabc_hghi_zfoo_xbar -mabi=ilp32" } */ +/* { dg-options "-march=rv32isabc_zfoo_xbar -mabi=ilp32" } */ int foo() { } diff --git a/gcc/testsuite/gcc.target/riscv/predef-23.c b/gcc/testsuite/gcc.target/riscv/predef-23.c new file mode 100644 index 0000000..676023f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-23.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64ih_zfhmin -mabi=lp64f -mcmodel=medlow -misa-spec=20191213" } */ + +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_i) +#error "__riscv_i" +#endif + +#if defined(__riscv_c) +#error "__riscv_c" +#endif + +#if defined(__riscv_e) +#error "__riscv_e" +#endif + +#if defined(__riscv_a) +#error "__riscv_a" +#endif + +#if defined(__riscv_m) +#error "__riscv_m" +#endif + +#if !defined(__riscv_f) +#error "__riscv_f" +#endif + +#if defined(__riscv_d) +#error "__riscv_d" +#endif + +#if defined(__riscv_v) +#error "__riscv_v" +#endif + +#if defined(__riscv_zfh) +#error "__riscv_zfh" +#endif + +#if !defined(__riscv_zfhmin) +#error "__riscv_zfhmin" +#endif + +#if !defined(__riscv_zicsr) +#error "__riscv_zicsr" +#endif + +#if !defined(__riscv_h) +#error "__riscv_h" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-24.c b/gcc/testsuite/gcc.target/riscv/predef-24.c new file mode 100644 index 0000000..2b51a19 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-24.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_svnapot -mabi=lp64 -mcmodel=medlow -misa-spec=20191213" } */ + +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000)) +#error "__riscv_i" +#endif + +#if !defined(__riscv_c) || (__riscv_c != (2 * 1000 * 1000)) +#error "__riscv_c" +#endif + +#if defined(__riscv_e) +#error "__riscv_e" +#endif + +#if !defined(__riscv_a) || (__riscv_a != (2 * 1000 * 1000 + 1 * 1000)) +#error "__riscv_a" +#endif + +#if !defined(__riscv_m) || (__riscv_m != (2 * 1000 * 1000)) +#error "__riscv_m" +#endif + +#if !defined(__riscv_f) || (__riscv_f != (2 * 1000 * 1000 + 2 * 1000)) +#error "__riscv_f" +#endif + +#if !defined(__riscv_d) || (__riscv_d != (2 * 1000 * 1000 + 2 * 1000)) +#error "__riscv_d" +#endif + +#if !defined(__riscv_svnapot) +#error "__riscv_svnapot" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/predef-25.c b/gcc/testsuite/gcc.target/riscv/predef-25.c new file mode 100644 index 0000000..64bde17 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/predef-25.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_svinval -mabi=lp64 -mcmodel=medlow -misa-spec=20191213" } */ 
+ +int main () { + +#ifndef __riscv_arch_test +#error "__riscv_arch_test" +#endif + +#if __riscv_xlen != 64 +#error "__riscv_xlen" +#endif + +#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000)) +#error "__riscv_i" +#endif + +#if !defined(__riscv_c) || (__riscv_c != (2 * 1000 * 1000)) +#error "__riscv_c" +#endif + +#if defined(__riscv_e) +#error "__riscv_e" +#endif + +#if !defined(__riscv_a) || (__riscv_a != (2 * 1000 * 1000 + 1 * 1000)) +#error "__riscv_a" +#endif + +#if !defined(__riscv_m) || (__riscv_m != (2 * 1000 * 1000)) +#error "__riscv_m" +#endif + +#if !defined(__riscv_f) || (__riscv_f != (2 * 1000 * 1000 + 2 * 1000)) +#error "__riscv_f" +#endif + +#if !defined(__riscv_d) || (__riscv_d != (2 * 1000 * 1000 + 2 * 1000)) +#error "__riscv_d" +#endif + +#if !defined(__riscv_svinval) +#error "__riscv_svinval" +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c new file mode 100644 index 0000000..6a235e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-1.c @@ -0,0 +1,179 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov1 (int8_t *in, int8_t *out) +{ + vint8mf8_t v = *(vint8mf8_t*)in; + *(vint8mf8_t*)out = v; +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov2 (int8_t *in, int8_t *out) +{ + vint8mf4_t v = *(vint8mf4_t*)in; + *(vint8mf4_t*)out = v; +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int8_t *in, int8_t *out) +{ + vint8mf2_t v = *(vint8mf2_t*)in; + *(vint8mf2_t*)out = v; +} + +/* +** mov4: +** vl1re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int8_t *in, int8_t *out) +{ + vint8m1_t v = *(vint8m1_t*)in; + *(vint8m1_t*)out = v; +} + +/* +** mov5: +** vl2re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int8_t *in, int8_t *out) +{ + vint8m2_t v = *(vint8m2_t*)in; + *(vint8m2_t*)out = v; +} + +/* +** mov6: +** vl4re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int8_t *in, int8_t *out) +{ + vint8m4_t v = *(vint8m4_t*)in; + *(vint8m4_t*)out = v; +} + +/* +** mov7: +** 
vl8re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int8_t *in, int8_t *out) +{ + vint8m8_t v = *(vint8m8_t*)in; + *(vint8m8_t*)out = v; +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint8_t *in, uint8_t *out) +{ + vuint8mf8_t v = *(vuint8mf8_t*)in; + *(vuint8mf8_t*)out = v; +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint8_t *in, uint8_t *out) +{ + vuint8mf4_t v = *(vuint8mf4_t*)in; + *(vuint8mf4_t*)out = v; +} + +/* +** mov10: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint8_t *in, uint8_t *out) +{ + vuint8mf2_t v = *(vuint8mf2_t*)in; + *(vuint8mf2_t*)out = v; +} + +/* +** mov11: +** vl1re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint8_t *in, uint8_t *out) +{ + vuint8m1_t v = *(vuint8m1_t*)in; + *(vuint8m1_t*)out = v; +} + +/* +** mov12: +** vl2re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov12 (uint8_t *in, uint8_t *out) +{ + vuint8m2_t v = *(vuint8m2_t*)in; + *(vuint8m2_t*)out = v; +} + +/* +** mov13: +** vl4re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov13 (uint8_t *in, uint8_t *out) +{ + vuint8m4_t v = *(vuint8m4_t*)in; + *(vuint8m4_t*)out = v; +} + +/* +** mov14: +** vl8re8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov14 (uint8_t *in, uint8_t *out) +{ + vuint8m8_t v = *(vuint8m8_t*)in; + *(vuint8m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c new file mode 100644 index 0000000..10aa829 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-10.c @@ -0,0 +1,385 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... 
+** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov1 (int8_t *in, int8_t *out) +{ + register vint8mf8_t v1 asm("v1") = *(vint8mf8_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf8_t v2 asm("v2") = v1; + *(vint8mf8_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov2 (int8_t *in, int8_t *out) +{ + register vint8mf4_t v1 asm("v1") = *(vint8mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf4_t v2 asm("v2") = v1; + *(vint8mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov3 (int8_t *in, int8_t *out) +{ + register vint8mf2_t v1 asm("v1") = *(vint8mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf2_t v2 asm("v2") = v1; + *(vint8mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov4: +** vl1re8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov4 (int8_t *in, int8_t *out) +{ + register vint8m1_t v1 asm("v1") = *(vint8m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8m1_t v2 asm("v2") = v1; + *(vint8m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov5: +** vl2re8\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov5 (int8_t *in, int8_t *out) +{ + register vint8m2_t v2 asm("v2") = *(vint8m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint8m2_t v4 asm("v4") = v2; + *(vint8m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov6: +** vl4re8\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov6 (int8_t *in, int8_t *out) +{ + register vint8m4_t v4 asm("v4") = *(vint8m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint8m4_t v8 asm("v8") = v4; + *(vint8m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov7: +** vl8re8\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov7 (int8_t *in, int8_t *out) +{ + register vint8m8_t v8 asm("v8") = *(vint8m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint8m8_t v16 asm("v16") = v8; + *(vint8m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov8 (int16_t *in, int16_t *out) +{ + register vint16mf4_t v1 asm("v1") = *(vint16mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint16mf4_t v2 asm("v2") = v1; + *(vint16mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov9 (int16_t *in, int16_t *out) +{ + register vint16mf2_t v1 asm("v1") = *(vint16mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint16mf2_t v2 asm("v2") = v1; + *(vint16mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov10: +** vl1re16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov10 (int16_t *in, int16_t *out) +{ + register vint16m1_t v1 asm("v1") = *(vint16m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint16m1_t v2 asm("v2") = v1; + *(vint16m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov11: +** vl2re16\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov11 (int16_t *in, int16_t *out) +{ + register vint16m2_t v2 asm("v2") = *(vint16m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint16m2_t v4 asm("v4") = v2; + *(vint16m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov12: +** vl4re16\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov12 (int16_t *in, int16_t *out) +{ + register vint16m4_t v4 asm("v4") = *(vint16m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint16m4_t v8 asm("v8") = v4; + *(vint16m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov13: +** vl8re16\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov13 (int32_t *in, int32_t *out) +{ + register vint16m8_t v8 asm("v8") = *(vint16m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint16m8_t v16 asm("v16") = v8; + *(vint16m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov14: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov14 (int32_t *in, int32_t *out) +{ + register vint32mf2_t v1 asm("v1") = *(vint32mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint32mf2_t v2 asm("v2") = v1; + *(vint32mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov15: +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov15 (int32_t *in, int32_t *out) +{ + register vint32m1_t v1 asm("v1") = *(vint32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint32m1_t v2 asm("v2") = v1; + *(vint32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov16: +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov16 (int32_t *in, int32_t *out) +{ + register vint32m2_t v2 asm("v2") = *(vint32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint32m2_t v4 asm("v4") = v2; + *(vint32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov17: +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov17 (int32_t *in, int32_t *out) +{ + register vint32m4_t v4 asm("v4") = *(vint32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint32m4_t v8 asm("v8") = v4; + *(vint32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov18: +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... 
+** ret +*/ +void mov18 (int32_t *in, int32_t *out) +{ + register vint32m8_t v8 asm("v8") = *(vint32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint32m8_t v16 asm("v16") = v8; + *(vint32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov19: +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov19 (int64_t *in, int64_t *out) +{ + register vint64m1_t v1 asm("v1") = *(vint64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint64m1_t v2 asm("v2") = v1; + *(vint64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov20: +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov20 (int64_t *in, int64_t *out) +{ + register vint64m2_t v2 asm("v2") = *(vint64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vint64m2_t v4 asm("v4") = v2; + *(vint64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov21: +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov21 (int64_t *in, int64_t *out) +{ + register vint64m4_t v4 asm("v4") = *(vint64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vint64m4_t v8 asm("v8") = v4; + *(vint64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov22: +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov22 (int64_t *in, int64_t *out) +{ + register vint64m8_t v8 asm("v8") = *(vint64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vint64m8_t v16 asm("v16") = v8; + *(vint64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c new file mode 100644 index 0000000..f8da5bb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-11.c @@ -0,0 +1,385 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov1 (uint8_t *in, uint8_t *out) +{ + register vuint8mf8_t v1 asm("v1") = *(vuint8mf8_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8mf8_t v2 asm("v2") = v1; + *(vuint8mf8_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov2 (uint8_t *in, uint8_t *out) +{ + register vuint8mf4_t v1 asm("v1") = *(vuint8mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8mf4_t v2 asm("v2") = v1; + *(vuint8mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov3 (uint8_t *in, uint8_t *out) +{ + register vuint8mf2_t v1 asm("v1") = *(vuint8mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8mf2_t v2 asm("v2") = v1; + *(vuint8mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov4: +** vl1re8\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov4 (uint8_t *in, uint8_t *out) +{ + register vuint8m1_t v1 asm("v1") = *(vuint8m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint8m1_t v2 asm("v2") = v1; + *(vuint8m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov5: +** vl2re8\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov5 (uint8_t *in, uint8_t *out) +{ + register vuint8m2_t v2 asm("v2") = *(vuint8m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint8m2_t v4 asm("v4") = v2; + *(vuint8m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov6: +** vl4re8\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov6 (uint8_t *in, uint8_t *out) +{ + register vuint8m4_t v4 asm("v4") = *(vuint8m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint8m4_t v8 asm("v8") = v4; + *(vuint8m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov7: +** vl8re8\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov7 (uint8_t *in, uint8_t *out) +{ + register vuint8m8_t v8 asm("v8") = *(vuint8m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint8m8_t v16 asm("v16") = v8; + *(vuint8m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov8 (uint16_t *in, uint16_t *out) +{ + register vuint16mf4_t v1 asm("v1") = *(vuint16mf4_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint16mf4_t v2 asm("v2") = v1; + *(vuint16mf4_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov9 (uint16_t *in, uint16_t *out) +{ + register vuint16mf2_t v1 asm("v1") = *(vuint16mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint16mf2_t v2 asm("v2") = v1; + *(vuint16mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov10: +** vl1re16\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov10 (uint16_t *in, uint16_t *out) +{ + register vuint16m1_t v1 asm("v1") = *(vuint16m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint16m1_t v2 asm("v2") = v1; + *(vuint16m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov11: +** vl2re16\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov11 (uint16_t *in, uint16_t *out) +{ + register vuint16m2_t v2 asm("v2") = *(vuint16m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint16m2_t v4 asm("v4") = v2; + *(vuint16m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov12: +** vl4re16\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... 
+** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov12 (uint16_t *in, uint16_t *out) +{ + register vuint16m4_t v4 asm("v4") = *(vuint16m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint16m4_t v8 asm("v8") = v4; + *(vuint16m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov13: +** vl8re16\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov13 (uint32_t *in, uint32_t *out) +{ + register vuint16m8_t v8 asm("v8") = *(vuint16m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint16m8_t v16 asm("v16") = v8; + *(vuint16m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov14: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov14 (uint32_t *in, uint32_t *out) +{ + register vuint32mf2_t v1 asm("v1") = *(vuint32mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint32mf2_t v2 asm("v2") = v1; + *(vuint32mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov15: +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov15 (uint32_t *in, uint32_t *out) +{ + register vuint32m1_t v1 asm("v1") = *(vuint32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint32m1_t v2 asm("v2") = v1; + *(vuint32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov16: +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov16 (uint32_t *in, uint32_t *out) +{ + register vuint32m2_t v2 asm("v2") = *(vuint32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint32m2_t v4 asm("v4") = v2; + *(vuint32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov17: +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov17 (uint32_t *in, uint32_t *out) +{ + register vuint32m4_t v4 asm("v4") = *(vuint32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint32m4_t v8 asm("v8") = v4; + *(vuint32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov18: +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov18 (uint32_t *in, uint32_t *out) +{ + register vuint32m8_t v8 asm("v8") = *(vuint32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint32m8_t v16 asm("v16") = v8; + *(vuint32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov19: +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov19 (uint64_t *in, uint64_t *out) +{ + register vuint64m1_t v1 asm("v1") = *(vuint64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vuint64m1_t v2 asm("v2") = v1; + *(vuint64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov20: +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov20 (uint64_t *in, uint64_t *out) +{ + register vuint64m2_t v2 asm("v2") = *(vuint64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vuint64m2_t v4 asm("v4") = v2; + *(vuint64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov21: +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... 
+** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov21 (uint64_t *in, uint64_t *out) +{ + register vuint64m4_t v4 asm("v4") = *(vuint64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vuint64m4_t v8 asm("v8") = v4; + *(vuint64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov22: +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov22 (uint64_t *in, uint64_t *out) +{ + register vuint64m8_t v8 asm("v8") = *(vuint64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vuint64m8_t v16 asm("v16") = v8; + *(vuint64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c new file mode 100644 index 0000000..5b8ce40 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-12.c @@ -0,0 +1,159 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov14: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov14 (float *in, float *out) +{ + register vfloat32mf2_t v1 asm("v1") = *(vfloat32mf2_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vfloat32mf2_t v2 asm("v2") = v1; + *(vfloat32mf2_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov15: +** vl1re32\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... +** ret +*/ +void mov15 (float *in, float *out) +{ + register vfloat32m1_t v1 asm("v1") = *(vfloat32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vfloat32m1_t v2 asm("v2") = v1; + *(vfloat32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov16: +** vl2re32\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov16 (float *in, float *out) +{ + register vfloat32m2_t v2 asm("v2") = *(vfloat32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vfloat32m2_t v4 asm("v4") = v2; + *(vfloat32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov17: +** vl4re32\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov17 (float *in, float *out) +{ + register vfloat32m4_t v4 asm("v4") = *(vfloat32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vfloat32m4_t v8 asm("v8") = v4; + *(vfloat32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov18: +** vl8re32\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov18 (float *in, float *out) +{ + register vfloat32m8_t v8 asm("v8") = *(vfloat32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vfloat32m8_t v16 asm("v16") = v8; + *(vfloat32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** mov19: +** vl1re64\.v\tv1,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv1r\.v\tv2,v1 +** ... 
+** ret +*/ +void mov19 (uint64_t *in, uint64_t *out) +{ + register vfloat64m1_t v1 asm("v1") = *(vfloat64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vfloat64m1_t v2 asm("v2") = v1; + *(vfloat64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** mov20: +** vl2re64\.v\tv2,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv2r\.v\tv4,v2 +** ... +** ret +*/ +void mov20 (uint64_t *in, uint64_t *out) +{ + register vfloat64m2_t v2 asm("v2") = *(vfloat64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + register vfloat64m2_t v4 asm("v4") = v2; + *(vfloat64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** mov21: +** vl4re64\.v\tv4,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv4r\.v\tv8,v4 +** ... +** ret +*/ +void mov21 (uint64_t *in, uint64_t *out) +{ + register vfloat64m4_t v4 asm("v4") = *(vfloat64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + register vfloat64m4_t v8 asm("v8") = v4; + *(vfloat64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** mov22: +** vl8re64\.v\tv8,0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ... +** vmv8r\.v\tv16,v8 +** ... +** ret +*/ +void mov22 (uint64_t *in, uint64_t *out) +{ + register vfloat64m8_t v8 asm("v8") = *(vfloat64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + register vfloat64m8_t v16 asm("v16") = v8; + *(vfloat64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c new file mode 100644 index 0000000..8c630f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-13.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ + +#include <riscv_vector.h> + +void mov1 (int8_t *in, int8_t *out) +{ + register vint8mf8_t v1 asm("v1") = *(vint8mf8_t*)in; + asm volatile ("# %0"::"vr"(v1)); + register vint8mf8_t v2 asm("v2") = v1; + asm volatile ("#":::"v2"); + *(vint8mf8_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c new file mode 100644 index 0000000..b9bdd51 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-2.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov2 (int16_t *in, int16_t *out) +{ + vint16mf4_t v = *(vint16mf4_t*)in; + *(vint16mf4_t*)out = v; +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int16_t *in, int16_t *out) +{ + vint16mf2_t v = *(vint16mf2_t*)in; + *(vint16mf2_t*)out = v; +} + +/* +** mov4: +** vl1re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int16_t *in, 
int16_t *out) +{ + vint16m1_t v = *(vint16m1_t*)in; + *(vint16m1_t*)out = v; +} + +/* +** mov5: +** vl2re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int16_t *in, int16_t *out) +{ + vint16m2_t v = *(vint16m2_t*)in; + *(vint16m2_t*)out = v; +} + +/* +** mov6: +** vl4re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int16_t *in, int16_t *out) +{ + vint16m4_t v = *(vint16m4_t*)in; + *(vint16m4_t*)out = v; +} + +/* +** mov7: +** vl8re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int16_t *in, int16_t *out) +{ + vint16m8_t v = *(vint16m8_t*)in; + *(vint16m8_t*)out = v; +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint16_t *in, uint16_t *out) +{ + vuint16mf4_t v = *(vuint16mf4_t*)in; + *(vuint16mf4_t*)out = v; +} + +/* +** mov9: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint16_t *in, uint16_t *out) +{ + vuint16mf2_t v = *(vuint16mf2_t*)in; + *(vuint16mf2_t*)out = v; +} + +/* +** mov10: +** vl1re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint16_t *in, uint16_t *out) +{ + vuint16m1_t v = *(vuint16m1_t*)in; + *(vuint16m1_t*)out = v; +} + +/* +** mov11: +** vl2re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint16_t *in, uint16_t *out) +{ + vuint16m2_t v = *(vuint16m2_t*)in; + *(vuint16m2_t*)out = v; +} + +/* +** mov12: +** vl4re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov12 (uint16_t *in, uint16_t *out) +{ + vuint16m4_t v = *(vuint16m4_t*)in; + *(vuint16m4_t*)out = v; +} + +/* +** mov13: +** vl8re16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov13 (uint16_t *in, uint16_t *out) +{ + vuint16m8_t v = *(vuint16m8_t*)in; + *(vuint16m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c new file mode 100644 index 0000000..a7a89db --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-3.c @@ -0,0 +1,127 @@ +/* { dg-do compile } */ 
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int32_t *in, int32_t *out) +{ + vint32mf2_t v = *(vint32mf2_t*)in; + *(vint32mf2_t*)out = v; +} + +/* +** mov4: +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int32_t *in, int32_t *out) +{ + vint32m1_t v = *(vint32m1_t*)in; + *(vint32m1_t*)out = v; +} + +/* +** mov5: +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int32_t *in, int32_t *out) +{ + vint32m2_t v = *(vint32m2_t*)in; + *(vint32m2_t*)out = v; +} + +/* +** mov6: +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int32_t *in, int32_t *out) +{ + vint32m4_t v = *(vint32m4_t*)in; + *(vint32m4_t*)out = v; +} + +/* +** mov7: +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int32_t *in, int32_t *out) +{ + vint32m8_t v = *(vint32m8_t*)in; + *(vint32m8_t*)out = v; +} + +/* +** mov8: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint32_t *in, uint32_t *out) +{ + vuint32mf2_t v = *(vuint32mf2_t*)in; + *(vuint32mf2_t*)out = v; +} + +/* +** mov9: +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint32_t *in, uint32_t *out) +{ + vuint32m1_t v = *(vuint32m1_t*)in; + *(vuint32m1_t*)out = v; +} + +/* +** mov10: +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint32_t *in, uint32_t *out) +{ + vuint32m2_t v = *(vuint32m2_t*)in; + *(vuint32m2_t*)out = v; +} + +/* +** mov11: +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint32_t *in, uint32_t *out) +{ + vuint32m4_t v = *(vuint32m4_t*)in; + *(vuint32m4_t*)out = v; +} + +/* +** mov12: +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret 
+*/ +void mov12 (uint32_t *in, uint32_t *out) +{ + vuint32m8_t v = *(vuint32m8_t*)in; + *(vuint32m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c new file mode 100644 index 0000000..e8cfb4b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-4.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov4: +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int64_t *in, int64_t *out) +{ + vint64m1_t v = *(vint64m1_t*)in; + *(vint64m1_t*)out = v; +} + +/* +** mov5: +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int64_t *in, int64_t *out) +{ + vint64m2_t v = *(vint64m2_t*)in; + *(vint64m2_t*)out = v; +} + +/* +** mov6: +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int64_t *in, int64_t *out) +{ + vint64m4_t v = *(vint64m4_t*)in; + *(vint64m4_t*)out = v; +} + +/* +** mov7: +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int64_t *in, int64_t *out) +{ + vint64m8_t v = *(vint64m8_t*)in; + *(vint64m8_t*)out = v; +} + +/* +** mov8: +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov8 (uint64_t *in, uint64_t *out) +{ + vuint64m1_t v = *(vuint64m1_t*)in; + *(vuint64m1_t*)out = v; +} + +/* +** mov9: +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov9 (uint64_t *in, uint64_t *out) +{ + vuint64m2_t v = *(vuint64m2_t*)in; + *(vuint64m2_t*)out = v; +} + +/* +** mov10: +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov10 (uint64_t *in, uint64_t *out) +{ + vuint64m4_t v = *(vuint64m4_t*)in; + *(vuint64m4_t*)out = v; +} + +/* +** mov11: +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov11 (uint64_t *in, uint64_t *out) +{ + vuint64m8_t v = *(vuint64m8_t*)in; + *(vuint64m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c new file mode 100644 index 0000000..5ca232b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-5.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } 
} */ + +#include <riscv_vector.h> + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (float *in, float *out) +{ + vfloat32mf2_t v = *(vfloat32mf2_t*)in; + *(vfloat32mf2_t*)out = v; +} + +/* +** mov4: +** vl1re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (float *in, float *out) +{ + vfloat32m1_t v = *(vfloat32m1_t*)in; + *(vfloat32m1_t*)out = v; +} + +/* +** mov5: +** vl2re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (float *in, float *out) +{ + vfloat32m2_t v = *(vfloat32m2_t*)in; + *(vfloat32m2_t*)out = v; +} + +/* +** mov6: +** vl4re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (float *in, float *out) +{ + vfloat32m4_t v = *(vfloat32m4_t*)in; + *(vfloat32m4_t*)out = v; +} + +/* +** mov7: +** vl8re32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (float *in, float *out) +{ + vfloat32m8_t v = *(vfloat32m8_t*)in; + *(vfloat32m8_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c new file mode 100644 index 0000000..41fc73b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-6.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov4: +** vl1re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs1r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (double *in, double *out) +{ + vfloat64m1_t v = *(vfloat64m1_t*)in; + *(vfloat64m1_t*)out = v; +} + +/* +** mov5: +** vl2re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs2r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (double *in, double *out) +{ + vfloat64m2_t v = *(vfloat64m2_t*)in; + *(vfloat64m2_t*)out = v; +} + +/* +** mov6: +** vl4re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs4r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (double *in, double *out) +{ + vfloat64m4_t v = *(vfloat64m4_t*)in; + *(vfloat64m4_t*)out = v; +} + +/* +** mov7: +** vl8re64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vs8r\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (double *in, double *out) +{ + vfloat64m8_t v = *(vfloat64m8_t*)in; + *(vfloat64m8_t*)out = v; +} diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c new file mode 100644 index 0000000..d4636e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-7.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ + +#include <riscv_vector.h> + +/* This testcase is testing whether RISC-V define REGMODE_NATURAL_SIZE. */ +void foo (int8_t *in, int8_t *out) +{ + vint8mf2_t v = *(vint8mf2_t*)in; + vint32mf2_t v2 = *(vint32mf2_t*)in; + *(vint8mf2_t*)out = v; + *(vint32mf2_t*)(out + 16) = v2; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c new file mode 100644 index 0000000..9447b05 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-8.c @@ -0,0 +1,96 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov1 (int8_t *in, int8_t *out) +{ + vbool64_t v = *(vbool64_t*)in; + *(vbool64_t*)out = v; +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov2 (int8_t *in, int8_t *out) +{ + vbool32_t v = *(vbool32_t*)in; + *(vbool32_t*)out = v; +} + +/* +** mov3: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov3 (int8_t *in, int8_t *out) +{ + vbool16_t v = *(vbool16_t*)in; + *(vbool16_t*)out = v; +} + +/* +** mov4: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov4 (int8_t *in, int8_t *out) +{ + vbool8_t v = *(vbool8_t*)in; + *(vbool8_t*)out = v; +} + +/* +** mov5: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov5 (int8_t *in, int8_t *out) +{ + vbool4_t v = *(vbool4_t*)in; + *(vbool4_t*)out = v; +} + +/* +** mov6: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m4,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov6 (int8_t *in, int8_t *out) +{ + vbool2_t v = *(vbool2_t*)in; + *(vbool2_t*)out = v; +} + +/* +** mov7: +** 
vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au] +** vlm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vsm\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov7 (int8_t *in, int8_t *out) +{ + vbool1_t v = *(vbool1_t*)in; + *(vbool1_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c new file mode 100644 index 0000000..6d39e3c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* Test tieable of RVV types with same LMUL. */ +/* +** mov1: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2 +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov1 (int8_t *in, int8_t *out, int M) +{ + vint8mf2_t v1 = *(vint8mf2_t*)(in); + vint16mf2_t v2 = *(vint16mf2_t*)(in); + vint32mf2_t v3 = *(vint32mf2_t*)(in); + *(vint8mf2_t*)(out) = v1; + *(vint16mf2_t*)(out + 1) = v2; + *(vint32mf2_t*)(out + 2) = v3; +} + +/* +** mov2: +** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** ret +*/ +void mov2 (int8_t *in, int8_t *out, int M) +{ + vint8mf4_t v1 = *(vint8mf4_t*)(in); + vint16mf4_t v2 = *(vint16mf4_t*)(in); + *(vint8mf4_t*)(out) = v1; + *(vint16mf4_t*)(out + 1) = v2; +} diff --git a/gcc/testsuite/gfortran.dg/PR100097.f90 b/gcc/testsuite/gfortran.dg/PR100097.f90 new file mode 100644 index 0000000..f927d29 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR100097.f90 @@ -0,0 +1,41 @@ +! { dg-do run } +! { dg-options "-fdump-tree-original" } +! +! Test the fix for PR100097 +! + +program main_p + implicit none + + class(*), pointer :: bar_p(:) + class(*), allocatable :: bar_a(:) + + call foo_p(bar_p) + call foo_a(bar_a) + +contains + + subroutine foo_p(that) + class(*), pointer, intent(out) :: that(..) + + select rank(that) + rank(1) + rank default + stop 1 + end select + end subroutine foo_p + + subroutine foo_a(that) + class(*), allocatable, intent(out) :: that(..) + + select rank(that) + rank(1) + rank default + stop 2 + end select + end subroutine foo_a + +end program main_p + +! { dg-final { scan-tree-dump "bar_a._data.dtype = \\{.* .rank=1,.*\\}" "original" } } +! 
{ dg-final { scan-tree-dump "bar_p._data.dtype = \\{.* .rank=1,.*\\}" "original" } } diff --git a/gcc/testsuite/gfortran.dg/PR100098.f90 b/gcc/testsuite/gfortran.dg/PR100098.f90 new file mode 100644 index 0000000..26ac0c8 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR100098.f90 @@ -0,0 +1,45 @@ +! { dg-do run } +! { dg-options "-fdump-tree-original" } +! +! Test the fix for PR100098 +! + +program main_p + implicit none + + type :: foo_t + integer :: i + end type foo_t + + class(foo_t), pointer :: bar_p(:) + class(foo_t), allocatable :: bar_a(:) + + call foo_p(bar_p) + call foo_a(bar_a) + +contains + + subroutine foo_p(that) + class(foo_t), pointer, intent(out) :: that(..) + + select rank(that) + rank(1) + rank default + stop 1 + end select + end subroutine foo_p + + subroutine foo_a(that) + class(foo_t), allocatable, intent(out) :: that(..) + + select rank(that) + rank(1) + rank default + stop 2 + end select + end subroutine foo_a + +end program main_p + +! { dg-final { scan-tree-dump "bar_a._data.dtype = \\{.* .rank=1,.*\\}" "original" } } +! { dg-final { scan-tree-dump "bar_p._data.dtype = \\{.* .rank=1,.*\\}" "original" } } diff --git a/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 new file mode 100644 index 0000000..821e785 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-6.f90 @@ -0,0 +1,17 @@ +! PR middle-end/107236 + +! Did ICE before because IFN .GOMP_TARGET_REV was not +! processed in omp-offload.cc. +! Note: Test required ENABLE_OFFLOADING being true inside GCC. + +implicit none +!$omp requires reverse_offload +!$omp target parallel num_threads(4) + !$omp target device(ancestor:1) + call foo() + !$omp end target +!$omp end target parallel +contains + subroutine foo + end +end diff --git a/gcc/testsuite/gnat.dg/specs/coverage1.ads b/gcc/testsuite/gnat.dg/specs/coverage1.ads new file mode 100644 index 0000000..af7b61c --- /dev/null +++ b/gcc/testsuite/gnat.dg/specs/coverage1.ads @@ -0,0 +1,10 @@ +-- { dg-do compile } +-- { dg-options "-ftest-coverage" } + +package Coverage1 is + + type Rec is record + I : Integer := 0; + end record; + +end Coverage1; diff --git a/gcc/testsuite/gnat.dg/specs/variant_part.ads b/gcc/testsuite/gnat.dg/specs/variant_part.ads index afc92cd..72da410 100644 --- a/gcc/testsuite/gnat.dg/specs/variant_part.ads +++ b/gcc/testsuite/gnat.dg/specs/variant_part.ads @@ -1,4 +1,5 @@ -- { dg-do compile } + package Variant_Part is type T1(b: boolean) is record case (b) is -- { dg-error "discriminant name may not be parenthesized" } diff --git a/gcc/testsuite/gnat.dg/specs/weak1.ads b/gcc/testsuite/gnat.dg/specs/weak1.ads index 82cddc0..ece05ea 100644 --- a/gcc/testsuite/gnat.dg/specs/weak1.ads +++ b/gcc/testsuite/gnat.dg/specs/weak1.ads @@ -1,3 +1,5 @@ +-- { dg-do compile } + package Weak1 is Myconst : constant Integer := 1234; diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp index ac7712a..fe88d2f 100644 --- a/gcc/testsuite/lib/profopt.exp +++ b/gcc/testsuite/lib/profopt.exp @@ -289,7 +289,7 @@ proc auto-profopt-execute { src } { return } set profile_wrapper [profopt-perf-wrapper] - set profile_option "-gdwarf-4 -DFOR_AUTOFDO_TESTING" + set profile_option "-g -DFOR_AUTOFDO_TESTING" set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING -fearly-inlining" set run_autofdo 1 profopt-execute $src diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 7824a44..ccbbee8 100644 --- 
a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7075,7 +7075,10 @@ proc check_effective_target_vect_long_long { } { || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) }}] + && [check_effective_target_s390_vx]) + || ([istarget powerpc*-*-*] + && ![istarget powerpc-*-linux*paired*] + && [check_effective_target_has_arch_pwr8]) }}] } diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index 9f30f78..7e2a3e9 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -577,6 +577,51 @@ get_scalar_evolution (basic_block instantiated_below, tree scalar) return res; } + +/* Depth first search algorithm. */ + +enum t_bool { + t_false, + t_true, + t_dont_know +}; + +class scev_dfs +{ +public: + scev_dfs (class loop *loop_, gphi *phi_, tree init_cond_) + : loop (loop_), loop_phi_node (phi_), init_cond (init_cond_) {} + t_bool get_ev (tree *, tree); + +private: + t_bool follow_ssa_edge_expr (gimple *, tree, tree *, int); + t_bool follow_ssa_edge_binary (gimple *at_stmt, + tree type, tree rhs0, enum tree_code code, + tree rhs1, tree *evolution_of_loop, int limit); + t_bool follow_ssa_edge_in_condition_phi_branch (int i, + gphi *condition_phi, + tree *evolution_of_branch, + tree init_cond, int limit); + t_bool follow_ssa_edge_in_condition_phi (gphi *condition_phi, + tree *evolution_of_loop, int limit); + t_bool follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node, + tree *evolution_of_loop, int limit); + tree add_to_evolution (tree chrec_before, enum tree_code code, + tree to_add, gimple *at_stmt); + tree add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt); + + class loop *loop; + gphi *loop_phi_node; + tree init_cond; +}; + +t_bool +scev_dfs::get_ev (tree *ev_fn, tree arg) +{ + *ev_fn = chrec_dont_know; + return follow_ssa_edge_expr (loop_phi_node, arg, ev_fn, 0); +} + /* Helper function for add_to_evolution. Returns the evolution function for an assignment of the form "a = b + c", where "a" and "b" are on the strongly connected component. CHREC_BEFORE is the @@ -587,12 +632,12 @@ get_scalar_evolution (basic_block instantiated_below, tree scalar) evolution the expression TO_ADD, otherwise construct an evolution part for this loop. */ -static tree -add_to_evolution_1 (unsigned loop_nb, tree chrec_before, tree to_add, - gimple *at_stmt) +tree +scev_dfs::add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt) { tree type, left, right; - class loop *loop = get_loop (cfun, loop_nb), *chloop; + unsigned loop_nb = loop->num; + class loop *chloop; switch (TREE_CODE (chrec_before)) { @@ -631,7 +676,7 @@ add_to_evolution_1 (unsigned loop_nb, tree chrec_before, tree to_add, gcc_assert (flow_loop_nested_p (loop, chloop)); /* Search the evolution in LOOP_NB. */ - left = add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before), + left = add_to_evolution_1 (CHREC_LEFT (chrec_before), to_add, at_stmt); right = CHREC_RIGHT (chrec_before); right = chrec_convert_rhs (chrec_type (left), right, at_stmt); @@ -646,6 +691,17 @@ add_to_evolution_1 (unsigned loop_nb, tree chrec_before, tree to_add, left = chrec_before; right = chrec_convert_rhs (chrec_type (left), to_add, at_stmt); + /* When we add the first evolution we need to replace the symbolic + evolution we've put in when the DFS reached the loop PHI node + with the initial value. 
There is only a limited set of + extra operations on top of that symbol that we allow, namely + sign-conversions we can look through. For other cases we leave + the symbolic initial condition which causes build_polynomial_chrec + to return chrec_dont_know. See PR42512, PR66375 and PR107176 for + cases we mishandled before. */ + STRIP_NOPS (chrec_before); + if (chrec_before == gimple_phi_result (loop_phi_node)) + left = fold_convert (TREE_TYPE (left), init_cond); return build_polynomial_chrec (loop_nb, left, right); } } @@ -784,9 +840,9 @@ add_to_evolution_1 (unsigned loop_nb, tree chrec_before, tree to_add, */ -static tree -add_to_evolution (unsigned loop_nb, tree chrec_before, enum tree_code code, - tree to_add, gimple *at_stmt) +tree +scev_dfs::add_to_evolution (tree chrec_before, enum tree_code code, + tree to_add, gimple *at_stmt) { tree type = chrec_type (to_add); tree res = NULL_TREE; @@ -803,7 +859,7 @@ add_to_evolution (unsigned loop_nb, tree chrec_before, enum tree_code code, if (dump_file && (dump_flags & TDF_SCEV)) { fprintf (dump_file, "(add_to_evolution \n"); - fprintf (dump_file, " (loop_nb = %d)\n", loop_nb); + fprintf (dump_file, " (loop_nb = %d)\n", loop->num); fprintf (dump_file, " (chrec_before = "); print_generic_expr (dump_file, chrec_before); fprintf (dump_file, ")\n (to_add = "); @@ -816,7 +872,7 @@ add_to_evolution (unsigned loop_nb, tree chrec_before, enum tree_code code, ? build_real (type, dconstm1) : build_int_cst_type (type, -1)); - res = add_to_evolution_1 (loop_nb, chrec_before, to_add, at_stmt); + res = add_to_evolution_1 (chrec_before, to_add, at_stmt); if (dump_file && (dump_flags & TDF_SCEV)) { @@ -828,64 +884,14 @@ add_to_evolution (unsigned loop_nb, tree chrec_before, enum tree_code code, return res; } - - -/* This section selects the loops that will be good candidates for the - scalar evolution analysis. For the moment, greedily select all the - loop nests we could analyze. */ - -/* For a loop with a single exit edge, return the COND_EXPR that - guards the exit edge. If the expression is too difficult to - analyze, then give up. */ - -gcond * -get_loop_exit_condition (const class loop *loop) -{ - gcond *res = NULL; - edge exit_edge = single_exit (loop); - - if (dump_file && (dump_flags & TDF_SCEV)) - fprintf (dump_file, "(get_loop_exit_condition \n "); - - if (exit_edge) - { - gimple *stmt; - - stmt = last_stmt (exit_edge->src); - if (gcond *cond_stmt = safe_dyn_cast <gcond *> (stmt)) - res = cond_stmt; - } - - if (dump_file && (dump_flags & TDF_SCEV)) - { - print_gimple_stmt (dump_file, res, 0); - fprintf (dump_file, ")\n"); - } - - return res; -} - - -/* Depth first search algorithm. */ - -enum t_bool { - t_false, - t_true, - t_dont_know -}; - - -static t_bool follow_ssa_edge_expr (class loop *loop, gimple *, tree, gphi *, - tree *, int); /* Follow the ssa edge into the binary expression RHS0 CODE RHS1. Return true if the strongly connected component has been found. 
*/ -static t_bool -follow_ssa_edge_binary (class loop *loop, gimple *at_stmt, - tree type, tree rhs0, enum tree_code code, tree rhs1, - gphi *halting_phi, tree *evolution_of_loop, - int limit) +t_bool +scev_dfs::follow_ssa_edge_binary (gimple *at_stmt, tree type, tree rhs0, + enum tree_code code, tree rhs1, + tree *evolution_of_loop, int limit) { t_bool res = t_false; tree evol; @@ -907,23 +913,18 @@ follow_ssa_edge_binary (class loop *loop, gimple *at_stmt, limit++; evol = *evolution_of_loop; - evol = add_to_evolution - (loop->num, - chrec_convert (type, evol, at_stmt), - code, rhs1, at_stmt); - res = follow_ssa_edge_expr - (loop, at_stmt, rhs0, halting_phi, &evol, limit); + res = follow_ssa_edge_expr (at_stmt, rhs0, &evol, limit); if (res == t_true) - *evolution_of_loop = evol; + *evolution_of_loop = add_to_evolution + (chrec_convert (type, evol, at_stmt), code, rhs1, at_stmt); else if (res == t_false) { - *evolution_of_loop = add_to_evolution - (loop->num, - chrec_convert (type, *evolution_of_loop, at_stmt), - code, rhs0, at_stmt); res = follow_ssa_edge_expr - (loop, at_stmt, rhs1, halting_phi, - evolution_of_loop, limit); + (at_stmt, rhs1, evolution_of_loop, limit); + if (res == t_true) + *evolution_of_loop = add_to_evolution + (chrec_convert (type, *evolution_of_loop, at_stmt), + code, rhs0, at_stmt); } } @@ -935,13 +936,11 @@ follow_ssa_edge_binary (class loop *loop, gimple *at_stmt, { /* Match an assignment under the form: "a = ... + c". */ - *evolution_of_loop = add_to_evolution - (loop->num, chrec_convert (type, *evolution_of_loop, - at_stmt), - code, rhs0, at_stmt); - res = follow_ssa_edge_expr - (loop, at_stmt, rhs1, halting_phi, - evolution_of_loop, limit); + res = follow_ssa_edge_expr (at_stmt, rhs1, evolution_of_loop, limit); + if (res == t_true) + *evolution_of_loop = add_to_evolution + (chrec_convert (type, *evolution_of_loop, at_stmt), + code, rhs0, at_stmt); } else @@ -989,13 +988,11 @@ backedge_phi_arg_p (gphi *phi, int i) true if the strongly connected component has been found following this path. */ -static inline t_bool -follow_ssa_edge_in_condition_phi_branch (int i, - class loop *loop, - gphi *condition_phi, - gphi *halting_phi, - tree *evolution_of_branch, - tree init_cond, int limit) +t_bool +scev_dfs::follow_ssa_edge_in_condition_phi_branch (int i, + gphi *condition_phi, + tree *evolution_of_branch, + tree init_cond, int limit) { tree branch = PHI_ARG_DEF (condition_phi, i); *evolution_of_branch = chrec_dont_know; @@ -1008,7 +1005,7 @@ follow_ssa_edge_in_condition_phi_branch (int i, if (TREE_CODE (branch) == SSA_NAME) { *evolution_of_branch = init_cond; - return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi, + return follow_ssa_edge_expr (condition_phi, branch, evolution_of_branch, limit); } @@ -1025,17 +1022,14 @@ follow_ssa_edge_in_condition_phi_branch (int i, /* This function merges the branches of a condition-phi-node in a loop. 
*/ -static t_bool -follow_ssa_edge_in_condition_phi (class loop *loop, - gphi *condition_phi, - gphi *halting_phi, - tree *evolution_of_loop, int limit) +t_bool +scev_dfs::follow_ssa_edge_in_condition_phi (gphi *condition_phi, + tree *evolution_of_loop, int limit) { int i, n; tree init = *evolution_of_loop; tree evolution_of_branch; - t_bool res = follow_ssa_edge_in_condition_phi_branch (0, loop, condition_phi, - halting_phi, + t_bool res = follow_ssa_edge_in_condition_phi_branch (0, condition_phi, &evolution_of_branch, init, limit); if (res == t_false || res == t_dont_know) @@ -1053,8 +1047,7 @@ follow_ssa_edge_in_condition_phi (class loop *loop, /* Increase the limit by the PHI argument number to avoid exponential time and memory complexity. */ - res = follow_ssa_edge_in_condition_phi_branch (i, loop, condition_phi, - halting_phi, + res = follow_ssa_edge_in_condition_phi_branch (i, condition_phi, &evolution_of_branch, init, limit + i); if (res == t_false || res == t_dont_know) @@ -1072,11 +1065,9 @@ follow_ssa_edge_in_condition_phi (class loop *loop, it follows the edges in the parent loop. The inner loop is considered as a single statement. */ -static t_bool -follow_ssa_edge_inner_loop_phi (class loop *outer_loop, - gphi *loop_phi_node, - gphi *halting_phi, - tree *evolution_of_loop, int limit) +t_bool +scev_dfs::follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node, + tree *evolution_of_loop, int limit) { class loop *loop = loop_containing_stmt (loop_phi_node); tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node)); @@ -1096,9 +1087,8 @@ follow_ssa_edge_inner_loop_phi (class loop *outer_loop, /* Follow the edges that exit the inner loop. */ bb = gimple_phi_arg_edge (loop_phi_node, i)->src; if (!flow_bb_inside_loop_p (loop, bb)) - res = follow_ssa_edge_expr (outer_loop, loop_phi_node, - arg, halting_phi, - evolution_of_loop, limit); + res = follow_ssa_edge_expr (loop_phi_node, + arg, evolution_of_loop, limit); if (res == t_true) break; } @@ -1112,18 +1102,17 @@ follow_ssa_edge_inner_loop_phi (class loop *outer_loop, /* Otherwise, compute the overall effect of the inner loop. */ ev = compute_overall_effect_of_inner_loop (loop, ev); - return follow_ssa_edge_expr (outer_loop, loop_phi_node, ev, halting_phi, - evolution_of_loop, limit); + return follow_ssa_edge_expr (loop_phi_node, ev, evolution_of_loop, limit); } /* Follow the ssa edge into the expression EXPR. Return true if the strongly connected component has been found. */ -static t_bool -follow_ssa_edge_expr (class loop *loop, gimple *at_stmt, tree expr, - gphi *halting_phi, tree *evolution_of_loop, - int limit) +t_bool +scev_dfs::follow_ssa_edge_expr (gimple *at_stmt, tree expr, + tree *evolution_of_loop, int limit) { + gphi *halting_phi = loop_phi_node; enum tree_code code; tree type, rhs0, rhs1 = NULL_TREE; @@ -1161,14 +1150,17 @@ tail_recurse: record their evolutions. Finally, merge the collected information and set the approximation to the main variable. */ - return follow_ssa_edge_in_condition_phi - (loop, phi, halting_phi, evolution_of_loop, limit); + return follow_ssa_edge_in_condition_phi (phi, evolution_of_loop, + limit); /* When the analyzed phi is the halting_phi, the depth-first search is over: we have found a path from the halting_phi to itself in the loop. */ if (phi == halting_phi) - return t_true; + { + *evolution_of_loop = expr; + return t_true; + } /* Otherwise, the evolution of the HALTING_PHI depends on the evolution of another loop-phi-node, i.e. the @@ -1179,9 +1171,8 @@ tail_recurse: /* Inner loop. 
*/ if (flow_loop_nested_p (loop, def_loop)) - return follow_ssa_edge_inner_loop_phi - (loop, phi, halting_phi, evolution_of_loop, - limit + 1); + return follow_ssa_edge_inner_loop_phi (phi, evolution_of_loop, + limit + 1); /* Outer loop. */ return t_false; @@ -1214,6 +1205,8 @@ tail_recurse: { code = TREE_CODE (expr); type = TREE_TYPE (expr); + /* Via follow_ssa_edge_inner_loop_phi we arrive here with the + GENERIC scalar evolution of the inner loop. */ switch (code) { CASE_CONVERT: @@ -1224,6 +1217,8 @@ tail_recurse: case MINUS_EXPR: rhs0 = TREE_OPERAND (expr, 0); rhs1 = TREE_OPERAND (expr, 1); + STRIP_USELESS_TYPE_CONVERSION (rhs0); + STRIP_USELESS_TYPE_CONVERSION (rhs1); break; default: rhs0 = expr; @@ -1235,7 +1230,7 @@ tail_recurse: CASE_CONVERT: { /* This assignment is under the form "a_1 = (cast) rhs. */ - t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi, + t_bool res = follow_ssa_edge_expr (at_stmt, rhs0, evolution_of_loop, limit); *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt); return res; @@ -1260,24 +1255,22 @@ tail_recurse: case PLUS_EXPR: case MINUS_EXPR: /* This case is under the form "rhs0 +- rhs1". */ - STRIP_USELESS_TYPE_CONVERSION (rhs0); - STRIP_USELESS_TYPE_CONVERSION (rhs1); if (TREE_CODE (rhs0) == SSA_NAME && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR)) { /* Match an assignment under the form: - "a = b +- ...". - Use tail-recursion for the simple case. */ - *evolution_of_loop = add_to_evolution - (loop->num, chrec_convert (type, *evolution_of_loop, - at_stmt), - code, rhs1, at_stmt); - expr = rhs0; - goto tail_recurse; + "a = b +- ...". */ + t_bool res = follow_ssa_edge_expr (at_stmt, rhs0, + evolution_of_loop, limit); + if (res == t_true) + *evolution_of_loop = add_to_evolution + (chrec_convert (type, *evolution_of_loop, at_stmt), + code, rhs1, at_stmt); + return res; } /* Else search for the SCC in both rhs0 and rhs1. */ - return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1, - halting_phi, evolution_of_loop, limit); + return follow_ssa_edge_binary (at_stmt, type, rhs0, code, rhs1, + evolution_of_loop, limit); case ASSERT_EXPR: /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>" @@ -1289,6 +1282,42 @@ tail_recurse: return t_false; } } + + +/* This section selects the loops that will be good candidates for the + scalar evolution analysis. For the moment, greedily select all the + loop nests we could analyze. */ + +/* For a loop with a single exit edge, return the COND_EXPR that + guards the exit edge. If the expression is too difficult to + analyze, then give up. */ + +gcond * +get_loop_exit_condition (const class loop *loop) +{ + gcond *res = NULL; + edge exit_edge = single_exit (loop); + + if (dump_file && (dump_flags & TDF_SCEV)) + fprintf (dump_file, "(get_loop_exit_condition \n "); + + if (exit_edge) + { + gimple *stmt; + + stmt = last_stmt (exit_edge->src); + if (gcond *cond_stmt = safe_dyn_cast <gcond *> (stmt)) + res = cond_stmt; + } + + if (dump_file && (dump_flags & TDF_SCEV)) + { + print_gimple_stmt (dump_file, res, 0); + fprintf (dump_file, ")\n"); + } + + return res; +} /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP. @@ -1379,7 +1408,7 @@ analyze_evolution_in_loop (gphi *loop_phi_node, for (i = 0; i < n; i++) { tree arg = PHI_ARG_DEF (loop_phi_node, i); - tree ev_fn; + tree ev_fn = chrec_dont_know; t_bool res; /* Select the edges that enter the loop body. 
*/ @@ -1392,9 +1421,8 @@ analyze_evolution_in_loop (gphi *loop_phi_node, bool val = false; /* Pass in the initial condition to the follow edge function. */ - ev_fn = init_cond; - res = follow_ssa_edge_expr (loop, loop_phi_node, arg, - loop_phi_node, &ev_fn, 0); + scev_dfs dfs (loop, loop_phi_node, init_cond); + res = dfs.get_ev (&ev_fn, arg); /* If ev_fn has no evolution in the inner loop, and the init_cond is not equal to ev_fn, then we have an @@ -1549,7 +1577,6 @@ analyze_initial_condition (gphi *loop_phi_node) static tree interpret_loop_phi (class loop *loop, gphi *loop_phi_node) { - tree res; class loop *phi_loop = loop_containing_stmt (loop_phi_node); tree init_cond; @@ -1557,26 +1584,7 @@ interpret_loop_phi (class loop *loop, gphi *loop_phi_node) /* Otherwise really interpret the loop phi. */ init_cond = analyze_initial_condition (loop_phi_node); - res = analyze_evolution_in_loop (loop_phi_node, init_cond); - - /* Verify we maintained the correct initial condition throughout - possible conversions in the SSA chain. */ - if (res != chrec_dont_know) - { - tree new_init = res; - if (CONVERT_EXPR_P (res) - && TREE_CODE (TREE_OPERAND (res, 0)) == POLYNOMIAL_CHREC) - new_init = fold_convert (TREE_TYPE (res), - CHREC_LEFT (TREE_OPERAND (res, 0))); - else if (TREE_CODE (res) == POLYNOMIAL_CHREC) - new_init = CHREC_LEFT (res); - STRIP_USELESS_TYPE_CONVERSION (new_init); - if (TREE_CODE (new_init) == POLYNOMIAL_CHREC - || !operand_equal_p (init_cond, new_init, 0)) - return chrec_dont_know; - } - - return res; + return analyze_evolution_in_loop (loop_phi_node, init_cond); } /* This function merges the branches of a condition-phi-node, diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index 1e0f609..4ffcef4 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -2216,6 +2216,7 @@ expand_simple_operations (tree expr, tree stop, hash_map<tree, tree> &cache) case PLUS_EXPR: case MINUS_EXPR: + case MULT_EXPR: if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (expr)) && TYPE_OVERFLOW_TRAPS (TREE_TYPE (expr))) return expr; diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 777ba2f..4e2612e 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1925,6 +1925,16 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info, tree container_type = TREE_TYPE (container); tree vectype = get_vectype_for_scalar_type (vinfo, container_type); + /* Calculate shift_n before the adjustments for widening loads, otherwise + the container may change and we have to consider offset change for + widening loads on big endianness. The shift_n calculated here can be + independent of widening. */ + unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant (); + unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant (); + unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type)); + if (BYTES_BIG_ENDIAN) + shift_n = prec - shift_n - mask_width; + /* We move the conversion earlier if the loaded type is smaller than the return type to enable the use of widening loads. 
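
The vect_recog_bitfield_ref_pattern hunk now computes shift_n, including the big-endian mirroring shift_n = prec - shift_n - mask_width, before the widening-load adjustment may swap in a larger container. A self-contained sketch of that arithmetic; the helper names are made up for illustration:

  #include <cstdint>

  /* Sketch only: on big-endian targets the bit offset counts from the most
     significant end, so it is mirrored within the container's precision
     before being used as a shift amount.  */
  unsigned
  bitfield_shift (unsigned bit_offset, unsigned field_width,
                  unsigned container_prec, bool big_endian)
  {
    return big_endian ? container_prec - bit_offset - field_width
                      : bit_offset;
  }

  std::uint64_t
  extract_field (std::uint64_t container, unsigned shift, unsigned width)
  {
    std::uint64_t mask
      = width == 64 ? ~std::uint64_t (0) : (std::uint64_t (1) << width) - 1;
    return (container >> shift) & mask;
  }
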
*/ if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type) @@ -1935,6 +1945,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info, NOP_EXPR, container); container = gimple_get_lhs (pattern_stmt); container_type = TREE_TYPE (container); + prec = tree_to_uhwi (TYPE_SIZE (container_type)); vectype = get_vectype_for_scalar_type (vinfo, container_type); append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype); } @@ -1953,12 +1964,6 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info, shift_first = false; } - unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant (); - unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant (); - unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type)); - if (BYTES_BIG_ENDIAN) - shift_n = prec - shift_n - mask_width; - /* If we don't have to shift we only generate the mask, so just fix the code-path to shift_first. */ if (shift_n == 0) diff --git a/gcc/tree.cc b/gcc/tree.cc index 81a6cea..04603c8 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -14352,7 +14352,7 @@ maybe_wrap_with_location (tree expr, location_t loc) /* For now, don't add wrappers to exceptional tree nodes, to minimize any impact of the wrapper nodes. */ - if (EXCEPTIONAL_CLASS_P (expr)) + if (EXCEPTIONAL_CLASS_P (expr) || error_operand_p (expr)) return expr; /* Compiler-generated temporary variables don't need a wrapper. */ @@ -1135,7 +1135,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int, /* Define fields and accessors for some special-purpose tree nodes. */ -/* As with STRING_CST, in C terms this is sizeof, not strlen. */ +/* Unlike STRING_CST, in C terms this is strlen, not sizeof. */ #define IDENTIFIER_LENGTH(NODE) \ (IDENTIFIER_NODE_CHECK (NODE)->identifier.id.len) #define IDENTIFIER_POINTER(NODE) \ @@ -4706,6 +4706,13 @@ extern tree build_alloca_call_expr (tree, unsigned int, HOST_WIDE_INT); extern tree build_string_literal (unsigned, const char * = NULL, tree = char_type_node, unsigned HOST_WIDE_INT = HOST_WIDE_INT_M1U); +inline tree build_string_literal (const char *p) +{ return build_string_literal (strlen (p) + 1, p); } +inline tree build_string_literal (tree t) +{ + return build_string_literal (IDENTIFIER_LENGTH (t) + 1, + IDENTIFIER_POINTER (t)); +} /* Construct various nodes representing data types. */ diff --git a/gcc/value-range.cc b/gcc/value-range.cc index bcda498..77e5a2c 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -341,7 +341,7 @@ frange::set (tree type, // For -ffinite-math-only we can drop ranges outside the // representable numbers to min/max for the type. 
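
The tree.h hunk corrects the IDENTIFIER_LENGTH comment (strlen, not sizeof), which is also why the two new build_string_literal overloads above add 1: the stored length excludes the terminating NUL while the literal's size must include it. A trivial standalone reminder of that distinction:

  #include <cassert>
  #include <cstring>

  int
  main ()
  {
    /* Standalone illustration only.  */
    const char name[] = "vtable";
    assert (std::strlen (name) == 6);   /* strlen counts characters only */
    assert (sizeof name == 7);          /* sizeof includes the trailing NUL */
    return 0;
  }
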
- if (flag_finite_math_only) + if (!HONOR_INFINITIES (m_type)) { REAL_VALUE_TYPE min_repr = frange_val_min (m_type); REAL_VALUE_TYPE max_repr = frange_val_max (m_type); @@ -712,21 +712,21 @@ frange::supports_type_p (const_tree type) const void frange::verify_range () { - if (flag_finite_math_only) - gcc_checking_assert (!maybe_isnan ()); + if (!undefined_p ()) + gcc_checking_assert (HONOR_NANS (m_type) || !maybe_isnan ()); switch (m_kind) { case VR_UNDEFINED: gcc_checking_assert (!m_type); return; case VR_VARYING: - if (flag_finite_math_only) - gcc_checking_assert (!m_pos_nan && !m_neg_nan); - else - gcc_checking_assert (m_pos_nan && m_neg_nan); gcc_checking_assert (m_type); gcc_checking_assert (frange_val_is_min (m_min, m_type)); gcc_checking_assert (frange_val_is_max (m_max, m_type)); + if (HONOR_NANS (m_type)) + gcc_checking_assert (m_pos_nan && m_neg_nan); + else + gcc_checking_assert (!m_pos_nan && !m_neg_nan); return; case VR_RANGE: gcc_checking_assert (m_type); @@ -3957,11 +3957,13 @@ range_tests_floats () // A range of [-INF,+INF] is actually VARYING if no other properties // are set. r0 = frange_float ("-Inf", "+Inf"); - if (r0.maybe_isnan ()) - ASSERT_TRUE (r0.varying_p ()); + ASSERT_TRUE (r0.varying_p ()); // ...unless it has some special property... - r0.clear_nan (); - ASSERT_FALSE (r0.varying_p ()); + if (HONOR_NANS (r0.type ())) + { + r0.clear_nan (); + ASSERT_FALSE (r0.varying_p ()); + } // For most architectures, where float and double are different // sizes, having the same endpoints does not necessarily mean the @@ -4038,6 +4040,24 @@ range_tests_floats () } } +// Run floating range tests for various combinations of NAN and INF +// support. + +static void +range_tests_floats_various () +{ + int save_finite_math_only = flag_finite_math_only; + + // Test -ffinite-math-only. + flag_finite_math_only = 1; + range_tests_floats (); + // Test -fno-finite-math-only. + flag_finite_math_only = 0; + range_tests_floats (); + + flag_finite_math_only = save_finite_math_only; +} + void range_tests () { @@ -4046,7 +4066,7 @@ range_tests () range_tests_int_range_max (); range_tests_strict_enum (); range_tests_nonzero_bits (); - range_tests_floats (); + range_tests_floats_various (); range_tests_misc (); } diff --git a/gcc/value-range.h b/gcc/value-range.h index b48542a..c87734d 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -1201,10 +1201,10 @@ real_min_representable (const_tree type) inline REAL_VALUE_TYPE frange_val_min (const_tree type) { - if (flag_finite_math_only) - return real_min_representable (type); - else + if (HONOR_INFINITIES (type)) return dconstninf; + else + return real_min_representable (type); } // Return the maximum value for TYPE. @@ -1212,10 +1212,10 @@ frange_val_min (const_tree type) inline REAL_VALUE_TYPE frange_val_max (const_tree type) { - if (flag_finite_math_only) - return real_max_representable (type); - else + if (HONOR_INFINITIES (type)) return dconstinf; + else + return real_max_representable (type); } // Return TRUE if R is the minimum value for TYPE. diff --git a/gcc/vtable-verify.cc b/gcc/vtable-verify.cc index 24894e7..f01058e 100644 --- a/gcc/vtable-verify.cc +++ b/gcc/vtable-verify.cc @@ -725,10 +725,6 @@ verify_bb_vtables (basic_block bb) trace information to debug problems. 
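
range_tests_floats_various above saves, flips and restores flag_finite_math_only by hand so the float range tests run both with and without finite-math-only semantics. As an aside, the same save/restore discipline can be expressed as an RAII guard; this is only a sketch of an alternative, not what the patch does:

  /* Hypothetical RAII guard: restores an int flag when the scope ends,
     even on early return.  Not part of the patch above.  */
  class flag_saver
  {
  public:
    flag_saver (int &flag, int new_value)
      : m_flag (flag), m_saved (flag)
    { m_flag = new_value; }
    ~flag_saver () { m_flag = m_saved; }

  private:
    int &m_flag;
    int m_saved;
  };

  int finite_math_flag;                 /* stand-in for the real flag */

  void
  run_float_tests ()                    /* stand-in for the test body */
  {
  }

  void
  run_float_tests_both_ways ()
  {
    {
      flag_saver on (finite_math_flag, 1);    /* finite-math-only on */
      run_float_tests ();
    }
    {
      flag_saver off (finite_math_flag, 0);   /* and off again */
      run_float_tests ();
    }
  }
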
*/ if (flag_vtv_debug) { - int len1 = IDENTIFIER_LENGTH - (DECL_NAME (vtbl_var_decl)); - int len2 = strlen (vtable_name); - call_stmt = gimple_build_call (verify_vtbl_ptr_fndecl, 4, build1 (ADDR_EXPR, @@ -737,12 +733,8 @@ verify_bb_vtables (basic_block bb) vtbl_var_decl), lhs, build_string_literal - (len1 + 1, - IDENTIFIER_POINTER - (DECL_NAME - (vtbl_var_decl))), - build_string_literal (len2 + 1, - vtable_name)); + (DECL_NAME (vtbl_var_decl)), + build_string_literal (vtable_name)); } else call_stmt = gimple_build_call |
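
The vtable-verify.cc hunk is a direct consumer of the new overloads: the call site drops its local IDENTIFIER_LENGTH/strlen bookkeeping because the length (plus the NUL) is now derived inside build_string_literal itself. A standalone sketch of the same call-site shrink, with made-up names:

  #include <cstring>
  #include <string>

  /* Sketch only: the base entry point wants the length including the NUL.  */
  std::string
  make_literal (const char *text, std::size_t len_with_nul)
  {
    return std::string (text, len_with_nul - 1);
  }

  /* Convenience overload: derives the length itself, so call sites lose the
     strlen () + 1 boilerplate and any chance of getting it wrong.  */
  inline std::string
  make_literal (const char *text)
  {
    return make_literal (text, std::strlen (text) + 1);
  }
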