author | Ian Lance Taylor <ian@gcc.gnu.org> | 2017-10-27 02:40:17 +0000
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2017-10-27 02:40:17 +0000
commit | 42c1be422c610d844de315d7de6fd29c28afa1ae (patch)
tree | 4a44cc3771afbe803f8db3fcb14434fbe3b73824 /gcc
parent | 6de8466358aad789d13f78a8c59127884afede60 (diff)
parent | e1b76fde8ffaa74dffc895c2e2e625e30428b435 (diff)
Merge from trunk revision 254126.
From-SVN: r254132
Diffstat (limited to 'gcc')
99 files changed, 3380 insertions, 1426 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2f750ac..558ec9b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,242 @@ +2017-10-26 Sandra Loosemore <sandra@codesourcery.com> + + * config/nios2/constraints.md ("S"): Match r0rel_constant_p too. + * config/nios2/nios2-protos.h (r0rel_constant_p): Declare. + * config/nios2/nios2.c: (nios2_r0rel_sec_regex): New. + (nios2_option_overide): Initialize it. Don't allow R0-relative + addressing with PIC. + (nios2_rtx_costs): Handle r0rel_constant_p like gprel_constant_p. + (nios2_symbolic_constant_p): Likewise. + (nios2_legitimate_address_p): Likewise. + (nios2_r0rel_section_name_p): New. + (nios2_symbol_ref_in_r0rel_data_p): New. + (nios2_emit_move_sequence): Handle r0rel_constant_p. + (r0rel_constant_p): New. + (nios2_print_operand_address): Handle r0rel_constant_p. + (nios2_cdx_narrow_form_p): Likewise. + * config/nios2/nios2.opt (mr0rel-sec=): New option. + * doc/invoke.texi (Option Summary): Add -mr0rel-sec. + (Nios II Options): Document -mr0rel-sec. + +2017-10-26 Sandra Loosemore <sandra@codesourcery.com> + + * config/nios2/nios2.c: Include xregex.h. + (nios2_gprel_sec_regex): New. + (nios2_option_overide): Initialize it. Don't allow GP-relative + addressing with PIC. + (nios2_small_section_name_p): Check for regex match. + * config/nios2/nios2.opt (mgprel-sec=): New option. + * doc/invoke.texi (Option Summary): Add -mgprel-sec. + (Nios II Options): Document -mgprel-sec. + +2017-10-26 Jim Wilson <wilson@tuliptree.org> + + * doc/invoke.texi (-fdebug-prefix-map): Expand documentation. + +2017-10-26 Tom de Vries <tom@codesourcery.com> + + PR tree-optimization/82707 + * gimple.c (gimple_copy): Fix unsharing of + GIMPLE_OMP_{SINGLE,TARGET,TEAMS}. + +2017-10-26 Olga Makhotina <olga.makhotina@intel.com> + + * config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask, + _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, + _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, + _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, + _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, + _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, + _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, + _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, + _mm512_mask_cmpunord_pd_mask, _mm512_cmpeq_ps_mask, + _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, + _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, + _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, + _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, + _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, + _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, + _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, + _mm512_mask_cmpunord_ps_mask): New intrinsics. + +2017-10-26 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/aix.h (TARGET_IEEEQUAD_DEFAULT): Set long double + default to IBM. + * config/rs6000/darwin.h (TARGET_IEEEQUAD_DEFAULT): Likewise. + * config/rs6000/rs6000.opt (-mabi=ieeelongdouble): Move the + warning to rs6000.c. Remove the Undocumented flag, since it has + been documented. + (-mabi=ibmlongdouble): Likewise. + * config/rs6000/rs6000.c (TARGET_IEEEQUAD_DEFAULT): If it is not + already set, set the default format for long double. + (rs6000_debug_reg_global): Print whether long double is IBM or + IEEE. + (rs6000_option_override_internal): Rework setting long double + format. Only warn if the user is changing the long double default + and they did not use -Wno-psabi. + * doc/invoke.texi (PowerPC options): Update the documentation for + -mabi=ieeelongdouble and -mabi=ibmlongdouble. 
+ +2017-10-26 Richard Sandiford <richard.sandiford@linaro.org> + Alan Hayward <alan.hayward@arm.com> + David Sherwood <david.sherwood@arm.com> + + * rtl.h (wider_subreg_mode): New function. + * ira.h (ira_sort_regnos_for_alter_reg): Take a machine_mode * + rather than an unsigned int *. + * ira-color.c (regno_max_ref_width): Replace with... + (regno_max_ref_mode): ...this new variable. + (coalesced_pseudo_reg_slot_compare): Update accordingly. + Use wider_subreg_mode. + (ira_sort_regnos_for_alter_reg): Likewise. Take a machine_mode * + rather than an unsigned int *. + * lra-constraints.c (uses_hard_regs_p): Use wider_subreg_mode. + (process_alt_operands): Likewise. + (invariant_p): Likewise. + * lra-spills.c (assign_mem_slot): Likewise. + (add_pseudo_to_slot): Likewise. + * lra.c (collect_non_operand_hard_regs): Likewise. + (add_regs_to_insn_regno_info): Likewise. + * reload1.c (regno_max_ref_width): Replace with... + (regno_max_ref_mode): ...this new variable. + (reload): Update accordingly. Update call to + ira_sort_regnos_for_alter_reg. + (alter_reg): Update to use regno_max_ref_mode. Call wider_subreg_mode. + (init_eliminable_invariants): Update to use regno_max_ref_mode. + (scan_paradoxical_subregs): Likewise. + +2017-10-26 Wilco Dijkstra <wdijkstr@arm.com> + + * config/aarch64/aarch64.h (EXIT_IGNORE_STACK): Set if alloca is used. + (aarch64_frame): Add emit_frame_chain boolean. + * config/aarch64/aarch64.c (aarch64_frame_pointer_required) + Move eh_return case to aarch64_layout_frame. + (aarch64_layout_frame): Initialize emit_frame_chain. + (aarch64_expand_prologue): Use emit_frame_chain. + +2017-10-26 Wilco Dijkstra <wdijkstr@arm.com> + + * config/aarch64/aarch64.c (aarch64_layout_frame): + Ensure LR is always stored at the bottom of the callee-saves. + Remove rarely used frame layout which saves callee-saves at top of + frame, so the store of LR can be used as a valid probe in all cases. + +2017-10-26 Wilco Dijkstra <wdijkstr@arm.com> + + * config/aarch64/aarch64.c (aarch64_legitimize_address_displacement): + Improve unaligned TImode/TFmode base/offset split. + +2017-10-26 Richard Sandiford <richard.sandiford@linaro.org> + Alan Hayward <alan.hayward@arm.com> + David Sherwood <david.sherwood@arm.com> + + * caller-save.c (mark_referenced_regs): Use read_modify_subreg_p. + * combine.c (find_single_use_1): Likewise. + (expand_field_assignment): Likewise. + (move_deaths): Likewise. + * lra-constraints.c (simplify_operand_subreg): Likewise. + (curr_insn_transform): Likewise. + * lra.c (collect_non_operand_hard_regs): Likewise. + (add_regs_to_insn_regno_info): Likewise. + * rtlanal.c (reg_referenced_p): Likewise. + (covers_regno_no_parallel_p): Likewise. + +2017-10-26 Richard Sandiford <richard.sandiford@linaro.org> + + * wide-int-print.cc (print_hex): Loop based on extract_uhwi. + Don't print any bits outside the precision of the value. + * wide-int.cc (test_printing): Add some new tests. + +2017-10-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + + * configure.ac (gcc_cv_as_ix86_xbrace_comment): Check if assembler + supports -xbrace_comment option. + * configure: Regenerate. + * config.in: Regenerate. + * config/i386/sol2.h (ASM_XBRACE_COMMENT_SPEC): Define. + (ASM_CPU_SPEC): Use it. + +2017-10-26 Richard Sandiford <richard.sandiford@linaro.org> + + * target.def (static_rtx_alignment): New hook. + * targhooks.h (default_static_rtx_alignment): Declare. + * targhooks.c (default_static_rtx_alignment): New function. + * doc/tm.texi.in (TARGET_STATIC_RTX_ALIGNMENT): New hook. 
+ * doc/tm.texi: Regenerate. + * varasm.c (force_const_mem): Use targetm.static_rtx_alignment + instead of targetm.constant_alignment. Remove call to + set_mem_attributes. + * config/cris/cris.c (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + (cris_preferred_mininum_alignment): New function, split out from... + (cris_constant_alignment): ...here. + (cris_static_rtx_alignment): New function. + * config/i386/i386.c (ix86_static_rtx_alignment): New function, + split out from... + (ix86_constant_alignment): ...here. + (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + * config/mmix/mmix.c (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + (mmix_static_rtx_alignment): New function. + * config/spu/spu.c (spu_static_rtx_alignment): New function. + (TARGET_STATIC_RTX_ALIGNMENT): Redefine. + +2017-10-26 Tamar Christina <tamar.christina@arm.com> + + PR target/81800 + * config/aarch64/aarch64.md (lrint<GPF:mode><GPI:mode>2): Add flag_trapping_math + and flag_fp_int_builtin_inexact. + +2017-10-25 Palmer Dabbelt <palmer@dabbelt.com> + + * config/riscv/riscv.md (ZERO_EXTEND_LOAD): Define. + * config/riscv/pic.md (local_pic_load): Rename to local_pic_load_s, + mark as a sign-extending load. + (local_pic_load_u): Define. + +2017-10-25 Eric Botcazou <ebotcazou@adacore.com> + + PR middle-end/82062 + * fold-const.c (operand_equal_for_comparison_p): Also return true + if ARG0 is a simple variant of ARG1 with narrower precision. + (fold_ternary_loc): Always pass unstripped operands to the predicate. + +2017-10-25 Jan Hubicka <hubicka@ucw.cz> + + * i386.c (ix86_builtin_vectorization_cost): Compute scatter/gather + cost correctly. + * i386.h (processor_costs): Add gather_static, gather_per_elt, + scatter_static, scatter_per_elt. + * x86-tune-costs.h: Add new cost entries. + +2017-10-25 Richard Biener <rguenther@suse.de> + + * tree-ssa-sccvn.h (vn_eliminate): Declare. + * tree-ssa-pre.c (class eliminate_dom_walker, eliminate, + class pass_fre): Move to ... + * tree-ssa-sccvn.c (class eliminate_dom_walker, vn_eliminate, + class pass_fre): ... here and adjust for statistics. + +2017-10-25 Jakub Jelinek <jakub@redhat.com> + + PR libstdc++/81706 + * attribs.c (attribute_value_equal): Use omp_declare_simd_clauses_equal + for comparison of OMP_CLAUSEs regardless of flag_openmp{,_simd}. + (duplicate_one_attribute, copy_attributes_to_builtin): New functions. + * attribs.h (duplicate_one_attribute, copy_attributes_to_builtin): New + declarations. + +2017-10-25 Richard Biener <rguenther@suse.de> + + * tree-ssa-pre.c (need_eh_cleanup, need_ab_cleanup, el_to_remove, + el_to_fixup, el_todo, el_avail, el_avail_stack, eliminate_avail, + eliminate_push_avail, eliminate_insert): Move inside... + (class eliminate_dom_walker): ... this class in preparation + of move. + (fini_eliminate): Remove by merging with ... + (eliminate): ... this function. Adjust for class changes. + (pass_pre::execute): Remove fini_eliminate call. + (pass_fre::execute): Likewise. 
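
[Editor's note] The two Nios II entries at the top of this ChangeLog add the -mgprel-sec= and -mr0rel-sec= options. A minimal sketch of the intended usage, assuming made-up section names and compile flags (nothing below is taken from the patch itself):

/* Sections matching the regex given to -mgprel-sec= are addressed
   GP-relative, like .sdata; sections matching -mr0rel-sec= are addressed
   as an absolute %lo offset from r0.  Section names here are invented.  */
int counter __attribute__ ((section (".fast_data"))); /* via -mgprel-sec=\.fast_data */
int status  __attribute__ ((section (".io_regs")));   /* via -mr0rel-sec=\.io_regs */

int
read_status (void)
{
  /* Compiled as, e.g.:
       nios2-elf-gcc -O2 -mgprel-sec=\.fast_data -mr0rel-sec=\.io_regs
     the load below can be emitted as:  ldw r2, %lo(status)(r0)  */
  return status;
}
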
+ 2017-10-24 Jakub Jelinek <jakub@redhat.com> PR target/82460 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 39319a6..882f5ad 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20171024 +20171026 diff --git a/gcc/attribs.c b/gcc/attribs.c index ed76a8d..809f4c3 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -1125,9 +1125,9 @@ attribute_value_equal (const_tree attr1, const_tree attr2) TREE_VALUE (attr2)) == 1); } - if ((flag_openmp || flag_openmp_simd) - && TREE_VALUE (attr1) && TREE_VALUE (attr2) + if (TREE_VALUE (attr1) && TREE_CODE (TREE_VALUE (attr1)) == OMP_CLAUSE + && TREE_VALUE (attr2) && TREE_CODE (TREE_VALUE (attr2)) == OMP_CLAUSE) return omp_declare_simd_clauses_equal (TREE_VALUE (attr1), TREE_VALUE (attr2)); @@ -1322,6 +1322,44 @@ merge_decl_attributes (tree olddecl, tree newdecl) DECL_ATTRIBUTES (newdecl)); } +/* Duplicate all attributes with name NAME in ATTR list to *ATTRS if + they are missing there. */ + +void +duplicate_one_attribute (tree *attrs, tree attr, const char *name) +{ + attr = lookup_attribute (name, attr); + if (!attr) + return; + tree a = lookup_attribute (name, *attrs); + while (attr) + { + tree a2; + for (a2 = a; a2; a2 = lookup_attribute (name, TREE_CHAIN (a2))) + if (attribute_value_equal (attr, a2)) + break; + if (!a2) + { + a2 = copy_node (attr); + TREE_CHAIN (a2) = *attrs; + *attrs = a2; + } + attr = lookup_attribute (name, TREE_CHAIN (attr)); + } +} + +/* Duplicate all attributes from user DECL to the corresponding + builtin that should be propagated. */ + +void +copy_attributes_to_builtin (tree decl) +{ + tree b = builtin_decl_explicit (DECL_FUNCTION_CODE (decl)); + if (b) + duplicate_one_attribute (&DECL_ATTRIBUTES (b), + DECL_ATTRIBUTES (decl), "omp declare simd"); +} + #if TARGET_DLLIMPORT_DECL_ATTRIBUTES /* Specialization of merge_decl_attributes for various Windows targets. diff --git a/gcc/attribs.h b/gcc/attribs.h index 65e002c..f4bfe03 100644 --- a/gcc/attribs.h +++ b/gcc/attribs.h @@ -77,6 +77,16 @@ extern tree remove_attribute (const char *, tree); extern tree merge_attributes (tree, tree); +/* Duplicate all attributes with name NAME in ATTR list to *ATTRS if + they are missing there. */ + +extern void duplicate_one_attribute (tree *, tree, const char *); + +/* Duplicate all attributes from user DECL to the corresponding + builtin that should be propagated. */ + +extern void copy_attributes_to_builtin (tree); + /* Given two Windows decl attributes lists, possibly including dllimport, return a list of their union . */ extern tree merge_dllimport_decl_attributes (tree, tree); diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index c260f62..5d028b45 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,15 @@ +2017-10-25 David Malcolm <dmalcolm@redhat.com> + + PR c/7356 + * c-parser.c (c_parser_declaration_or_fndef): Detect missing + semicolons. + +2017-10-25 Jakub Jelinek <jakub@redhat.com> + + PR libstdc++/81706 + * c-decl.c (merge_decls): Copy "omp declare simd" attributes from + newdecl to corresponding __builtin_ if any. 
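
[Editor's note] The attribs.c and c-decl.c hunks above (PR libstdc++/81706) copy "omp declare simd" attributes from a user redeclaration to the corresponding __builtin_ function. A minimal sketch of the pattern this is meant to help, with the flags and the vectorization outcome stated as assumptions rather than guarantees:

/* Redeclare a libm function with "omp declare simd".  With the change
   above, the attribute is also propagated to __builtin_exp, so calls that
   GCC resolves to the builtin can still pick a SIMD variant.  */
#pragma omp declare simd notinbranch
double exp (double);

void
vexp (double *restrict out, const double *restrict in, int n)
{
#pragma omp simd /* e.g. built with -O2 -fopenmp-simd */
  for (int i = 0; i < n; i++)
    out[i] = exp (in[i]);
}
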
+ 2017-10-24 Paolo Carlini <paolo.carlini@oracle.com> PR c++/82466 diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index 5c472e6..90f0729 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -2570,6 +2570,8 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) set_builtin_decl_declared_p (fncode, true); break; } + + copy_attributes_to_builtin (newdecl); } } else diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 6b84324..68c45fd 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -2241,11 +2241,37 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, } if (!start_function (specs, declarator, all_prefix_attrs)) { - /* This can appear in many cases looking nothing like a - function definition, so we don't give a more specific - error suggesting there was one. */ - c_parser_error (parser, "expected %<=%>, %<,%>, %<;%>, %<asm%> " - "or %<__attribute__%>"); + /* At this point we've consumed: + declaration-specifiers declarator + and the next token isn't CPP_EQ, CPP_COMMA, CPP_SEMICOLON, + RID_ASM, RID_ATTRIBUTE, or RID_IN, + but the + declaration-specifiers declarator + aren't grokkable as a function definition, so we have + an error. */ + gcc_assert (!c_parser_next_token_is (parser, CPP_SEMICOLON)); + if (c_parser_next_token_starts_declspecs (parser)) + { + /* If we have + declaration-specifiers declarator decl-specs + then assume we have a missing semicolon, which would + give us: + declaration-specifiers declarator decl-specs + ^ + ; + <~~~~~~~~~ declaration ~~~~~~~~~~> + Use c_parser_require to get an error with a fix-it hint. */ + c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"); + parser->error = false; + } + else + { + /* This can appear in many cases looking nothing like a + function definition, so we don't give a more specific + error suggesting there was one. */ + c_parser_error (parser, "expected %<=%>, %<,%>, %<;%>, %<asm%> " + "or %<__attribute__%>"); + } if (nested) c_pop_function_context (); break; diff --git a/gcc/caller-save.c b/gcc/caller-save.c index 7c787f7..576a023 100644 --- a/gcc/caller-save.c +++ b/gcc/caller-save.c @@ -1034,10 +1034,7 @@ mark_referenced_regs (rtx *loc, refmarker_fn *mark, void *arg) /* If we're setting only part of a multi-word register, we shall mark it as referenced, because the words that are not being set should be restored. */ - && ((GET_MODE_SIZE (GET_MODE (*loc)) - >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (*loc)))) - || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (*loc))) - <= UNITS_PER_WORD)))) + && !read_modify_subreg_p (*loc))) return; } if (code == MEM || code == SUBREG) diff --git a/gcc/combine.c b/gcc/combine.c index d71e50f..93adfc1 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -579,10 +579,7 @@ find_single_use_1 (rtx dest, rtx *loc) && !REG_P (SET_DEST (x)) && ! (GET_CODE (SET_DEST (x)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (x))) - && (((GET_MODE_SIZE (GET_MODE (SUBREG_REG (SET_DEST (x)))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SET_DEST (x))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)))) + && !read_modify_subreg_p (SET_DEST (x)))) break; return find_single_use_1 (dest, &SET_SRC (x)); @@ -7361,15 +7358,12 @@ expand_field_assignment (const_rtx x) } } - /* A SUBREG between two modes that occupy the same numbers of words - can be done by moving the SUBREG to the source. */ + /* If the destination is a subreg that overwrites the whole of the inner + register, we can move the subreg to the source. 
*/ else if (GET_CODE (SET_DEST (x)) == SUBREG /* We need SUBREGs to compute nonzero_bits properly. */ && nonzero_sign_valid - && (((GET_MODE_SIZE (GET_MODE (SET_DEST (x))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (SET_DEST (x)))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))) + && !read_modify_subreg_p (SET_DEST (x))) { x = gen_rtx_SET (SUBREG_REG (SET_DEST (x)), gen_lowpart @@ -13993,10 +13987,7 @@ move_deaths (rtx x, rtx maybe_kill_insn, int from_luid, rtx_insn *to_insn, if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == STRICT_LOW_PART || (GET_CODE (dest) == SUBREG - && (((GET_MODE_SIZE (GET_MODE (dest)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))) + && !read_modify_subreg_p (dest))) { move_deaths (dest, maybe_kill_insn, from_luid, to_insn, pnotes); return; diff --git a/gcc/config.in b/gcc/config.in index 89d7108..5651bcb 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -717,6 +717,12 @@ #endif +/* Define if your assembler supports -xbrace_comment option. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_XBRACE_COMMENT_OPTION +#endif + + /* Define to 1 if you have the `atoq' function. */ #ifndef USED_FOR_TARGET #undef HAVE_ATOQ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index d1aaf19..1cc1043 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2876,16 +2876,13 @@ aarch64_frame_pointer_required (void) && !df_regs_ever_live_p (LR_REGNUM))) return true; - /* Force a frame pointer for EH returns so the return address is at FP+8. */ - if (crtl->calls_eh_return) - return true; - return false; } /* Mark the registers that need to be saved by the callee and calculate the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). */ + and LR may be omitted). If the function is not a leaf, ensure LR is + saved at the bottom of the callee-save area. */ static void aarch64_layout_frame (void) { @@ -2895,6 +2892,10 @@ aarch64_layout_frame (void) if (reload_completed && cfun->machine->frame.laid_out) return; + /* Force a frame chain for EH returns so the return address is at FP+8. */ + cfun->machine->frame.emit_frame_chain + = frame_pointer_needed || crtl->calls_eh_return; + #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) @@ -2929,14 +2930,21 @@ aarch64_layout_frame (void) last_fp_reg = regno; } - if (frame_pointer_needed) + if (cfun->machine->frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ cfun->machine->frame.reg_offset[R29_REGNUM] = 0; cfun->machine->frame.wb_candidate1 = R29_REGNUM; cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; cfun->machine->frame.wb_candidate2 = R30_REGNUM; - offset += 2 * UNITS_PER_WORD; + offset = 2 * UNITS_PER_WORD; + } + else if (!crtl->is_leaf) + { + /* Ensure LR is saved at the bottom of the callee-saves. */ + cfun->machine->frame.reg_offset[R30_REGNUM] = 0; + cfun->machine->frame.wb_candidate1 = R30_REGNUM; + offset = UNITS_PER_WORD; } /* Now assign stack slots for them. 
*/ @@ -3035,20 +3043,6 @@ aarch64_layout_frame (void) cfun->machine->frame.final_adjust = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; } - else if (!frame_pointer_needed - && varargs_and_saved_regs_size < max_push_offset) - { - /* Frame with large local area and outgoing arguments (this pushes the - callee-saves first, followed by the locals and outgoing area): - stp reg1, reg2, [sp, -varargs_and_saved_regs_size]! - stp reg3, reg4, [sp, 16] - sub sp, sp, frame_size - varargs_and_saved_regs_size */ - cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size; - cfun->machine->frame.final_adjust - = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; - cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust; - cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset; - } else { /* Frame with large local area and outgoing arguments using frame pointer: @@ -3665,6 +3659,7 @@ aarch64_expand_prologue (void) HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; rtx_insn *insn; /* Sign return address for functions. */ @@ -3697,7 +3692,7 @@ aarch64_expand_prologue (void) if (callee_adjust != 0) aarch64_push_regs (reg1, reg2, callee_adjust); - if (frame_pointer_needed) + if (emit_frame_chain) { if (callee_adjust == 0) aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM, @@ -3705,14 +3700,14 @@ aarch64_expand_prologue (void) insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, GEN_INT (callee_offset))); - RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (insn) = frame_pointer_needed; emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, - callee_adjust != 0 || frame_pointer_needed); + callee_adjust != 0 || emit_frame_chain); aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, - callee_adjust != 0 || frame_pointer_needed); + callee_adjust != 0 || emit_frame_chain); aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); } @@ -4727,16 +4722,20 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x, /* Split an out-of-range address displacement into a base and offset. Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise to increase opportunities for sharing the base address of different sizes. - For unaligned accesses and TI/TF mode use the signed 9-bit range. */ + Unaligned accesses use the signed 9-bit range, TImode/TFmode use + the intersection of signed scaled 7-bit and signed 9-bit offset. */ static bool aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) { HOST_WIDE_INT offset = INTVAL (*disp); - HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc); + HOST_WIDE_INT base; - if (mode == TImode || mode == TFmode - || (offset & (GET_MODE_SIZE (mode) - 1)) != 0) + if (mode == TImode || mode == TFmode) + base = (offset + 0x100) & ~0x1f8; + else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0) base = (offset + 0x100) & ~0x1ff; + else + base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 
0xfff : 0x3ffc); *off = GEN_INT (base); *disp = GEN_INT (offset - base); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 75fda01..bc1ccc3 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -343,9 +343,9 @@ extern unsigned aarch64_architecture_version; (epilogue_completed && (REGNO) == LR_REGNUM) /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, - the stack pointer does not matter. The value is tested only in - functions that have frame pointers. */ -#define EXIT_IGNORE_STACK 1 + the stack pointer does not matter. This is only true if the function + uses alloca. */ +#define EXIT_IGNORE_STACK (cfun->calls_alloca) #define STATIC_CHAIN_REGNUM R18_REGNUM #define HARD_FRAME_POINTER_REGNUM R29_REGNUM @@ -595,6 +595,9 @@ struct GTY (()) aarch64_frame /* The size of the stack adjustment after saving callee-saves. */ HOST_WIDE_INT final_adjust; + /* Store FP,LR and setup a frame pointer. */ + bool emit_frame_chain; + unsigned wb_candidate1; unsigned wb_candidate2; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 389f2f9..eee836b 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -5155,7 +5155,9 @@ (define_expand "lrint<GPF:mode><GPI:mode>2" [(match_operand:GPI 0 "register_operand") (match_operand:GPF 1 "register_operand")] - "TARGET_FLOAT" + "TARGET_FLOAT + && ((GET_MODE_SIZE (<GPF:MODE>mode) <= GET_MODE_SIZE (<GPI:MODE>mode)) + || !flag_trapping_math || flag_fp_int_builtin_inexact)" { rtx cvt = gen_reg_rtx (<GPF:MODE>mode); emit_insn (gen_rint<GPF:mode>2 (cvt, operands[1])); diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index fe80a27..8fa234f 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -165,6 +165,7 @@ static bool cris_function_value_regno_p (const unsigned int); static void cris_file_end (void); static unsigned int cris_hard_regno_nregs (unsigned int, machine_mode); static bool cris_hard_regno_mode_ok (unsigned int, machine_mode); +static HOST_WIDE_INT cris_static_rtx_alignment (machine_mode); static HOST_WIDE_INT cris_constant_alignment (const_tree, HOST_WIDE_INT); /* This is the parsed result of the "-max-stack-stackframe=" option. If @@ -288,6 +289,8 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION; #undef TARGET_HARD_REGNO_MODE_OK #define TARGET_HARD_REGNO_MODE_OK cris_hard_regno_mode_ok +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT cris_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT cris_constant_alignment @@ -4329,6 +4332,26 @@ cris_hard_regno_mode_ok (unsigned int regno, machine_mode mode) || (regno != CRIS_MOF_REGNUM && regno != CRIS_ACR_REGNUM))); } +/* Return the preferred minimum alignment for a static object. */ + +static HOST_WIDE_INT +cris_preferred_mininum_alignment (void) +{ + if (!TARGET_CONST_ALIGN) + return 8; + if (TARGET_ALIGN_BY_32) + return 32; + return 16; +} + +/* Implement TARGET_STATIC_RTX_ALIGNMENT. */ + +static HOST_WIDE_INT +cris_static_rtx_alignment (machine_mode mode) +{ + return MAX (cris_preferred_mininum_alignment (), GET_MODE_ALIGNMENT (mode)); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. Note that this hook has the effect of making gcc believe that ALL references to constant stuff (in code segment, like strings) have this alignment. 
That is a rather @@ -4339,11 +4362,7 @@ cris_hard_regno_mode_ok (unsigned int regno, machine_mode mode) static HOST_WIDE_INT cris_constant_alignment (const_tree, HOST_WIDE_INT basic_align) { - if (!TARGET_CONST_ALIGN) - return basic_align; - if (TARGET_ALIGN_BY_32) - return MAX (basic_align, 32); - return MAX (basic_align, 16); + return MAX (cris_preferred_mininum_alignment (), basic_align); } #if 0 diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 72f57f7..5dc5fae 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -14005,6 +14005,326 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_EQ_OQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_EQ_OQ, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LT_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LT_OS, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmple_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LE_OS, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_LE_OS, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_UNORD_Q, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_UNORD_Q, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NEQ_UQ, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d 
__Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NEQ_UQ, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLT_US, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLT_US, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLE_US, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_NLE_US, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_ORD_Q, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) +{ + return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, + (__v8df) __Y, _CMP_ORD_Q, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_EQ_OQ, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_EQ_OQ, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LT_OS, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LT_OS, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmple_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LE_OS, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_LE_OS, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_UNORD_Q, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_UNORD_Q, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NEQ_UQ, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NEQ_UQ, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLT_US, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLT_US, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLE_US, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_NLE_US, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_ORD_Q, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) +{ + return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, + (__v16sf) __Y, _CMP_ORD_Q, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) { return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, diff --git a/gcc/config/i386/i386.c 
b/gcc/config/i386/i386.c index 367cade..1facf12 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28741,6 +28741,18 @@ ix86_sched_init_global (FILE *, int, int) } +/* Implement TARGET_STATIC_RTX_ALIGNMENT. */ + +static HOST_WIDE_INT +ix86_static_rtx_alignment (machine_mode mode) +{ + if (mode == DFmode) + return 64; + if (ALIGN_MODE_128 (mode)) + return MAX (128, GET_MODE_ALIGNMENT (mode)); + return GET_MODE_ALIGNMENT (mode); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. */ static HOST_WIDE_INT @@ -28749,10 +28761,9 @@ ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST || TREE_CODE (exp) == INTEGER_CST) { - if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) - return 64; - else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) - return 128; + machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); + HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); + return MAX (mode_align, align); } else if (!optimize_size && TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) @@ -44490,7 +44501,6 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, /* We should have separate costs for unaligned loads and gather/scatter. Do that incrementally. */ case unaligned_load: - case vector_gather_load: index = sse_store_index (mode); return ix86_vec_cost (mode, COSTS_N_INSNS @@ -44498,13 +44508,28 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, true); case unaligned_store: - case vector_scatter_store: index = sse_store_index (mode); return ix86_vec_cost (mode, COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2, true); + case vector_gather_load: + return ix86_vec_cost (mode, + COSTS_N_INSNS + (ix86_cost->gather_static + + ix86_cost->gather_per_elt + * TYPE_VECTOR_SUBPARTS (vectype)) / 2, + true); + + case vector_scatter_store: + return ix86_vec_cost (mode, + COSTS_N_INSNS + (ix86_cost->scatter_static + + ix86_cost->scatter_per_elt + * TYPE_VECTOR_SUBPARTS (vectype)) / 2, + true); + case cond_branch_taken: return ix86_cost->cond_taken_branch_cost; @@ -50281,6 +50306,8 @@ ix86_run_selftests (void) #undef TARGET_CAN_CHANGE_MODE_CLASS #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 27fc9f0..837906b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -253,6 +253,10 @@ struct processor_costs { const int mmxsse_to_integer; /* cost of moving mmxsse register to integer. */ const int ssemmx_to_integer; /* cost of moving integer to mmxsse register. */ + const int gather_static, gather_per_elt; /* Cost of gather load is computed + as static + per_item * nelts. */ + const int scatter_static, scatter_per_elt; /* Cost of gather store is + computed as static + per_item * nelts. */ const int l1_cache_size; /* size of l1 cache, in kilobytes. */ const int l2_cache_size; /* size of l2 cache, in kilobytes. */ const int prefetch_block; /* bytes moved to cache for prefetch. */ diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h index 6173360..05e5e1a 100644 --- a/gcc/config/i386/sol2.h +++ b/gcc/config/i386/sol2.h @@ -65,8 +65,16 @@ along with GCC; see the file COPYING3. 
If not see #define ASM_CPU64_DEFAULT_SPEC "-xarch=generic64" #endif +/* Since Studio 12.6, as needs -xbrace_comment=no so its AVX512 syntax is + fully compatible with gas. */ +#ifdef HAVE_AS_XBRACE_COMMENT_OPTION +#define ASM_XBRACE_COMMENT_SPEC "-xbrace_comment=no" +#else +#define ASM_XBRACE_COMMENT_SPEC "" +#endif + #undef ASM_CPU_SPEC -#define ASM_CPU_SPEC "%(asm_cpu_default)" +#define ASM_CPU_SPEC "%(asm_cpu_default) " ASM_XBRACE_COMMENT_SPEC /* Don't include ASM_PIC_SPEC. While the Solaris 10+ assembler accepts -K PIC, it gives many warnings: diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index e31d7ce..c7ac70e 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -82,6 +82,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ {3, 3, 3, 3, 3}, /* cost of unaligned SSE store in 128bit, 256bit and 512bit */ 3, 3, /* SSE->integer and integer->SSE moves */ + 5, 0, /* Gather load static, per_elt. */ + 5, 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ 0, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -166,6 +168,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ 0, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -249,6 +253,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 4, /* size of l1 cache. 486 has 8kB cache shared for code and data, so 4kB is not really precise. */ @@ -334,6 +340,8 @@ struct processor_costs pentium_cost = { in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 8, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -410,6 +418,8 @@ struct processor_costs lakemont_cost = { in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 8, /* size of l2 cache */ 0, /* size of prefetch block */ @@ -501,6 +511,8 @@ struct processor_costs pentiumpro_cost = { in 32,64,128,256 and 512-bit */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 3, 3, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 256, /* size of l2 cache */ 32, /* size of prefetch block */ @@ -584,6 +596,8 @@ struct processor_costs geode_cost = { in 32,64,128,256 and 512-bit */ {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* Gather load static, per_elt. */ + 2, 2, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 128, /* size of l2 cache. */ 32, /* size of prefetch block */ @@ -666,6 +680,8 @@ struct processor_costs k6_cost = { in 32,64,128,256 and 512-bit */ {2, 2, 8, 16, 32}, /* cost of unaligned stores. 
*/ 6, 6, /* SSE->integer and integer->SSE moves */ + 2, 2, /* Gather load static, per_elt. */ + 2, 2, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 32, /* size of l2 cache. Some models have integrated l2 cache, but @@ -754,6 +770,8 @@ struct processor_costs athlon_cost = { in 32,64,128,256 and 512-bit */ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 5, 5, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -844,6 +862,8 @@ struct processor_costs k8_cost = { in 32,64,128,256 and 512-bit */ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 5, 5, /* SSE->integer and integer->SSE moves */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -946,6 +966,8 @@ struct processor_costs amdfam10_cost = { 1/1 1/1 MOVD reg32, xmmreg Double FADD 3 1/1 1/1 */ + 4, 4, /* Gather load static, per_elt. */ + 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1041,6 +1063,8 @@ const struct processor_costs bdver1_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1138,6 +1162,8 @@ const struct processor_costs bdver2_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1234,6 +1260,8 @@ struct processor_costs bdver3_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1329,6 +1357,8 @@ struct processor_costs bdver4_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 20, 30}, /* cost of unaligned stores. */ 16, 20, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1435,6 +1465,11 @@ struct processor_costs znver1_cost = { in 32,64,128,256 and 512-bit. */ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 6, 6, /* SSE->integer and integer->SSE moves. */ + /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, + throughput 12. Approx 9 uops do not depend on vector size and every load + is 7 uops. */ + 18, 8, /* Gather load static, per_elt. */ + 18, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block. */ @@ -1539,6 +1574,8 @@ const struct processor_costs btver1_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 12, 24, 48}, /* cost of unaligned stores. 
*/ 14, 14, /* SSE->integer and integer->SSE moves */ + 10, 10, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1624,6 +1661,8 @@ const struct processor_costs btver2_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 12, 24, 48}, /* cost of unaligned stores. */ 14, 14, /* SSE->integer and integer->SSE moves */ + 10, 10, /* Gather load static, per_elt. */ + 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 2048, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1708,6 +1747,8 @@ struct processor_costs pentium4_cost = { in 32,64,128,256 and 512-bit */ {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ 20, 12, /* SSE->integer and integer->SSE moves */ + 16, 16, /* Gather load static, per_elt. */ + 16, 16, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1795,6 +1836,8 @@ struct processor_costs nocona_cost = { in 32,64,128,256 and 512-bit */ {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ 20, 12, /* SSE->integer and integer->SSE moves */ + 12, 12, /* Gather load static, per_elt. */ + 12, 12, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ 1024, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1880,6 +1923,8 @@ struct processor_costs atom_cost = { in 32,64,128,256 and 512-bit */ {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 8, 6, /* SSE->integer and integer->SSE moves */ + 8, 8, /* Gather load static, per_elt. */ + 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -1965,6 +2010,8 @@ struct processor_costs slm_cost = { in 32,64,128,256 and 512-bit */ {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 8, 6, /* SSE->integer and integer->SSE moves */ + 8, 8, /* Gather load static, per_elt. */ + 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -2050,6 +2097,8 @@ struct processor_costs intel_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ 4, 4, /* SSE->integer and integer->SSE moves */ + 6, 6, /* Gather load static, per_elt. */ + 6, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 256, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -2142,6 +2191,8 @@ struct processor_costs generic_cost = { in 32,64,128,256 and 512-bit */ {10, 10, 10, 15, 20}, /* cost of unaligned storess. */ 20, 20, /* SSE->integer and integer->SSE moves */ + 6, 6, /* Gather load static, per_elt. */ + 6, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ 512, /* size of l2 cache. */ 64, /* size of prefetch block */ @@ -2239,6 +2290,11 @@ struct processor_costs core_cost = { in 32,64,128,256 and 512-bit */ {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ 2, 2, /* SSE->integer and integer->SSE moves */ + /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, + rec. throughput 6. + So 5 uops statically and one uops per load. */ + 10, 6, /* Gather load static, per_elt. */ + 10, 6, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ 512, /* size of l2 cache. 
*/ 64, /* size of prefetch block */ diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index e911594..4a73162 100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -168,6 +168,7 @@ static void mmix_print_operand (FILE *, rtx, int); static void mmix_print_operand_address (FILE *, machine_mode, rtx); static bool mmix_print_operand_punct_valid_p (unsigned char); static void mmix_conditional_register_usage (void); +static HOST_WIDE_INT mmix_static_rtx_alignment (machine_mode); static HOST_WIDE_INT mmix_constant_alignment (const_tree, HOST_WIDE_INT); static HOST_WIDE_INT mmix_starting_frame_offset (void); @@ -284,6 +285,8 @@ static HOST_WIDE_INT mmix_starting_frame_offset (void); #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE mmix_option_override +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT mmix_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT mmix_constant_alignment @@ -342,6 +345,14 @@ mmix_data_alignment (tree type ATTRIBUTE_UNUSED, int basic_align) return basic_align; } +/* Implement TARGET_STATIC_RTX_ALIGNMENT. */ + +static HOST_WIDE_INT +mmix_static_rtx_alignment (machine_mode mode) +{ + return MAX (GET_MODE_ALIGNMENT (mode), 32); +} + /* Implement tARGET_CONSTANT_ALIGNMENT. */ static HOST_WIDE_INT diff --git a/gcc/config/nios2/constraints.md b/gcc/config/nios2/constraints.md index c6c53926..51f71cf 100644 --- a/gcc/config/nios2/constraints.md +++ b/gcc/config/nios2/constraints.md @@ -95,8 +95,8 @@ (match_test "TARGET_ARCH_R2 && ANDCLEAR_INT (ival)"))) (define_constraint "S" - "An immediate stored in small data, accessible by GP." - (match_test "gprel_constant_p (op)")) + "An immediate stored in small data, accessible by GP, or by offset from r0." + (match_test "gprel_constant_p (op) || r0rel_constant_p (op)")) (define_constraint "T" "A constant unspec offset representing a relocation." diff --git a/gcc/config/nios2/nios2-protos.h b/gcc/config/nios2/nios2-protos.h index 6df65bb..84d450b 100644 --- a/gcc/config/nios2/nios2-protos.h +++ b/gcc/config/nios2/nios2-protos.h @@ -52,6 +52,7 @@ extern const char * nios2_add_insn_asm (rtx_insn *, rtx *); extern bool nios2_legitimate_pic_operand_p (rtx); extern bool gprel_constant_p (rtx); +extern bool r0rel_constant_p (rtx); extern bool nios2_regno_ok_for_base_p (int, bool); extern bool nios2_unspec_reloc_p (rtx); diff --git a/gcc/config/nios2/nios2.c b/gcc/config/nios2/nios2.c index f5963d4..cdd5e9a 100644 --- a/gcc/config/nios2/nios2.c +++ b/gcc/config/nios2/nios2.c @@ -49,6 +49,7 @@ #include "stor-layout.h" #include "builtins.h" #include "tree-pass.h" +#include "xregex.h" /* This file should be included last. */ #include "target-def.h" @@ -103,6 +104,10 @@ static int custom_code_index[256]; /* Set to true if any conflicts (re-use of a code between 0-255) are found. */ static bool custom_code_conflict = false; +/* State for command-line options. */ +regex_t nios2_gprel_sec_regex; +regex_t nios2_r0rel_sec_regex; + /* Definition of builtin function types for nios2. */ @@ -1371,6 +1376,31 @@ nios2_option_override (void) nios2_gpopt_option = gpopt_local; } + /* GP-relative and r0-relative addressing don't make sense for PIC. */ + if (flag_pic) + { + if (nios2_gpopt_option != gpopt_none) + error ("-mgpopt not supported with PIC."); + if (nios2_gprel_sec) + error ("-mgprel-sec= not supported with PIC."); + if (nios2_r0rel_sec) + error ("-mr0rel-sec= not supported with PIC."); + } + + /* Process -mgprel-sec= and -m0rel-sec=. 
*/ + if (nios2_gprel_sec) + { + if (regcomp (&nios2_gprel_sec_regex, nios2_gprel_sec, + REG_EXTENDED | REG_NOSUB)) + error ("-mgprel-sec= argument is not a valid regular expression."); + } + if (nios2_r0rel_sec) + { + if (regcomp (&nios2_r0rel_sec_regex, nios2_r0rel_sec, + REG_EXTENDED | REG_NOSUB)) + error ("-mr0rel-sec= argument is not a valid regular expression."); + } + /* If we don't have mul, we don't have mulx either! */ if (!TARGET_HAS_MUL && TARGET_HAS_MULX) target_flags &= ~MASK_HAS_MULX; @@ -1457,7 +1487,7 @@ nios2_rtx_costs (rtx x, machine_mode mode, case SYMBOL_REF: case CONST: case CONST_DOUBLE: - if (gprel_constant_p (x)) + if (gprel_constant_p (x) || r0rel_constant_p (x)) { *total = COSTS_N_INSNS (1); return true; @@ -2007,6 +2037,7 @@ nios2_symbolic_constant_p (rtx x) return (SYMBOL_REF_P (base) && !SYMBOL_REF_TLS_MODEL (base) && !gprel_constant_p (base) + && !r0rel_constant_p (base) && SMALL_INT (INTVAL (offset))); } return false; @@ -2108,7 +2139,7 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, /* Else, fall through. */ case CONST: - if (gprel_constant_p (operand)) + if (gprel_constant_p (operand) || r0rel_constant_p (operand)) return true; /* Else, fall through. */ @@ -2268,7 +2299,17 @@ nios2_small_section_name_p (const char *section) return (strcmp (section, ".sbss") == 0 || strncmp (section, ".sbss.", 6) == 0 || strcmp (section, ".sdata") == 0 - || strncmp (section, ".sdata.", 7) == 0); + || strncmp (section, ".sdata.", 7) == 0 + || (nios2_gprel_sec + && regexec (&nios2_gprel_sec_regex, section, 0, NULL, 0) == 0)); +} + +/* Return true if SECTION is a r0-relative section name. */ +static bool +nios2_r0rel_section_name_p (const char *section) +{ + return (nios2_r0rel_sec + && regexec (&nios2_r0rel_sec_regex, section, 0, NULL, 0) == 0); } /* Return true if EXP should be placed in the small data section. */ @@ -2377,6 +2418,33 @@ nios2_symbol_ref_in_small_data_p (rtx sym) } } +/* Likewise for r0-relative addressing. */ +static bool +nios2_symbol_ref_in_r0rel_data_p (rtx sym) +{ + tree decl; + + gcc_assert (GET_CODE (sym) == SYMBOL_REF); + decl = SYMBOL_REF_DECL (sym); + + /* TLS variables are not accessed through r0. */ + if (SYMBOL_REF_TLS_MODEL (sym) != 0) + return false; + + /* On Nios II R2, there is no r0-relative relocation that can be + used with "io" instructions. So, if we are implicitly generating + those instructions, we cannot emit r0-relative accesses. */ + if (TARGET_ARCH_R2 + && (TARGET_BYPASS_CACHE || TARGET_BYPASS_CACHE_VOLATILE)) + return false; + + /* If the user has explicitly placed the symbol in a r0rel section + via an attribute, generate r0-relative addressing. */ + if (decl && DECL_SECTION_NAME (decl)) + return nios2_r0rel_section_name_p (DECL_SECTION_NAME (decl)); + return false; +} + /* Implement TARGET_SECTION_TYPE_FLAGS. */ static unsigned int @@ -2610,8 +2678,9 @@ nios2_emit_move_sequence (rtx *operands, machine_mode mode) return true; } } - else if (gprel_constant_p (from)) - /* Handled directly by movsi_internal as gp + offset. */ + else if (gprel_constant_p (from) || r0rel_constant_p (from)) + /* Handled directly by movsi_internal as gp + offset + or r0 + offset. */ ; else if (nios2_large_constant_p (from)) /* This case covers either a regular symbol reference or an UNSPEC @@ -2961,6 +3030,20 @@ gprel_constant_p (rtx op) return false; } +/* Likewise if this is a zero-relative accessible reference. 
*/ +bool +r0rel_constant_p (rtx op) +{ + if (GET_CODE (op) == SYMBOL_REF + && nios2_symbol_ref_in_r0rel_data_p (op)) + return true; + else if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS) + return r0rel_constant_p (XEXP (XEXP (op, 0), 0)); + + return false; +} + /* Return the name string for a supported unspec reloc offset. */ static const char * nios2_unspec_reloc_name (int unspec) @@ -3025,7 +3108,13 @@ nios2_print_operand_address (FILE *file, machine_mode mode, rtx op) fprintf (file, ")(%s)", reg_names[GP_REGNO]); return; } - + else if (r0rel_constant_p (op)) + { + fprintf (file, "%%lo("); + output_addr_const (file, op); + fprintf (file, ")(r0)"); + return; + } break; case PLUS: @@ -4631,8 +4720,8 @@ nios2_cdx_narrow_form_p (rtx_insn *insn) || TARGET_BYPASS_CACHE) return false; addr = XEXP (mem, 0); - /* GP-based references are never narrow. */ - if (gprel_constant_p (addr)) + /* GP-based and R0-based references are never narrow. */ + if (gprel_constant_p (addr) || r0rel_constant_p (addr)) return false; /* %lo requires a 16-bit relocation and is never narrow. */ if (GET_CODE (addr) == LO_SUM) @@ -4678,8 +4767,8 @@ nios2_cdx_narrow_form_p (rtx_insn *insn) || TARGET_BYPASS_CACHE) return false; addr = XEXP (mem, 0); - /* GP-based references are never narrow. */ - if (gprel_constant_p (addr)) + /* GP-based and r0-based references are never narrow. */ + if (gprel_constant_p (addr) || r0rel_constant_p (addr)) return false; /* %lo requires a 16-bit relocation and is never narrow. */ if (GET_CODE (addr) == LO_SUM) diff --git a/gcc/config/nios2/nios2.opt b/gcc/config/nios2/nios2.opt index 08cb935..a50dbee 100644 --- a/gcc/config/nios2/nios2.opt +++ b/gcc/config/nios2/nios2.opt @@ -586,3 +586,11 @@ Enable generation of R2 BMX instructions. mcdx Target Report Mask(HAS_CDX) Enable generation of R2 CDX instructions. + +mgprel-sec= +Target RejectNegative Joined Var(nios2_gprel_sec) Init(NULL) +Regular expression matching additional GP-addressable section names. + +mr0rel-sec= +Target RejectNegative Joined Var(nios2_r0rel_sec) Init(NULL) +Regular expression matching section names for r0-relative addressing. diff --git a/gcc/config/riscv/pic.md b/gcc/config/riscv/pic.md index 6a29ead..03b8f9b 100644 --- a/gcc/config/riscv/pic.md +++ b/gcc/config/riscv/pic.md @@ -22,13 +22,20 @@ ;; Simplify PIC loads to static variables. ;; These should go away once we figure out how to emit auipc discretely. -(define_insn "*local_pic_load<mode>" +(define_insn "*local_pic_load_s<mode>" [(set (match_operand:ANYI 0 "register_operand" "=r") - (mem:ANYI (match_operand 1 "absolute_symbolic_operand" "")))] + (sign_extend:ANYI (mem:ANYI (match_operand 1 "absolute_symbolic_operand" ""))))] "USE_LOAD_ADDRESS_MACRO (operands[1])" "<load>\t%0,%1" [(set (attr "length") (const_int 8))]) +(define_insn "*local_pic_load_u<mode>" + [(set (match_operand:ZERO_EXTEND_LOAD 0 "register_operand" "=r") + (zero_extend:ZERO_EXTEND_LOAD (mem:ZERO_EXTEND_LOAD (match_operand 1 "absolute_symbolic_operand" ""))))] + "USE_LOAD_ADDRESS_MACRO (operands[1])" + "<load>u\t%0,%1" + [(set (attr "length") (const_int 8))]) + (define_insn "*local_pic_load<mode>" [(set (match_operand:ANYF 0 "register_operand" "=f") (mem:ANYF (match_operand 1 "absolute_symbolic_operand" "")))
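The zero-extend iterator used by the new *local_pic_load_u<mode> pattern is defined in riscv.md below. The practical effect is that a PIC load of a narrow unsigned object, which reaches the backend as a zero_extend of the mem, now has a pattern of its own and can be emitted with the zero-extending load macros (lbu/lhu/lwu). A minimal C sketch of the kind of access involved (illustrative only, not taken from the patch or its testsuite):

/* With -fPIC on RISC-V, reading this unsigned global should be a
   zero-extending load; a sign-extending "lb" here would turn byte
   values >= 0x80 into negative ints.  */
unsigned char threshold = 0x90;

int
above_threshold (unsigned char x)
{
  return x > threshold;
}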
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index fd9236c..9f056bb 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -259,6 +259,9 @@ ;; Iterator for QImode extension patterns. (define_mode_iterator SUPERQI [HI SI (DI "TARGET_64BIT")]) +;; Iterator for extending loads. +(define_mode_iterator ZERO_EXTEND_LOAD [QI HI (SI "TARGET_64BIT")]) + ;; Iterator for hardware integer modes narrower than XLEN. (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")]) diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h index 607b42c..7354181 100644 --- a/gcc/config/rs6000/aix.h +++ b/gcc/config/rs6000/aix.h @@ -76,6 +76,9 @@ #undef TARGET_IEEEQUAD #define TARGET_IEEEQUAD 0 +#undef TARGET_IEEEQUAD_DEFAULT +#define TARGET_IEEEQUAD_DEFAULT 0 + /* The AIX linker will discard static constructors in object files before collect has a chance to see them, so scan the object files directly. */ #define COLLECT_EXPORT_LIST diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 9a88a8d..a6a7b2c 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -272,6 +272,9 @@ extern int darwin_emit_branch_islands; #undef TARGET_IEEEQUAD #define TARGET_IEEEQUAD 0 +#undef TARGET_IEEEQUAD_DEFAULT +#define TARGET_IEEEQUAD_DEFAULT 0 + /* Since Darwin doesn't do TOCs, stub this out. */ #define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) ((void)X, (void)MODE, 0) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 3095419..3162d52 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -86,6 +86,20 @@ #define TARGET_NO_PROTOTYPE 0 #endif + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server + systems will also set long double to be IEEE 128-bit. AIX and Darwin + explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so + those systems will not pick up this default. This needs to be after all + of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are + properly defined. */ +#ifndef TARGET_IEEEQUAD_DEFAULT +#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) +#define TARGET_IEEEQUAD_DEFAULT 1 +#else +#define TARGET_IEEEQUAD_DEFAULT 0 +#endif +#endif + #define min(A,B) ((A) < (B) ? (A) : (B)) #define max(A,B) ((A) > (B) ? (A) : (B)) @@ -2878,6 +2892,13 @@ rs6000_debug_reg_global (void) fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size); fprintf (stderr, DEBUG_FMT_D, "long_double_size", rs6000_long_double_type_size); + if (rs6000_long_double_type_size == 128) + { + fprintf (stderr, DEBUG_FMT_S, "long double type", + TARGET_IEEEQUAD ? "IEEE" : "IBM"); + fprintf (stderr, DEBUG_FMT_S, "default long double type", + TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM"); + } fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority", (int)rs6000_sched_restricted_insns_priority); fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins", @@ -4560,13 +4581,26 @@ rs6000_option_override_internal (bool global_init_p) rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE; } - /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin - explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not - pick up this default. */ -#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server + systems will also set long double to be IEEE 128-bit. AIX and Darwin + explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so + those systems will not pick up this default. Warn if the user changes the + default unless -Wno-psabi. 
*/ if (!global_options_set.x_rs6000_ieeequad) - rs6000_ieeequad = 1; -#endif + rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT; + + else if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128) + { + static bool warned_change_long_double; + if (!warned_change_long_double) + { + warned_change_long_double = true; + if (TARGET_IEEEQUAD) + warning (OPT_Wpsabi, "Using IEEE extended precision long double"); + else + warning (OPT_Wpsabi, "Using IBM extended precision long double"); + } + } /* Enable the default support for IEEE 128-bit floating point on Linux VSX systems. In GCC 7, we would enable the IEEE 128-bit floating point diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c42818f..e7d0829 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -381,10 +381,10 @@ mabi=d32 Target RejectNegative Undocumented Warn(using old darwin ABI) Var(rs6000_darwin64_abi, 0) mabi=ieeelongdouble -Target RejectNegative Undocumented Warn(using IEEE extended precision long double) Var(rs6000_ieeequad) Save +Target RejectNegative Var(rs6000_ieeequad) Save mabi=ibmlongdouble -Target RejectNegative Undocumented Warn(using IBM extended precision long double) Var(rs6000_ieeequad, 0) +Target RejectNegative Var(rs6000_ieeequad, 0) mcpu= Target RejectNegative Joined Var(rs6000_cpu_index) Init(-1) Enum(rs6000_cpu_opt_value) Save diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index eda7fca..bf21cca 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -7196,6 +7196,18 @@ spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec) return inprec <= 32 && outprec <= inprec; } +/* Implement TARGET_STATIC_RTX_ALIGNMENT. + + Make all static objects 16-byte aligned. This allows us to assume + they are also padded to 16 bytes, which means we can use a single + load or store instruction to access them. */ + +static HOST_WIDE_INT +spu_static_rtx_alignment (machine_mode mode) +{ + return MAX (GET_MODE_ALIGNMENT (mode), 128); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. Make all static objects 16-byte aligned. This allows us to assume @@ -7447,6 +7459,8 @@ static const struct attribute_spec spu_attribute_table[] = #undef TARGET_TRULY_NOOP_TRUNCATION #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation +#undef TARGET_STATIC_RTX_ALIGNMENT +#define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
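Ports opt in to the new hook with the usual pair of an implementation and a TARGET_* define, as the mmix and spu hunks above show. A minimal sketch for a hypothetical port (the function name and the 64-bit floor are invented for illustration, not part of this patch):

/* Give statically-allocated rtx objects, such as constant-pool
   entries, at least 64-bit alignment; fall back to the mode's
   natural alignment when that is larger.  */
static HOST_WIDE_INT
example_static_rtx_alignment (machine_mode mode)
{
  return MAX (GET_MODE_ALIGNMENT (mode), 64);
}

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT example_static_rtx_alignment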
diff --git a/gcc/configure b/gcc/configure index aa5937d..c49e665 100755 --- a/gcc/configure +++ b/gcc/configure @@ -25552,6 +25552,38 @@ $as_echo "$as_me: WARNING: LTO for $target requires binutils >= 2.20.1, but vers ;; esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -xbrace_comment" >&5 +$as_echo_n "checking assembler for -xbrace_comment... " >&6; } +if test "${gcc_cv_as_ix86_xbrace_comment+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_ix86_xbrace_comment=no + if test x$gcc_cv_as != x; then + $as_echo '.text' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -xbrace_comment=no -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_ix86_xbrace_comment=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_ix86_xbrace_comment" >&5 +$as_echo "$gcc_cv_as_ix86_xbrace_comment" >&6; } +if test $gcc_cv_as_ix86_xbrace_comment = yes; then + +$as_echo "#define HAVE_AS_XBRACE_COMMENT_OPTION 1" >>confdefs.h + +fi + + # Test if the assembler supports the section flag 'e' for specifying # an excluded section. { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .section with e" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index d905d0d..7c0a903 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -4103,6 +4103,11 @@ foo: nop ;; esac + gcc_GAS_CHECK_FEATURE([-xbrace_comment], gcc_cv_as_ix86_xbrace_comment,, + [-xbrace_comment=no], [.text],, + [AC_DEFINE(HAVE_AS_XBRACE_COMMENT_OPTION, 1, + [Define if your assembler supports -xbrace_comment option.])]) + # Test if the assembler supports the section flag 'e' for specifying # an excluded section. gcc_GAS_CHECK_FEATURE([.section with e], gcc_cv_as_section_has_e, diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 8c587a3..29139c5 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,45 @@ +2017-10-26 Nathan Sidwell <nathan@acm.org> + + * decl.c (sort_labels): Restore function. + (pop_labels): Sort labels. + (identify_goto): Add translation markup. + +2017-10-25 Nathan Sidwell <nathan@acm.org> + + Kill IDENTIFIER_LABEL_VALUE. + * cp-tree.h (lang_identifier): Delete label_value slot. + (IDENTIFIER_LABEL_VALUE, SET_IDENTIFIER_LABEL_VALUE): Delete. + (struct named_label_hasher): Rename to ... + (struct named_label_hash): ... here. Reimplement. + (struct language_function): Adjust x_named_labels. + * name-lookup.h (struct cp_label_binding): Delete. + (struct cp_binding_level): Delete shadowed_labels slot. + * decl.c (struct named_label_entry): Add name and outer slots. + (pop_label): Rename to ... + (check_label_used): ... here. Don't pop. + (note_label, sort_labels): Delete. + (pop_labels, pop_local_label): Reimplement. + (poplevel): Pop local labels as any other decl. Remove + shadowed_labels handling. + (named_label_hash::hash, named_label_hash::equal): New. + (make_label_decl): Absorb into ... + (lookup_label_1): ... here. Add making_local_p arg, reimplement. + (lookup_label, declare_local_label): Adjust. + (check_goto, define_label): Adjust. + * lex.c (make_conv_op_name): Don't clear IDENTIFIER_LABEL_VALUE. + * ptree.c (cxx_print_identifier): Don't print identifier binding. + + * decl.c (identify_goto): Reduce duplication. + (check_previous_goto_1): Likewise. + (check_goto): Move var decls to initialization. + (check_omp_return, define_label_1, define_label): Likewise. + +2017-10-25 Jakub Jelinek <jakub@redhat.com> + + PR libstdc++/81706 + * decl.c (duplicate_decls): Copy "omp declare simd" attributes from + newdecl to corresponding __builtin_ if any. 
+ 2017-10-24 Paolo Carlini <paolo.carlini@oracle.com> PR c++/82466 diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index b74b6d9..f2570b003 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -561,7 +561,6 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; struct GTY(()) lang_identifier { struct c_common_identifier c_common; cxx_binding *bindings; - tree label_value; }; /* Return a typed pointer version of T if it designates a @@ -996,11 +995,6 @@ enum GTY(()) abstract_class_use { #define SET_IDENTIFIER_TYPE_VALUE(NODE,TYPE) (TREE_TYPE (NODE) = (TYPE)) #define IDENTIFIER_HAS_TYPE_VALUE(NODE) (IDENTIFIER_TYPE_VALUE (NODE) ? 1 : 0) -#define IDENTIFIER_LABEL_VALUE(NODE) \ - (LANG_IDENTIFIER_CAST (NODE)->label_value) -#define SET_IDENTIFIER_LABEL_VALUE(NODE, VALUE) \ - IDENTIFIER_LABEL_VALUE (NODE) = (VALUE) - /* Kinds of identifiers. Values are carefully chosen. */ enum cp_identifier_kind { cik_normal = 0, /* Not a special identifier. */ @@ -1662,12 +1656,22 @@ struct cxx_int_tree_map_hasher : ggc_ptr_hash<cxx_int_tree_map> static bool equal (cxx_int_tree_map *, cxx_int_tree_map *); }; -struct named_label_entry; +struct named_label_entry; /* Defined in decl.c. */ -struct named_label_hasher : ggc_ptr_hash<named_label_entry> +struct named_label_hash : ggc_remove <named_label_entry *> { - static hashval_t hash (named_label_entry *); - static bool equal (named_label_entry *, named_label_entry *); + typedef named_label_entry *value_type; + typedef tree compare_type; /* An identifier. */ + + inline static hashval_t hash (value_type); + inline static bool equal (const value_type, compare_type); + + inline static void mark_empty (value_type &p) {p = NULL;} + inline static bool is_empty (value_type p) {return !p;} + + /* Nothing is deletable. Everything is insertable. */ + inline static bool is_deleted (value_type) { return false; } + inline static void mark_deleted (value_type) { gcc_unreachable (); } }; /* Global state pertinent to the current function. */ @@ -1696,7 +1700,8 @@ struct GTY(()) language_function { BOOL_BITFIELD invalid_constexpr : 1; - hash_table<named_label_hasher> *x_named_labels; + hash_table<named_label_hash> *x_named_labels; + cp_binding_level *bindings; vec<tree, va_gc> *x_local_names; /* Tracking possibly infinite loops. This is a vec<tree> only because diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index bb48099..519aa06 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -189,27 +189,33 @@ struct GTY((chain_next ("%h.next"))) named_label_use_entry { function, and so we can check the validity of jumps to these labels. */ struct GTY((for_user)) named_label_entry { - /* The decl itself. */ - tree label_decl; + + tree name; /* Name of decl. */ + + tree label_decl; /* LABEL_DECL, unless deleted local label. */ + + named_label_entry *outer; /* Outer shadowed chain. */ /* The binding level to which the label is *currently* attached. This is initially set to the binding level in which the label is defined, but is modified as scopes are closed. */ cp_binding_level *binding_level; + /* The head of the names list that was current when the label was defined, or the inner scope popped. These are the decls that will be skipped when jumping to the label. */ tree names_in_scope; + /* A vector of all decls from all binding levels that would be crossed by a backward branch to the label. */ vec<tree, va_gc> *bad_decls; /* A list of uses of the label, before the label is defined. 
*/ - struct named_label_use_entry *uses; + named_label_use_entry *uses; /* The following bits are set after the label is defined, and are - updated as scopes are popped. They indicate that a backward jump - to the label will illegally enter a scope of the given flavor. */ + updated as scopes are popped. They indicate that a jump to the + label will illegally enter a scope of the given flavor. */ bool in_try_scope; bool in_catch_scope; bool in_omp_scope; @@ -347,7 +353,7 @@ finish_scope (void) in a valid manner, and issue any appropriate warnings or errors. */ static void -pop_label (tree label, tree old_value) +check_label_used (tree label) { if (!processing_template_decl) { @@ -364,18 +370,6 @@ pop_label (tree label, tree old_value) else warn_for_unused_label (label); } - - SET_IDENTIFIER_LABEL_VALUE (DECL_NAME (label), old_value); -} - -/* Push all named labels into a vector, so that we can sort it on DECL_UID - to avoid code generation differences. */ - -int -note_label (named_label_entry **slot, vec<named_label_entry **> &labels) -{ - labels.quick_push (slot); - return 1; } /* Helper function to sort named label entries in a vector by DECL_UID. */ @@ -383,13 +377,11 @@ note_label (named_label_entry **slot, vec<named_label_entry **> &labels) static int sort_labels (const void *a, const void *b) { - named_label_entry **slot1 = *(named_label_entry **const *) a; - named_label_entry **slot2 = *(named_label_entry **const *) b; - if (DECL_UID ((*slot1)->label_decl) < DECL_UID ((*slot2)->label_decl)) - return -1; - if (DECL_UID ((*slot1)->label_decl) > DECL_UID ((*slot2)->label_decl)) - return 1; - return 0; + tree label1 = *(tree const *) a; + tree label2 = *(tree const *) b; + + /* DECL_UIDs can never be equal. */ + return DECL_UID (label1) > DECL_UID (label2) ? -1 : +1; } /* At the end of a function, all labels declared within the function @@ -399,46 +391,58 @@ sort_labels (const void *a, const void *b) static void pop_labels (tree block) { - if (named_labels) + if (!named_labels) + return; + + /* We need to add the labels to the block chain, so debug + information is emitted. But, we want the order to be stable so + need to sort them first. Otherwise the debug output could be + randomly ordered. I guess it's mostly stable, unless the hash + table implementation changes. */ + auto_vec<tree, 32> labels (named_labels->elements ()); + hash_table<named_label_hash>::iterator end (named_labels->end ()); + for (hash_table<named_label_hash>::iterator iter + (named_labels->begin ()); iter != end; ++iter) { - auto_vec<named_label_entry **, 32> labels; - named_label_entry **slot; - unsigned int i; + named_label_entry *ent = *iter; - /* Push all the labels into a vector and sort them by DECL_UID, - so that gaps between DECL_UIDs don't affect code generation. */ - labels.reserve_exact (named_labels->elements ()); - named_labels->traverse<vec<named_label_entry **> &, note_label> (labels); - labels.qsort (sort_labels); - FOR_EACH_VEC_ELT (labels, i, slot) - { - struct named_label_entry *ent = *slot; + gcc_checking_assert (!ent->outer); + if (ent->label_decl) + labels.quick_push (ent->label_decl); + ggc_free (ent); + } + named_labels = NULL; + labels.qsort (sort_labels); - pop_label (ent->label_decl, NULL_TREE); + while (labels.length ()) + { + tree label = labels.pop (); - /* Put the labels into the "variables" of the top-level block, - so debugger can see them. 
*/ - DECL_CHAIN (ent->label_decl) = BLOCK_VARS (block); - BLOCK_VARS (block) = ent->label_decl; + DECL_CHAIN (label) = BLOCK_VARS (block); + BLOCK_VARS (block) = label; - named_labels->clear_slot (slot); - } - named_labels = NULL; + check_label_used (label); } } /* At the end of a block with local labels, restore the outer definition. */ static void -pop_local_label (tree label, tree old_value) +pop_local_label (tree id, tree label) { - struct named_label_entry dummy; - - pop_label (label, old_value); + check_label_used (label); + named_label_entry **slot = named_labels->find_slot_with_hash + (id, IDENTIFIER_HASH_VALUE (id), NO_INSERT); + named_label_entry *ent = *slot; - dummy.label_decl = label; - named_label_entry **slot = named_labels->find_slot (&dummy, NO_INSERT); - named_labels->clear_slot (slot); + if (ent->outer) + ent = ent->outer; + else + { + ent = ggc_cleared_alloc<named_label_entry> (); + ent->name = id; + } + *slot = ent; } /* The following two routines are used to interface to Objective-C++. @@ -579,7 +583,6 @@ poplevel (int keep, int reverse, int functionbody) int leaving_for_scope; scope_kind kind; unsigned ix; - cp_label_binding *label_bind; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); restart: @@ -613,11 +616,12 @@ poplevel (int keep, int reverse, int functionbody) Usually current_binding_level->names is in reverse order. But parameter decls were previously put in forward order. */ + decls = current_binding_level->names; if (reverse) - current_binding_level->names - = decls = nreverse (current_binding_level->names); - else - decls = current_binding_level->names; + { + decls = nreverse (decls); + current_binding_level->names = decls; + } /* If there were any declarations or structure tags in that level, or if this level is a function body, @@ -770,7 +774,10 @@ poplevel (int keep, int reverse, int functionbody) } } /* Remove the binding. */ - pop_local_binding (name, decl); + if (TREE_CODE (decl) == LABEL_DECL) + pop_local_label (name, decl); + else + pop_local_binding (name, decl); } /* Remove declarations for any `for' variables from inner scopes @@ -784,11 +791,6 @@ poplevel (int keep, int reverse, int functionbody) link; link = TREE_CHAIN (link)) SET_IDENTIFIER_TYPE_VALUE (TREE_PURPOSE (link), TREE_VALUE (link)); - /* Restore the IDENTIFIER_LABEL_VALUEs for local labels. */ - FOR_EACH_VEC_SAFE_ELT_REVERSE (current_binding_level->shadowed_labels, - ix, label_bind) - pop_local_label (label_bind->label, label_bind->prev_value); - /* There may be OVERLOADs (wrapped in TREE_LISTs) on the BLOCK_VARs list if a `using' declaration put them there. The debugging back ends won't understand OVERLOAD, so we remove them here. @@ -2478,6 +2480,8 @@ next_arg:; break; } } + + copy_attributes_to_builtin (newdecl); } if (new_defines_function) /* If defining a function declared with other language @@ -2947,81 +2951,83 @@ redeclaration_error_message (tree newdecl, tree olddecl) } } + /* Hash and equality functions for the named_label table. */ hashval_t -named_label_hasher::hash (named_label_entry *ent) +named_label_hash::hash (const value_type entry) { - return DECL_UID (ent->label_decl); + return IDENTIFIER_HASH_VALUE (entry->name); } bool -named_label_hasher::equal (named_label_entry *a, named_label_entry *b) +named_label_hash::equal (const value_type entry, compare_type name) { - return a->label_decl == b->label_decl; + return name == entry->name; } -/* Create a new label, named ID. */ +/* Look for a label named ID in the current function. If one cannot + be found, create one. 
Return the named_label_entry, or NULL on + failure. */ -static tree -make_label_decl (tree id, int local_p) +static named_label_entry * +lookup_label_1 (tree id, bool making_local_p) { - struct named_label_entry *ent; - tree decl; - - decl = build_decl (input_location, LABEL_DECL, id, void_type_node); - - DECL_CONTEXT (decl) = current_function_decl; - SET_DECL_MODE (decl, VOIDmode); - C_DECLARED_LABEL_FLAG (decl) = local_p; - - /* Say where one reference is to the label, for the sake of the - error if it is not defined. */ - DECL_SOURCE_LOCATION (decl) = input_location; - - /* Record the fact that this identifier is bound to this label. */ - SET_IDENTIFIER_LABEL_VALUE (id, decl); + /* You can't use labels at global scope. */ + if (current_function_decl == NULL_TREE) + { + error ("label %qE referenced outside of any function", id); + return NULL; + } - /* Create the label htab for the function on demand. */ if (!named_labels) - named_labels = hash_table<named_label_hasher>::create_ggc (13); + named_labels = hash_table<named_label_hash>::create_ggc (13); - /* Record this label on the list of labels used in this function. - We do this before calling make_label_decl so that we get the - IDENTIFIER_LABEL_VALUE before the new label is declared. */ - ent = ggc_cleared_alloc<named_label_entry> (); - ent->label_decl = decl; - - named_label_entry **slot = named_labels->find_slot (ent, INSERT); - gcc_assert (*slot == NULL); - *slot = ent; + hashval_t hash = IDENTIFIER_HASH_VALUE (id); + named_label_entry **slot + = named_labels->find_slot_with_hash (id, hash, INSERT); + named_label_entry *old = *slot; + + if (old && old->label_decl) + { + if (!making_local_p) + return old; - return decl; -} + if (old->binding_level == current_binding_level) + { + error ("local label %qE conflicts with existing label", id); + inform (DECL_SOURCE_LOCATION (old->label_decl), "previous label"); + return NULL; + } + } -/* Look for a label named ID in the current function. If one cannot - be found, create one. (We keep track of used, but undefined, - labels, and complain about them at the end of a function.) */ + /* We are making a new decl, create or reuse the named_label_entry */ + named_label_entry *ent = NULL; + if (old && !old->label_decl) + ent = old; + else + { + ent = ggc_cleared_alloc<named_label_entry> (); + ent->name = id; + ent->outer = old; + *slot = ent; + } -static tree -lookup_label_1 (tree id) -{ - tree decl; + /* Now create the LABEL_DECL. */ + tree decl = build_decl (input_location, LABEL_DECL, id, void_type_node); - /* You can't use labels at global scope. */ - if (current_function_decl == NULL_TREE) + DECL_CONTEXT (decl) = current_function_decl; + SET_DECL_MODE (decl, VOIDmode); + if (making_local_p) { - error ("label %qE referenced outside of any function", id); - return NULL_TREE; + C_DECLARED_LABEL_FLAG (decl) = true; + DECL_CHAIN (decl) = current_binding_level->names; + current_binding_level->names = decl; } - /* See if we've already got this label. */ - decl = IDENTIFIER_LABEL_VALUE (id); - if (decl != NULL_TREE && DECL_CONTEXT (decl) == current_function_decl) - return decl; + ent->label_decl = decl; - decl = make_label_decl (id, /*local_p=*/0); - return decl; + return ent; } /* Wrapper for lookup_label_1. 
*/ @@ -3029,30 +3035,19 @@ lookup_label_1 (tree id) tree lookup_label (tree id) { - tree ret; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); - ret = lookup_label_1 (id); + named_label_entry *ent = lookup_label_1 (id, false); timevar_cond_stop (TV_NAME_LOOKUP, subtime); - return ret; + return ent ? ent->label_decl : NULL_TREE; } -/* Declare a local label named ID. */ - tree declare_local_label (tree id) { - tree decl; - cp_label_binding bind; - - /* Add a new entry to the SHADOWED_LABELS list so that when we leave - this scope we can restore the old value of IDENTIFIER_TYPE_VALUE. */ - bind.prev_value = IDENTIFIER_LABEL_VALUE (id); - - decl = make_label_decl (id, /*local_p=*/1); - bind.label = decl; - vec_safe_push (current_binding_level->shadowed_labels, bind); - - return decl; + bool subtime = timevar_cond_start (TV_NAME_LOOKUP); + named_label_entry *ent = lookup_label_1 (id, true); + timevar_cond_stop (TV_NAME_LOOKUP, subtime); + return ent ? ent->label_decl : NULL_TREE; } /* Returns nonzero if it is ill-formed to jump past the declaration of @@ -3091,8 +3086,9 @@ identify_goto (tree decl, location_t loc, const location_t *locus, diagnostic_t diag_kind) { bool complained - = (decl ? emit_diagnostic (diag_kind, loc, 0, "jump to label %qD", decl) - : emit_diagnostic (diag_kind, loc, 0, "jump to case label")); + = emit_diagnostic (diag_kind, loc, 0, + decl ? N_("jump to label %qD") + : N_("jump to case label"), decl); if (complained && locus) inform (*locus, " from here"); return complained; @@ -3147,68 +3143,62 @@ check_previous_goto_1 (tree decl, cp_binding_level* level, tree names, " crosses initialization of %q#D", new_decls); else inform (DECL_SOURCE_LOCATION (new_decls), - " enters scope of %q#D which has " + " enters scope of %q#D, which has " "non-trivial destructor", new_decls); } } if (b == level) break; - if ((b->kind == sk_try || b->kind == sk_catch) && !saw_eh) + + const char *inf = NULL; + location_t loc = input_location; + switch (b->kind) { - if (identified < 2) - { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; - } - if (complained) - { - if (b->kind == sk_try) - inform (input_location, " enters try block"); - else - inform (input_location, " enters catch block"); - } + case sk_try: + if (!saw_eh) + inf = N_("enters try block"); saw_eh = true; - } - if (b->kind == sk_omp && !saw_omp) - { - if (identified < 2) - { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; - } - if (complained) - inform (input_location, " enters OpenMP structured block"); + break; + + case sk_catch: + if (!saw_eh) + inf = N_("enters catch block"); + saw_eh = true; + break; + + case sk_omp: + if (!saw_omp) + inf = N_("enters OpenMP structured block"); saw_omp = true; - } - if (b->kind == sk_transaction && !saw_tm) - { - if (identified < 2) + break; + + case sk_transaction: + if (!saw_tm) + inf = N_("enters synchronized or atomic statement"); + saw_tm = true; + break; + + case sk_block: + if (!saw_cxif && level_for_constexpr_if (b->level_chain)) { - complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; + inf = N_("enters constexpr if statement"); + loc = EXPR_LOCATION (b->level_chain->this_entity); + saw_cxif = true; } - if (complained) - inform (input_location, - " enters synchronized or atomic statement"); - saw_tm = true; + break; + + default: + break; } - if (!saw_cxif && b->kind == sk_block - && level_for_constexpr_if (b->level_chain)) + + if (inf) { if (identified < 2) - { - 
complained = identify_goto (decl, input_location, locus, - DK_ERROR); - identified = 2; - } + complained = identify_goto (decl, input_location, locus, DK_ERROR); + identified = 2; if (complained) - inform (EXPR_LOCATION (b->level_chain->this_entity), - " enters constexpr if statement"); - saw_cxif = true; + inform (loc, " %s", inf); } } @@ -3235,12 +3225,6 @@ check_switch_goto (cp_binding_level* level) void check_goto (tree decl) { - struct named_label_entry *ent, dummy; - bool saw_catch = false, complained = false; - int identified = 0; - tree bad; - unsigned ix; - /* We can't know where a computed goto is jumping. So we assume that it's OK. */ if (TREE_CODE (decl) != LABEL_DECL) @@ -3251,22 +3235,22 @@ check_goto (tree decl) if (decl == cdtor_label) return; - dummy.label_decl = decl; - ent = named_labels->find (&dummy); - gcc_assert (ent != NULL); + hashval_t hash = IDENTIFIER_HASH_VALUE (DECL_NAME (decl)); + named_label_entry **slot + = named_labels->find_slot_with_hash (DECL_NAME (decl), hash, NO_INSERT); + named_label_entry *ent = *slot; /* If the label hasn't been defined yet, defer checking. */ if (! DECL_INITIAL (decl)) { - struct named_label_use_entry *new_use; - /* Don't bother creating another use if the last goto had the same data, and will therefore create the same set of errors. */ if (ent->uses && ent->uses->names_in_scope == current_binding_level->names) return; - new_use = ggc_alloc<named_label_use_entry> (); + named_label_use_entry *new_use + = ggc_alloc<named_label_use_entry> (); new_use->binding_level = current_binding_level; new_use->names_in_scope = current_binding_level->names; new_use->o_goto_locus = input_location; @@ -3277,6 +3261,11 @@ check_goto (tree decl) return; } + bool saw_catch = false, complained = false; + int identified = 0; + tree bad; + unsigned ix; + if (ent->in_try_scope || ent->in_catch_scope || ent->in_transaction_scope || ent->in_constexpr_if || ent->in_omp_scope || !vec_safe_is_empty (ent->bad_decls)) @@ -3337,27 +3326,24 @@ check_goto (tree decl) inform (input_location, " enters OpenMP structured block"); } else if (flag_openmp) - { - cp_binding_level *b; - for (b = current_binding_level; b ; b = b->level_chain) - { - if (b == ent->binding_level) + for (cp_binding_level *b = current_binding_level; b ; b = b->level_chain) + { + if (b == ent->binding_level) + break; + if (b->kind == sk_omp) + { + if (identified < 2) + { + complained = identify_goto (decl, + DECL_SOURCE_LOCATION (decl), + &input_location, DK_ERROR); + identified = 2; + } + if (complained) + inform (input_location, " exits OpenMP structured block"); break; - if (b->kind == sk_omp) - { - if (identified < 2) - { - complained = identify_goto (decl, - DECL_SOURCE_LOCATION (decl), - &input_location, DK_ERROR); - identified = 2; - } - if (complained) - inform (input_location, " exits OpenMP structured block"); - break; - } - } - } + } + } } /* Check that a return is ok wrt OpenMP structured blocks. 
@@ -3366,8 +3352,7 @@ check_goto (tree decl) bool check_omp_return (void) { - cp_binding_level *b; - for (b = current_binding_level; b ; b = b->level_chain) + for (cp_binding_level *b = current_binding_level; b ; b = b->level_chain) if (b->kind == sk_omp) { error ("invalid exit from OpenMP structured block"); @@ -3384,25 +3369,15 @@ check_omp_return (void) static tree define_label_1 (location_t location, tree name) { - struct named_label_entry *ent, dummy; - cp_binding_level *p; - tree decl; - - decl = lookup_label (name); - - dummy.label_decl = decl; - ent = named_labels->find (&dummy); - gcc_assert (ent != NULL); - /* After labels, make any new cleanups in the function go into their own new (temporary) binding contour. */ - for (p = current_binding_level; + for (cp_binding_level *p = current_binding_level; p->kind != sk_function_parms; p = p->level_chain) p->more_cleanups_ok = 0; - if (name == get_identifier ("wchar_t")) - permerror (input_location, "label named wchar_t"); + named_label_entry *ent = lookup_label_1 (name, false); + tree decl = ent->label_decl; if (DECL_INITIAL (decl) != NULL_TREE) { @@ -3411,8 +3386,6 @@ define_label_1 (location_t location, tree name) } else { - struct named_label_use_entry *use; - /* Mark label as having been defined. */ DECL_INITIAL (decl) = error_mark_node; /* Say where in the source. */ @@ -3421,7 +3394,7 @@ define_label_1 (location_t location, tree name) ent->binding_level = current_binding_level; ent->names_in_scope = current_binding_level->names; - for (use = ent->uses; use ; use = use->next) + for (named_label_use_entry *use = ent->uses; use; use = use->next) check_previous_goto (decl, use); ent->uses = NULL; } @@ -3434,9 +3407,8 @@ define_label_1 (location_t location, tree name) tree define_label (location_t location, tree name) { - tree ret; bool running = timevar_cond_start (TV_NAME_LOOKUP); - ret = define_label_1 (location, name); + tree ret = define_label_1 (location, name); timevar_cond_stop (TV_NAME_LOOKUP, running); return ret; } diff --git a/gcc/cp/lex.c b/gcc/cp/lex.c index fd93401..da9187d 100644 --- a/gcc/cp/lex.c +++ b/gcc/cp/lex.c @@ -585,7 +585,6 @@ make_conv_op_name (tree type) /* Just in case something managed to bind. */ IDENTIFIER_BINDING (identifier) = NULL; - IDENTIFIER_LABEL_VALUE (identifier) = NULL_TREE; /* Hang TYPE off the identifier so it can be found easily later when performing conversions. */ diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index bf0bf85..1fc1280 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -148,15 +148,6 @@ struct GTY(()) cp_class_binding { tree identifier; }; - -struct GTY(()) cp_label_binding { - /* The bound LABEL_DECL. */ - tree label; - /* The previous IDENTIFIER_LABEL_VALUE. */ - tree prev_value; -}; - - /* For each binding contour we allocate a binding_level structure which records the names defined in that contour. Contours include: @@ -202,10 +193,6 @@ struct GTY(()) cp_binding_level { the class. */ tree type_shadowed; - /* Similar to class_shadowed, but for IDENTIFIER_LABEL_VALUE, and - used for all binding levels. */ - vec<cp_label_binding, va_gc> *shadowed_labels; - /* For each level (except not the global one), a chain of BLOCK nodes for all the levels that were entered and exited one level down. 
*/ diff --git a/gcc/cp/ptree.c b/gcc/cp/ptree.c index 50c717e..90bae2a 100644 --- a/gcc/cp/ptree.c +++ b/gcc/cp/ptree.c @@ -177,7 +177,6 @@ cxx_print_identifier (FILE *file, tree node, int indent) indent_to (file, indent + 4); fprintf (file, "%s local bindings <%p>", get_identifier_kind_name (node), (void *) IDENTIFIER_BINDING (node)); - print_node (file, "label", IDENTIFIER_LABEL_VALUE (node), indent + 4); } void diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 71b2445..2fc087a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -948,6 +948,7 @@ Objective-C and Objective-C++ Dialects}. @emph{Nios II Options} @gccoptlist{-G @var{num} -mgpopt=@var{option} -mgpopt -mno-gpopt @gol +-mgprel-sec=@var{regexp} -mr0rel-sec=@var{regexp} @gol -mel -meb @gol -mno-bypass-cache -mbypass-cache @gol -mno-cache-volatile -mcache-volatile @gol @@ -6982,7 +6983,12 @@ link processing time. Merging is enabled by default. @item -fdebug-prefix-map=@var{old}=@var{new} @opindex fdebug-prefix-map When compiling files in directory @file{@var{old}}, record debugging -information describing them as in @file{@var{new}} instead. +information describing them as in @file{@var{new}} instead. This can be +used to replace a build-time path with an install-time path in the debug info. +It can also be used to change an absolute path to a relative path by using +@file{.} for @var{new}. This can give more reproducible builds, which are +location independent, but may require an extra command to tell GDB where to +find the source files. @item -fvar-tracking @opindex fvar-tracking @@ -21166,6 +21172,32 @@ GOT data sections. In this case, the 16-bit offset for GP-relative addressing may not be large enough to allow access to the entire small data section. +@item -mgprel-sec=@var{regexp} +@opindex mgprel-sec +This option specifies additional section names that can be accessed via +GP-relative addressing. It is most useful in conjunction with +@code{section} attributes on variable declarations +(@pxref{Common Variable Attributes}) and a custom linker script. +The @var{regexp} is a POSIX Extended Regular Expression. + +This option does not affect the behavior of the @option{-G} option, and +the specified sections are in addition to the standard @code{.sdata} +and @code{.sbss} small-data sections that are recognized by @option{-mgpopt}. + +@item -mr0rel-sec=@var{regexp} +@opindex mr0rel-sec +This option specifies names of sections that can be accessed via a +16-bit offset from @code{r0}; that is, in the low 32K or high 32K +of the 32-bit address space. It is most useful in conjunction with +@code{section} attributes on variable declarations +(@pxref{Common Variable Attributes}) and a custom linker script. +The @var{regexp} is a POSIX Extended Regular Expression. + +In contrast to the use of GP-relative addressing for small data, +zero-based addressing is never generated by default and there are no +conventional section names used in standard linker scripts for sections +in the low or high areas of memory. + @item -mel @itemx -meb @opindex mel @opindex meb @@ -22614,12 +22646,18 @@ Disable Book-E SPE ABI extensions for the current ABI@. @item -mabi=ibmlongdouble @opindex mabi=ibmlongdouble Change the current ABI to use IBM extended-precision long double. -This is a PowerPC 32-bit SYSV ABI option. +This is not likely to work if your system defaults to using IEEE +extended-precision long double. If you change the long double type +from IEEE extended-precision, the compiler will issue a warning unless +you use the @option{-Wno-psabi} option. @item -mabi=ieeelongdouble @opindex mabi=ieeelongdouble Change the current ABI to use IEEE extended-precision long double. -This is a PowerPC 32-bit Linux ABI option. +This is not likely to work if your system defaults to using IBM +extended-precision long double. If you change the long double type +from IBM extended-precision, the compiler will issue a warning unless +you use the @option{-Wno-psabi} option.
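Either warning only reports the ABI change; code can also verify at build time which format it got. A hedged sketch, assuming the PowerPC preprocessor macros @code{__LONG_DOUBLE_IEEE128__} and @code{__LONG_DOUBLE_128__} (predefined by the compiler on these targets, not added by this patch):

#include <stdio.h>

int
main (void)
{
#if defined (__LONG_DOUBLE_IEEE128__)
  puts ("long double: IEEE 128-bit");      /* e.g. -mabi=ieeelongdouble */
#elif defined (__LONG_DOUBLE_128__)
  puts ("long double: IBM double-double"); /* e.g. -mabi=ibmlongdouble */
#else
  puts ("long double: not 128-bit");
#endif
  printf ("sizeof (long double) = %zu\n", sizeof (long double));
  return 0;
}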
@item -mabi=elfv1 @opindex mabi=elfv1 diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 7d6d4a3..c00aece 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1702,6 +1702,17 @@ ARM target supports executing instructions from ARMv8.2 with the FP16 extension. Some multilibs may be incompatible with these options. Implies arm_v8_2a_fp16_neon_ok and arm_v8_2a_fp16_scalar_hw. +@item arm_v8_2a_dotprod_neon_ok +@anchor{arm_v8_2a_dotprod_neon_ok} +ARM target supports options to generate instructions from ARMv8.2 with +the Dot Product extension. Some multilibs may be incompatible with these +options. + +@item arm_v8_2a_dotprod_neon_hw +ARM target supports executing instructions from ARMv8.2 with the Dot +Product extension. Some multilibs may be incompatible with these options. +Implies arm_v8_2a_dotprod_neon_ok. + @item arm_prefer_ldrd_strd ARM target prefers @code{LDRD} and @code{STRD} instructions over @code{LDM} and @code{STM} instructions. @@ -2308,6 +2319,11 @@ supported by the target; see the @ref{arm_v8_2a_fp16_neon_ok,,arm_v8_2a_fp16_neon_ok} effective target keyword. +@item arm_v8_2a_dotprod_neon +Add options for ARMv8.2 with Adv.SIMD Dot Product support, if this is +supported by the target; see the +@ref{arm_v8_2a_dotprod_neon_ok} effective target keyword. + @item bind_pic_locally Add the target-specific flags needed to enable functions to bind locally when using pic/PIC passes in the testsuite. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8484c1d..c02f4d3 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -1078,6 +1078,13 @@ On 32-bit ELF the largest supported section alignment in bits is @samp{(0x80000000 * 8)}, but this is not representable on 32-bit hosts. @end defmac +@deftypefn {Target Hook} HOST_WIDE_INT TARGET_STATIC_RTX_ALIGNMENT (machine_mode @var{mode}) +This hook returns the preferred alignment in bits for a +statically-allocated rtx, such as a constant pool entry. @var{mode} +is the mode of the rtx. The default implementation returns +@samp{GET_MODE_ALIGNMENT (@var{mode})}. +@end deftypefn + @defmac DATA_ALIGNMENT (@var{type}, @var{basic-align}) If defined, a C expression to compute the alignment for a variable in the static store. @var{type} is the data type, and @var{basic-align} is diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 015f59e..37308e1 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -1026,6 +1026,8 @@ On 32-bit ELF the largest supported section alignment in bits is @samp{(0x80000000 * 8)}, but this is not representable on 32-bit hosts. @end defmac +@hook TARGET_STATIC_RTX_ALIGNMENT + @defmac DATA_ALIGNMENT (@var{type}, @var{basic-align}) If defined, a C expression to compute the alignment for a variable in the static store. 
@var{type} is the data type, and @var{basic-align} is diff --git a/gcc/fold-const.c b/gcc/fold-const.c index c16959b..9db5aeb 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -3366,7 +3366,8 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) #undef OP_SAME_WITH_NULL } -/* Similar to operand_equal_p, but strip nops first. */ +/* Similar to operand_equal_p, but see if ARG0 might be a variant of ARG1 + with a different signedness or a narrower precision. */ static bool operand_equal_for_comparison_p (tree arg0, tree arg1) @@ -3381,9 +3382,20 @@ operand_equal_for_comparison_p (tree arg0, tree arg1) /* Discard any conversions that don't change the modes of ARG0 and ARG1 and see if the inner values are the same. This removes any signedness comparison, which doesn't matter here. */ - STRIP_NOPS (arg0); - STRIP_NOPS (arg1); - if (operand_equal_p (arg0, arg1, 0)) + tree op0 = arg0; + tree op1 = arg1; + STRIP_NOPS (op0); + STRIP_NOPS (op1); + if (operand_equal_p (op0, op1, 0)) + return true; + + /* Discard a single widening conversion from ARG1 and see if the inner + value is the same as ARG0. */ + if (CONVERT_EXPR_P (arg1) + && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (arg1, 0))) + && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg1, 0))) + < TYPE_PRECISION (TREE_TYPE (arg1)) + && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) return true; return false; @@ -11169,8 +11181,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, Also try swapping the arguments and inverting the conditional. */ if (COMPARISON_CLASS_P (arg0) - && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), arg1) - && !HONOR_SIGNED_ZEROS (element_mode (arg1))) + && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), op1) + && !HONOR_SIGNED_ZEROS (element_mode (op1))) { tem = fold_cond_expr_with_comparison (loc, type, arg0, op1, op2); if (tem) diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 6bf4f1d..9e34152 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,7 @@ +2017-10-25 Bernhard Reutner-Fischer <aldot@gcc.gnu.org> + + * match.c (gfc_match_type_is): Fix typo in error message. 
+ 2017-10-21 Paul Thomas <pault@gcc.gnu.org> PR fortran/82586 diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index 4d657e0..624fdf5 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -6204,7 +6204,7 @@ gfc_match_type_is (void) return MATCH_YES; syntax: - gfc_error ("Ssyntax error in TYPE IS specification at %C"); + gfc_error ("Syntax error in TYPE IS specification at %C"); cleanup: if (c != NULL) diff --git a/gcc/gimple.c b/gcc/gimple.c index 1f291e1..37f2248 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -1840,11 +1840,35 @@ gimple_copy (gimple *stmt) gimple_omp_sections_set_clauses (copy, t); t = unshare_expr (gimple_omp_sections_control (stmt)); gimple_omp_sections_set_control (copy, t); - /* FALLTHRU */ + goto copy_omp_body; case GIMPLE_OMP_SINGLE: + { + gomp_single *omp_single_copy = as_a <gomp_single *> (copy); + t = unshare_expr (gimple_omp_single_clauses (stmt)); + gimple_omp_single_set_clauses (omp_single_copy, t); + } + goto copy_omp_body; + case GIMPLE_OMP_TARGET: + { + gomp_target *omp_target_stmt = as_a <gomp_target *> (stmt); + gomp_target *omp_target_copy = as_a <gomp_target *> (copy); + t = unshare_expr (gimple_omp_target_clauses (omp_target_stmt)); + gimple_omp_target_set_clauses (omp_target_copy, t); + t = unshare_expr (gimple_omp_target_data_arg (omp_target_stmt)); + gimple_omp_target_set_data_arg (omp_target_copy, t); + } + goto copy_omp_body; + case GIMPLE_OMP_TEAMS: + { + gomp_teams *omp_teams_copy = as_a <gomp_teams *> (copy); + t = unshare_expr (gimple_omp_teams_clauses (stmt)); + gimple_omp_teams_set_clauses (omp_teams_copy, t); + } + /* FALLTHRU */ + case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: case GIMPLE_OMP_TASKGROUP: diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 8b1846d..0fa2ccc 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -a409ac2c78899e638a014c97891925bec93cb3ad +64d570c590a76921cbdca4efb22e4675e19cc809 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
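For context, the gimple_copy hunk above makes copies of GIMPLE_OMP_SINGLE, GIMPLE_OMP_TARGET, and GIMPLE_OMP_TEAMS statements unshare their clause trees (and, for target, the data-argument tree), fixing PR tree-optimization/82707. A hedged sketch of the kind of source construct involved, illustrative rather than the PR's actual testcase:

/* If the compiler duplicates this body (inlining, cloning, etc.),
   each copy of the target/teams/single statements must get its own
   clause trees; sharing them corrupted later passes.  */
void
scale (int *a, int n, int factor)
{
#pragma omp target teams map(tofrom: a[0:n]) firstprivate(factor)
  {
#pragma omp distribute parallel for
    for (int i = 0; i < n; i++)
      a[i] *= factor;
  }

#pragma omp parallel
  {
#pragma omp single
    a[0] += 1;
  }
}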
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc index 8337cbe..dad22eb 100644 --- a/gcc/go/gofrontend/expressions.cc +++ b/gcc/go/gofrontend/expressions.cc @@ -144,8 +144,8 @@ Expression::convert_for_assignment(Gogo*, Type* lhs_type, || rhs->is_error_expression()) return Expression::make_error(location); - if (lhs_type->forwarded() != rhs_type->forwarded() - && lhs_type->interface_type() != NULL) + bool are_identical = Type::are_identical(lhs_type, rhs_type, false, NULL); + if (!are_identical && lhs_type->interface_type() != NULL) { if (rhs_type->interface_type() == NULL) return Expression::convert_type_to_interface(lhs_type, rhs, location); @@ -153,8 +153,7 @@ Expression::convert_for_assignment(Gogo*, Type* lhs_type, return Expression::convert_interface_to_interface(lhs_type, rhs, false, location); } - else if (lhs_type->forwarded() != rhs_type->forwarded() - && rhs_type->interface_type() != NULL) + else if (!are_identical && rhs_type->interface_type() != NULL) return Expression::convert_interface_to_type(lhs_type, rhs, location); else if (lhs_type->is_slice_type() && rhs_type->is_nil_type()) { @@ -165,8 +164,15 @@ Expression::convert_for_assignment(Gogo*, Type* lhs_type, } else if (rhs_type->is_nil_type()) return Expression::make_nil(location); - else if (Type::are_identical(lhs_type, rhs_type, false, NULL)) + else if (are_identical) { + if (lhs_type->forwarded() != rhs_type->forwarded()) + { + // Different but identical types require an explicit + // conversion. This happens with type aliases. + return Expression::make_cast(lhs_type, rhs, location); + } + // No conversion is needed. return rhs; } diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 31a4a80..240eb48 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -3908,7 +3908,7 @@ coalesced_pseudo_reg_freq_compare (const void *v1p, const void *v2p) /* Widest width in which each pseudo reg is referred to (via subreg). It is used for sorting pseudo registers. */ -static unsigned int *regno_max_ref_width; +static machine_mode *regno_max_ref_mode; /* Sort pseudos according their slot numbers (putting ones with smaller numbers first, or last when the frame pointer is not @@ -3921,7 +3921,7 @@ coalesced_pseudo_reg_slot_compare (const void *v1p, const void *v2p) ira_allocno_t a1 = ira_regno_allocno_map[regno1]; ira_allocno_t a2 = ira_regno_allocno_map[regno2]; int diff, slot_num1, slot_num2; - int total_size1, total_size2; + machine_mode mode1, mode2; if (a1 == NULL || ALLOCNO_HARD_REGNO (a1) >= 0) { @@ -3936,11 +3936,11 @@ coalesced_pseudo_reg_slot_compare (const void *v1p, const void *v2p) if ((diff = slot_num1 - slot_num2) != 0) return (frame_pointer_needed || (!FRAME_GROWS_DOWNWARD) == STACK_GROWS_DOWNWARD ? diff : -diff); - total_size1 = MAX (PSEUDO_REGNO_BYTES (regno1), - regno_max_ref_width[regno1]); - total_size2 = MAX (PSEUDO_REGNO_BYTES (regno2), - regno_max_ref_width[regno2]); - if ((diff = total_size2 - total_size1) != 0) + mode1 = wider_subreg_mode (PSEUDO_REGNO_MODE (regno1), + regno_max_ref_mode[regno1]); + mode2 = wider_subreg_mode (PSEUDO_REGNO_MODE (regno2), + regno_max_ref_mode[regno2]); + if ((diff = GET_MODE_SIZE (mode2) - GET_MODE_SIZE (mode1)) != 0) return diff; return regno1 - regno2; } @@ -4144,7 +4144,7 @@ coalesce_spill_slots (ira_allocno_t *spilled_coalesced_allocnos, int num) reload. 
*/ void ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n, - unsigned int *reg_max_ref_width) + machine_mode *reg_max_ref_mode) { int max_regno = max_reg_num (); int i, regno, num, slot_num; @@ -4225,10 +4225,14 @@ ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n, ira_assert (ALLOCNO_HARD_REGNO (a) < 0); ALLOCNO_HARD_REGNO (a) = -slot_num; if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL) - fprintf (ira_dump_file, " a%dr%d(%d,%d)", - ALLOCNO_NUM (a), ALLOCNO_REGNO (a), ALLOCNO_FREQ (a), - MAX (PSEUDO_REGNO_BYTES (ALLOCNO_REGNO (a)), - reg_max_ref_width[ALLOCNO_REGNO (a)])); + { + machine_mode mode = wider_subreg_mode + (PSEUDO_REGNO_MODE (ALLOCNO_REGNO (a)), + reg_max_ref_mode[ALLOCNO_REGNO (a)]); + fprintf (ira_dump_file, " a%dr%d(%d,%d)", + ALLOCNO_NUM (a), ALLOCNO_REGNO (a), ALLOCNO_FREQ (a), + GET_MODE_SIZE (mode)); + } if (a == allocno) break; @@ -4239,7 +4243,7 @@ ira_sort_regnos_for_alter_reg (int *pseudo_regnos, int n, ira_spilled_reg_stack_slots_num = slot_num - 1; ira_free (spilled_coalesced_allocnos); /* Sort regnos according the slot numbers. */ - regno_max_ref_width = reg_max_ref_width; + regno_max_ref_mode = reg_max_ref_mode; qsort (pseudo_regnos, n, sizeof (int), coalesced_pseudo_reg_slot_compare); FOR_EACH_ALLOCNO (a, ai) ALLOCNO_ADD_DATA (a) = NULL; @@ -195,7 +195,7 @@ extern void ira_set_pseudo_classes (bool, FILE *); extern void ira_expand_reg_equiv (void); extern void ira_update_equiv_info_by_shuffle_insn (int, int, rtx_insn *); -extern void ira_sort_regnos_for_alter_reg (int *, int, unsigned int *); +extern void ira_sort_regnos_for_alter_reg (int *, int, machine_mode *); extern void ira_mark_allocation_change (int); extern void ira_mark_memory_move_deletion (int, int); extern bool ira_reassign_pseudos (int *, int, HARD_REG_SET, HARD_REG_SET *, diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 6163d7d..a423f06 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1679,7 +1679,7 @@ simplify_operand_subreg (int nop, machine_mode reg_mode) bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg)); insert_before = (type != OP_OUT - || GET_MODE_SIZE (innermode) > GET_MODE_SIZE (mode)); + || read_modify_subreg_p (operand)); insert_after = (type != OP_IN); insert_move_for_subreg (insert_before ? &before : NULL, insert_after ? &after : NULL, @@ -1772,10 +1772,9 @@ uses_hard_regs_p (rtx x, HARD_REG_SET set) mode = GET_MODE (x); if (code == SUBREG) { + mode = wider_subreg_mode (x); x = SUBREG_REG (x); code = GET_CODE (x); - if (GET_MODE_SIZE (GET_MODE (x)) > GET_MODE_SIZE (mode)) - mode = GET_MODE (x); } if (REG_P (x)) @@ -1953,10 +1952,8 @@ process_alt_operands (int only_alternative) biggest_mode[nop] = GET_MODE (op); if (GET_CODE (op) == SUBREG) { + biggest_mode[nop] = wider_subreg_mode (op); operand_reg[nop] = reg = SUBREG_REG (op); - if (GET_MODE_SIZE (biggest_mode[nop]) - < GET_MODE_SIZE (GET_MODE (reg))) - biggest_mode[nop] = GET_MODE (reg); } if (! REG_P (reg)) operand_reg[nop] = NULL_RTX; @@ -4232,9 +4229,7 @@ curr_insn_transform (bool check_only_p) constraints. 
*/ if (type == OP_OUT && (curr_static_id->operand[i].strict_low - || (GET_MODE_SIZE (GET_MODE (reg)) > UNITS_PER_WORD - && (GET_MODE_SIZE (mode) - < GET_MODE_SIZE (GET_MODE (reg)))))) + || read_modify_subreg_p (*loc))) type = OP_INOUT; loc = &SUBREG_REG (*loc); mode = GET_MODE (*loc); @@ -5661,8 +5656,7 @@ invariant_p (const_rtx x) { x = SUBREG_REG (x); code = GET_CODE (x); - if (GET_MODE_SIZE (GET_MODE (x)) > GET_MODE_SIZE (mode)) - mode = GET_MODE (x); + mode = wider_subreg_mode (mode, GET_MODE (x)); } if (MEM_P (x)) diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c index 5997b1e..9abcda4 100644 --- a/gcc/lra-spills.c +++ b/gcc/lra-spills.c @@ -134,8 +134,7 @@ assign_mem_slot (int i) machine_mode mode = GET_MODE (regno_reg_rtx[i]); HOST_WIDE_INT inherent_size = PSEUDO_REGNO_BYTES (i); machine_mode wider_mode - = (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (lra_reg_info[i].biggest_mode) - ? mode : lra_reg_info[i].biggest_mode); + = wider_subreg_mode (mode, lra_reg_info[i].biggest_mode); HOST_WIDE_INT total_size = GET_MODE_SIZE (wider_mode); HOST_WIDE_INT adjust = 0; @@ -312,10 +311,8 @@ add_pseudo_to_slot (int regno, int slot_num) and a total size which provides room for paradoxical subregs. We need to make sure the size and alignment of the slot are sufficient for both. */ - machine_mode mode = (GET_MODE_SIZE (PSEUDO_REGNO_MODE (regno)) - >= GET_MODE_SIZE (lra_reg_info[regno].biggest_mode) - ? PSEUDO_REGNO_MODE (regno) - : lra_reg_info[regno].biggest_mode); + machine_mode mode = wider_subreg_mode (PSEUDO_REGNO_MODE (regno), + lra_reg_info[regno].biggest_mode); unsigned int align = spill_slot_alignment (mode); slots[slot_num].align = MAX (slots[slot_num].align, align); slots[slot_num].size = MAX (slots[slot_num].size, GET_MODE_SIZE (mode)); @@ -832,14 +832,11 @@ collect_non_operand_hard_regs (rtx *x, lra_insn_recog_data_t data, subreg_p = false; if (code == SUBREG) { + mode = wider_subreg_mode (op); + if (read_modify_subreg_p (op)) + subreg_p = true; op = SUBREG_REG (op); code = GET_CODE (op); - if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (op))) - { - mode = GET_MODE (op); - if (GET_MODE_SIZE (mode) > REGMODE_NATURAL_SIZE (mode)) - subreg_p = true; - } } if (REG_P (op)) { @@ -1427,14 +1424,11 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, int uid, subreg_p = false; if (GET_CODE (x) == SUBREG) { + mode = wider_subreg_mode (x); + if (read_modify_subreg_p (x)) + subreg_p = true; x = SUBREG_REG (x); code = GET_CODE (x); - if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (x))) - { - mode = GET_MODE (x); - if (GET_MODE_SIZE (mode) > REGMODE_NATURAL_SIZE (mode)) - subreg_p = true; - } } if (REG_P (x)) { diff --git a/gcc/reload1.c b/gcc/reload1.c index e2ee2fe..e15bd8a 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -97,8 +97,8 @@ static regset_head reg_has_output_reload; in the current insn. */ static HARD_REG_SET reg_is_output_reload; -/* Widest width in which each pseudo reg is referred to (via subreg). */ -static unsigned int *reg_max_ref_width; +/* Widest mode in which each pseudo reg is referred to (via subreg). */ +static machine_mode *reg_max_ref_mode; /* Vector to remember old contents of reg_renumber before spilling. */ static short *reg_old_renumber; @@ -830,7 +830,7 @@ reload (rtx_insn *first, int global) if (ira_conflicts_p) /* Ask IRA to order pseudo-registers for better stack slot sharing. 
*/ - ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_width); + ira_sort_regnos_for_alter_reg (temp_pseudo_reg_arr, n, reg_max_ref_mode); for (i = 0; i < n; i++) alter_reg (temp_pseudo_reg_arr[i], -1, false); @@ -1252,7 +1252,7 @@ reload (rtx_insn *first, int global) /* Indicate that we no longer have known memory locations or constants. */ free_reg_equiv (); - free (reg_max_ref_width); + free (reg_max_ref_mode); free (reg_old_renumber); free (pseudo_previous_regs); free (pseudo_forbidden_regs); @@ -2142,8 +2142,9 @@ alter_reg (int i, int from_reg, bool dont_share_p) machine_mode mode = GET_MODE (regno_reg_rtx[i]); unsigned int inherent_size = PSEUDO_REGNO_BYTES (i); unsigned int inherent_align = GET_MODE_ALIGNMENT (mode); - unsigned int total_size = MAX (inherent_size, reg_max_ref_width[i]); - unsigned int min_align = reg_max_ref_width[i] * BITS_PER_UNIT; + machine_mode wider_mode = wider_subreg_mode (mode, reg_max_ref_mode[i]); + unsigned int total_size = GET_MODE_SIZE (wider_mode); + unsigned int min_align = GET_MODE_BITSIZE (reg_max_ref_mode[i]); int adjust = 0; something_was_spilled = true; @@ -4083,9 +4084,9 @@ init_eliminable_invariants (rtx_insn *first, bool do_subregs) grow_reg_equivs (); if (do_subregs) - reg_max_ref_width = XCNEWVEC (unsigned int, max_regno); + reg_max_ref_mode = XCNEWVEC (machine_mode, max_regno); else - reg_max_ref_width = NULL; + reg_max_ref_mode = NULL; num_eliminable_invariants = 0; @@ -4404,7 +4405,7 @@ finish_spills (int global) return something_changed; } -/* Find all paradoxical subregs within X and update reg_max_ref_width. */ +/* Find all paradoxical subregs within X and update reg_max_ref_mode. */ static void scan_paradoxical_subregs (rtx x) @@ -4427,13 +4428,14 @@ scan_paradoxical_subregs (rtx x) return; case SUBREG: - if (REG_P (SUBREG_REG (x)) - && (GET_MODE_SIZE (GET_MODE (x)) - > reg_max_ref_width[REGNO (SUBREG_REG (x))])) + if (REG_P (SUBREG_REG (x))) { - reg_max_ref_width[REGNO (SUBREG_REG (x))] - = GET_MODE_SIZE (GET_MODE (x)); - mark_home_live_1 (REGNO (SUBREG_REG (x)), GET_MODE (x)); + unsigned int regno = REGNO (SUBREG_REG (x)); + if (partial_subreg_p (reg_max_ref_mode[regno], GET_MODE (x))) + { + reg_max_ref_mode[regno] = GET_MODE (x); + mark_home_live_1 (regno, GET_MODE (x)); + } } return; @@ -2877,6 +2877,24 @@ subreg_lowpart_offset (machine_mode outermode, machine_mode innermode) GET_MODE_SIZE (innermode)); } +/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE, + return the mode that is big enough to hold both the outer and inner + values. Prefer the outer mode in the event of a tie. */ + +inline machine_mode +wider_subreg_mode (machine_mode outermode, machine_mode innermode) +{ + return partial_subreg_p (outermode, innermode) ? innermode : outermode; +} + +/* Likewise for subreg X. */ + +inline machine_mode +wider_subreg_mode (const_rtx x) +{ + return wider_subreg_mode (GET_MODE (x), GET_MODE (SUBREG_REG (x))); +} + extern unsigned int subreg_size_highpart_offset (unsigned int, unsigned int); /* Return the SUBREG_BYTE for an OUTERMODE highpart of an INNERMODE value. */ diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 560bfd4..beb24ba 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -1124,10 +1124,7 @@ reg_referenced_p (const_rtx x, const_rtx body) && !REG_P (SET_DEST (body)) && ! 
(GET_CODE (SET_DEST (body)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (body))) - && (((GET_MODE_SIZE (GET_MODE (SUBREG_REG (SET_DEST (body)))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SET_DEST (body))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))) + && !read_modify_subreg_p (SET_DEST (body))) && reg_overlap_mentioned_p (x, SET_DEST (body))) return 1; return 0; @@ -2017,20 +2014,16 @@ dead_or_set_p (const rtx_insn *insn, const_rtx x) return 1; } -/* Return TRUE iff DEST is a register or subreg of a register and - doesn't change the number of words of the inner register, and any - part of the register is TEST_REGNO. */ +/* Return TRUE iff DEST is a register or subreg of a register, is a + complete rather than read-modify-write destination, and contains + register TEST_REGNO. */ static bool covers_regno_no_parallel_p (const_rtx dest, unsigned int test_regno) { unsigned int regno, endregno; - if (GET_CODE (dest) == SUBREG - && (((GET_MODE_SIZE (GET_MODE (dest)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD))) + if (GET_CODE (dest) == SUBREG && !read_modify_subreg_p (dest)) dest = SUBREG_REG (dest); if (!REG_P (dest)) diff --git a/gcc/target.def b/gcc/target.def index 435849c..6a1cd31 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3336,6 +3336,15 @@ HOOK_VECTOR_END (addr_space) #define HOOK_PREFIX "TARGET_" DEFHOOK +(static_rtx_alignment, + "This hook returns the preferred alignment in bits for a\n\ +statically-allocated rtx, such as a constant pool entry. @var{mode}\n\ +is the mode of the rtx. The default implementation returns\n\ +@samp{GET_MODE_ALIGNMENT (@var{mode})}.", + HOST_WIDE_INT, (machine_mode mode), + default_static_rtx_alignment) + +DEFHOOK (constant_alignment, "This hook returns the alignment in bits of a constant that is being\n\ placed in memory. @var{constant} is the constant and @var{basic_align}\n\ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 41cab38..92ecc90 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1173,6 +1173,14 @@ tree default_mangle_decl_assembler_name (tree decl ATTRIBUTE_UNUSED, return id; } +/* The default implementation of TARGET_STATIC_RTX_ALIGNMENT. */ + +HOST_WIDE_INT +default_static_rtx_alignment (machine_mode mode) +{ + return GET_MODE_ALIGNMENT (mode); +} + /* The default implementation of TARGET_CONSTANT_ALIGNMENT. */ HOST_WIDE_INT diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 1510bb9..f60bca2 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -94,6 +94,7 @@ extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, in extern tree default_builtin_reciprocal (tree); +extern HOST_WIDE_INT default_static_rtx_alignment (machine_mode); extern HOST_WIDE_INT default_constant_alignment (const_tree, HOST_WIDE_INT); extern HOST_WIDE_INT constant_alignment_word_strings (const_tree, HOST_WIDE_INT); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8dbf3b5..2f840a9 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,138 @@ +2017-10-26 Sandra Loosemore <sandra@codesourcery.com> + + * gcc.target/nios2/gpopt-r0rel-sec.c: New. + +2017-10-26 Sandra Loosemore <sandra@codesourcery.com> + + * gcc.target/nios2/gpopt-gprel-sec.c: New. 
+ +2017-10-26 Olga Makhotina <olga.makhotina@intel.com> + + * gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask, + _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, + _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, + _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, + _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, + _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, + _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, + _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, + _mm512_mask_cmpunord_ps_mask): Test new intrinsics. + * gcc.target/i386/avx512f-vcmpps-2.c (_mm512_cmpeq_ps_mask, + _mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, + _mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask, + _mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask, + _mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask, + _mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask, + _mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask, + _mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask, + _mm512_mask_cmpunord_ps_mask): Test new intrinsics. + * gcc.target/i386/avx512f-vcmppd-1.c (_mm512_cmpeq_pd_mask, + _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, + _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, + _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, + _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, + _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, + _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, + _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, + _mm512_mask_cmpunord_pd_mask): Test new intrinsics. + * gcc.target/i386/avx512f-vcmppd-2.c (_mm512_cmpeq_pd_mask, + _mm512_cmple_pd_mask, _mm512_cmplt_pd_mask, + _mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask, + _mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask, + _mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask, + _mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask, + _mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask, + _mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask, + _mm512_mask_cmpunord_pd_mask): Test new intrinsics. + +2017-10-26 Wilco Dijkstra <wdijkstr@arm.com> + + * gcc.target/aarch64/ldp_stp_unaligned_2.c: New file. + +2017-10-26 James Greenhalgh <james.greenhalgh@arm.com> + + * gcc.target/arm/require-pic-register-loc.c: Use wider regex for + column information. + +2017-10-26 Tamar Christina <tamar.christina@arm.com> + + * gcc.dg/vect/vect-reduc-dot-s8a.c + (dg-additional-options, dg-require-effective-target): Add +dotprod. + * gcc.dg/vect/vect-reduc-dot-u8a.c + (dg-additional-options, dg-require-effective-target): Add +dotprod. + +2017-10-26 Tamar Christina <tamar.christina@arm.com> + + * lib/target-supports.exp + (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New. + (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. + (add_options_for_arm_v8_2a_dotprod_neon): New. + (check_effective_target_arm_v8_2a_dotprod_neon_hw): New. + (check_effective_target_vect_sdot_qi): Add ARM && AArch64. + (check_effective_target_vect_udot_qi): Likewise. + * gcc.target/arm/simd/vdot-exec.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New. + * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon. + +2017-10-26 Tamar Christina <tamar.christina@arm.com> + + * gcc.dg/vect/vect-multitypes-1.c: Correct target selector. + +2017-10-26 Tamar Christina <tamar.christina@arm.com> + + * gcc.target/aarch64/inline-lrint_2.c (dg-options): Add -fno-trapping-math. + +2017-10-26 Tamar Christina <tamar.christina@arm.com> + + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New. 
+ * gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New. + * gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New. + +2017-10-25 David Malcolm <dmalcolm@redhat.com> + + PR c/7356 + PR c/44515 + * c-c++-common/pr44515.c: New test case. + * gcc.dg/pr7356-2.c: New test case. + * gcc.dg/pr7356.c: New test case. + * gcc.dg/spellcheck-typenames.c: Update the "singed" char "TODO" + case to reflect changes to output. + * gcc.dg/noncompile/920923-1.c: Add dg-warning to reflect changes + to output. + +2017-10-25 Eric Botcazou <ebotcazou@adacore.com> + + * gcc.dg/fold-cond_expr-1.c: Rename to... + * gcc.dg/fold-cond-2.c: ...this. + * gcc.dg/fold-cond-3.c: New test. + +2017-10-25 Richard Biener <rguenther@suse.de> + + PR tree-optimization/82436 + * gcc.dg/torture/pr82436-2.c: New testcase. + +2017-10-25 Paolo Carlini <paolo.carlini@oracle.com> + + PR c++/71820 + * g++.dg/ext/typeof12.C: New. + +2017-10-25 Tom de Vries <tom@codesourcery.com> + + * gcc.dg/tree-ssa/loop-1.c: Add xfail for nvptx in scan-assembler-times + line, and add nvptx-specific version. + +2017-10-25 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + + * gcc.target/i386/cet-sjlj-5.c: Allow for empty user label prefix + in setjmp call. + +2017-10-25 Jakub Jelinek <jakub@redhat.com> + + PR libstdc++/81706 + * gcc.target/i386/pr81706.c: New test. + * g++.dg/ext/pr81706.C: New test. + 2017-10-24 Jakub Jelinek <jakub@redhat.com> PR target/82460 diff --git a/gcc/testsuite/c-c++-common/pr44515.c b/gcc/testsuite/c-c++-common/pr44515.c new file mode 100644 index 0000000..dbb77509 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr44515.c @@ -0,0 +1,14 @@ +/* { dg-options "-fdiagnostics-show-caret" } */ + +void bar(void); +void foo(void) +{ + bar() /* { dg-error "expected ';' before '.' 
token" } */ +} +/* { dg-begin-multiline-output "" } + bar() + ^ + ; + } + ~ + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/g++.dg/ext/pr81706.C b/gcc/testsuite/g++.dg/ext/pr81706.C new file mode 100644 index 0000000..f0ed8ab --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/pr81706.C @@ -0,0 +1,32 @@ +// PR libstdc++/81706 +// { dg-do compile { target i?86-*-* x86_64-*-* } } +// { dg-options "-O3 -mavx2 -mno-avx512f" } +// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } +// { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } + +#ifdef __cplusplus +extern "C" { +#endif +extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +#ifdef __cplusplus +} +#endif +double p[1024] = { 1.0 }; +double q[1024] = { 1.0 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = cos (q[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = __builtin_sin (q[i]); +} diff --git a/gcc/testsuite/g++.dg/ext/typeof12.C b/gcc/testsuite/g++.dg/ext/typeof12.C new file mode 100644 index 0000000..4ba7573 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/typeof12.C @@ -0,0 +1,11 @@ +// PR c++/71820 + +void f (void (*) (int, int)) {} + +template < typename T > void g (T x, __typeof__ x) {} // { dg-message "sorry, unimplemented: mangling" } + +int main () +{ + f (g < int >); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/fold-cond_expr-1.c b/gcc/testsuite/gcc.dg/fold-cond-2.c index 68ec754..68ec754 100644 --- a/gcc/testsuite/gcc.dg/fold-cond_expr-1.c +++ b/gcc/testsuite/gcc.dg/fold-cond-2.c diff --git a/gcc/testsuite/gcc.dg/fold-cond-3.c b/gcc/testsuite/gcc.dg/fold-cond-3.c new file mode 100644 index 0000000..fe0ba65 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-cond-3.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-fdump-tree-original" } */ + +unsigned long f1 (int x) +{ + return x > 0 ? (unsigned long) x : 0; +} + +unsigned long f2 (int x, int y) +{ + return x > y ? (unsigned long) x : (unsigned long) y; +} + +unsigned long f3 (int x) +{ + return x < 0 ? (unsigned long) x : 0; +} + +unsigned long f4 (int x, int y) +{ + return x < y ? (unsigned long) x : (unsigned long) y; +} + +unsigned long f5 (unsigned int x, unsigned int y) +{ + return x > y ? (unsigned long) x : (unsigned long) y; +} + +unsigned long f6 (unsigned int x, unsigned int y) +{ + return x < y ? 
(unsigned long) x : (unsigned long) y; +} + +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 3 "original"} } */ +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "original"} } */ diff --git a/gcc/testsuite/gcc.dg/noncompile/920923-1.c b/gcc/testsuite/gcc.dg/noncompile/920923-1.c index 1cb140e..006a071 100644 --- a/gcc/testsuite/gcc.dg/noncompile/920923-1.c +++ b/gcc/testsuite/gcc.dg/noncompile/920923-1.c @@ -1,5 +1,6 @@ /* { dg-message "undeclared identifier is reported only once" "reminder for mmu_base" { target *-*-* } 0 } */ typedef BYTE unsigned char; /* { dg-error "expected" } */ +/* { dg-warning "useless type name in empty declaration" "" { target *-*-* } .-1 } */ typedef int item_n; typedef int perm_set; struct PENT { caddr_t v_addr; };/* { dg-error "unknown type name" } */ diff --git a/gcc/testsuite/gcc.dg/pr7356-2.c b/gcc/testsuite/gcc.dg/pr7356-2.c new file mode 100644 index 0000000..ad67975 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr7356-2.c @@ -0,0 +1,33 @@ +/* { dg-options "-fdiagnostics-show-caret" } */ + +int i /* { dg-error "6: expected ';' before 'int'" } */ +int j; +/* { dg-begin-multiline-output "" } + int i + ^ + ; + int j; + ~~~ + { dg-end-multiline-output "" } */ + + +void test (void) +{ + int i /* { dg-error "8: expected ';' before 'int'" } */ + int j; + + /* { dg-begin-multiline-output "" } + int i + ^ + ; + int j; + ~~~ + { dg-end-multiline-output "" } */ +} + +int old_style_params (first, second) + int first; + int second; +{ + return first + second; +} diff --git a/gcc/testsuite/gcc.dg/pr7356.c b/gcc/testsuite/gcc.dg/pr7356.c new file mode 100644 index 0000000..84baf07 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr7356.c @@ -0,0 +1,17 @@ +/* { dg-options "-fdiagnostics-show-caret" } */ + +a /* { dg-line stray_token } */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +int main(int argc, char** argv) +{ + return 0; +} + +/* { dg-error "expected ';' before '.*'" "" { target *-*-* } stray_token } */ +/* { dg-begin-multiline-output "" } + a + ^ + ; + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/spellcheck-typenames.c b/gcc/testsuite/gcc.dg/spellcheck-typenames.c index f3b8102..3717ad8 100644 --- a/gcc/testsuite/gcc.dg/spellcheck-typenames.c +++ b/gcc/testsuite/gcc.dg/spellcheck-typenames.c @@ -100,8 +100,9 @@ baz value; /* { dg-error "1: unknown type name .baz.; use .enum. keyword to refe { dg-end-multiline-output "" } */ /* TODO: it would be better to detect the "singed" vs "signed" typo here. */ -singed char ch; /* { dg-error "8: before .char." } */ +singed char ch; /* { dg-error "7: before .char." 
} */ /* { dg-begin-multiline-output "" } singed char ch; - ^~~~ + ^~~~~ + ; { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/torture/pr82436-2.c b/gcc/testsuite/gcc.dg/torture/pr82436-2.c new file mode 100644 index 0000000..32eda18 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr82436-2.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ + +enum +{ + a, b, c, d, e, f, g, h, j, k +}; + +int l; +void m (short *s) +{ + short n, o, p; + float(*q)[k]; + int r, i; + while (l > 0) + r = l; + for (;;) + { + i = 0; + for (; i < r; i++) + { + { + float ab = q[i][a]; + int i = ab; + p = i; + } + ((short *) s)[0] = p; + { + float ab = q[i][b]; + int i = ab; + o = i; + } + ((short *) s)[1] = o; + { + float ab = q[i][f]; + int i = ab; + n = i; + } + ((short *) s)[2] = n; + float ab = q[i][g]; + int i = ab; + ((short *) s)[3] = i; + s = (short *) s + 4; + } + } +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c index 0193c6e..01c37a5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c @@ -46,7 +46,7 @@ int xxx(void) /* CRIS keeps the address in a register. */ /* m68k sometimes puts the address in a register, depending on CPU and PIC. */ -/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* crisv32-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* } } } */ +/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* crisv32-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* nvptx*-*-* } } } */ /* { dg-final { scan-assembler-times "foo,%r" 5 { target hppa*-*-* } } } */ /* { dg-final { scan-assembler-times "= foo" 5 { target ia64*-*-* } } } */ /* { dg-final { scan-assembler-times "call\[ \t\]*_foo" 5 { target i?86-*-mingw* i?86-*-cygwin* } } } */ @@ -55,3 +55,4 @@ int xxx(void) /* { dg-final { scan-assembler-times "Jsr \\\$r" 5 { target cris-*-* } } } */ /* { dg-final { scan-assembler-times "\[jb\]sr" 5 { target fido-*-* m68k-*-* } } } */ /* { dg-final { scan-assembler-times "bra *tr,r\[1-9\]*,r21" 5 { target visium-*-* } } } */ +/* { dg-final { scan-assembler-times "(?n)\[ \t\]call\[ \t\].*\[ \t\]foo," 5 { target nvptx*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c index 836fa76..1afdb46 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c @@ -83,5 +83,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {{ vect_no_align && { ! vect_hw_misalign } } || {vect_sizes_32B_16B }}} } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target {{ vect_no_align && { ! vect_hw_misalign } } || {vect_sizes_32B_16B }}} } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_no_align && { { ! 
vect_hw_misalign } && vect_sizes_32B_16B } } }} } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c index dc4f520..ac67474 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c index f3cc6c7..d020f64 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c @@ -1,4 +1,7 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c new file mode 100644 index 0000000..b7378ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c @@ -0,0 +1,73 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#include <arm_neon.h> + +/* Unsigned Dot Product instructions. */ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y) +{ + return vdot_laneq_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_laneq_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. 
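Each sdot form below multiplies the four signed bytes in each 32-bit group of its two vector inputs elementwise and accumulates the sum of the four products into the corresponding 32-bit lane of the accumulator, mirroring the unsigned udot forms above; for example, a group of bytes {1,1,1,1} dotted with {-2,-2,-2,-2} adds 4 * (1 * -2) = -8 to its lane, which is the value checked in vdot-exec.c.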
*/ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y) +{ + return vdot_laneq_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_laneq_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c new file mode 100644 index 0000000..3e7cd6c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c @@ -0,0 +1,81 @@ +/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */ +/* { dg-do run { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ + +#include <arm_neon.h> + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) 
ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); + +#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4) +#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + t3 f##_##rx2 = {0}; \ + f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, ORDER (3, 2)); \ + if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6) \ + abort (); \ + t3 f##_##rx3 = {0}; \ + f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, ORDER (3, 3)); \ + if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8) \ + abort (); + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h new file mode 100644 index 0000000..90b00af --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h @@ -0,0 +1,15 @@ +TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i<len; i++) { + prod = X[i] * Y[i]; + result += prod; + } + return result; +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c new file mode 100644 index 0000000..57b5ef8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#define N 64 +#define TYPE signed + +#include "vect-dot-qi.h" + +/* { dg-final { scan-assembler-times {sdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c new file mode 100644 index 0000000..b2cef31 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#define N 64 +#define TYPE unsigned + +#include "vect-dot-qi.h" + +/* { dg-final { scan-assembler-times {udot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c b/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c index 6080e18..bd0c73c 100644 --- a/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c +++ b/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target ilp32 } */ -/* { dg-options "-O3 -fno-math-errno" } */ +/* { dg-options "-O3 -fno-math-errno -fno-trapping-math" } */ #include "lrint-matherr.h" diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c new file mode 100644 index 0000000..1e46755 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +/* Check that we split unaligned LDP/STP into base and aligned offset. */ + +typedef struct +{ + int a, b, c, d, e; +} S; + +void foo (S *); + +void test (int x) +{ + S s = { .a = x }; + foo (&s); +} + +/* { dg-final { scan-assembler-not "mov\tx\[0-9\]+, sp" } } */ diff --git a/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c b/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c index bd85e86..268e9e4 100644 --- a/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c +++ b/gcc/testsuite/gcc.target/arm/require-pic-register-loc.c @@ -18,12 +18,12 @@ main (int argc) /* line 9. */ return 0; } -/* { dg-final { scan-assembler-not "\.loc 1 7 0" } } */ -/* { dg-final { scan-assembler-not "\.loc 1 8 0" } } */ -/* { dg-final { scan-assembler-not "\.loc 1 9 0" } } */ +/* { dg-final { scan-assembler-not "\.loc 1 7 \[0-9\]\+" } } */ +/* { dg-final { scan-assembler-not "\.loc 1 8 \[0-9\]\+" } } */ +/* { dg-final { scan-assembler-not "\.loc 1 9 \[0-9\]\+" } } */ /* The loc at the start of the prologue. */ -/* { dg-final { scan-assembler-times "\.loc 1 10 0" 1 } } */ +/* { dg-final { scan-assembler-times "\.loc 1 10 \[0-9\]\+" 1 } } */ /* The loc at the end of the prologue, with the first user line. 
*/ -/* { dg-final { scan-assembler-times "\.loc 1 11 0" 1 } } */ +/* { dg-final { scan-assembler-times "\.loc 1 11 \[0-9\]\+" 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c new file mode 100644 index 0000000..054f470 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vdot-exec.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ +/* { dg-add-options arm_v8_2a_dotprod_neon } */ + +#include <arm_neon.h> + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c index 4b53e37..d3c30fc 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512f" } */ -/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */ +/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */ /* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -17,4 +17,29 @@ avx512f_test (void) m = _mm512_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ); m = _mm512_cmp_round_pd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); m = _mm512_mask_cmp_round_pd_mask (m, x, x, 
_CMP_FALSE_OQ, _MM_FROUND_NO_EXC); + + m = _mm512_cmpeq_pd_mask (x, x); + m = _mm512_mask_cmpeq_pd_mask (m, x, x); + + m = _mm512_cmplt_pd_mask (x, x); + m = _mm512_mask_cmplt_pd_mask (m, x, x); + + m = _mm512_cmple_pd_mask (x, x); + m = _mm512_mask_cmple_pd_mask (m, x, x); + + m = _mm512_cmpunord_pd_mask (x, x); + m = _mm512_mask_cmpunord_pd_mask (m, x, x); + + m = _mm512_cmpneq_pd_mask (x, x); + m = _mm512_mask_cmpneq_pd_mask (m, x, x); + + m = _mm512_cmpnlt_pd_mask (x, x); + m = _mm512_mask_cmpnlt_pd_mask (m, x, x); + + m = _mm512_cmpnle_pd_mask (x, x); + m = _mm512_mask_cmpnle_pd_mask (m, x, x); + + m = _mm512_cmpord_pd_mask (x, x); + m = _mm512_mask_cmpord_pd_mask (m, x, x); } + diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c index 52e226d..cee1197 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c @@ -11,58 +11,69 @@ #define SIZE (AVX512F_LEN / 64) #include "avx512f-mask-type.h" +#undef SUF +#undef SSIZE +#undef GEN_CMP +#undef CHECK_CMP + #if AVX512F_LEN == 512 -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 8; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm512_loadu_pd(s1); \ - source2.x = _mm512_loadu_pd(s2); \ - dst1 = _mm512_cmp_pd_mask(source1.x, source2.x, imm);\ - dst2 = _mm512_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm512##fun +#define SSIZE 8 + +#define GEN_CMP(type) \ + { \ + dst3 = _mm512_cmp##type##_pd_mask(source1.x, source2.x);\ + dst4 = _mm512_mask_cmp##type##_pd_mask(mask, source1.x, source2.x);\ + if (dst3 != dst1) abort(); \ + if (dst4 != dst2) abort(); \ + } + +#define CHECK_CMP(imm) \ + if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \ + if (imm == _CMP_LT_OS) GEN_CMP(lt) \ + if (imm == _CMP_LE_OS) GEN_CMP(le) \ + if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \ + if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \ + if (imm == _CMP_NLT_US) GEN_CMP(nlt) \ + if (imm == _CMP_NLE_US) GEN_CMP(nle) \ + if (imm == _CMP_ORD_Q) GEN_CMP(ord) + #endif #if AVX512F_LEN == 256 -#undef CMP -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 4; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm256_loadu_pd(s1); \ - source2.x = _mm256_loadu_pd(s2); \ - dst1 = _mm256_cmp_pd_mask(source1.x, source2.x, imm);\ - dst2 = _mm256_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm256##fun +#define SSIZE 4 +#define GEN_CMP(type) +#define CHECK_CMP(imm) #endif #if AVX512F_LEN == 128 +#define SUF(fun) _mm##fun +#define SSIZE 2 +#define GEN_CMP(type) +#define CHECK_CMP(imm) +#endif + #undef CMP #define CMP(imm, rel) \ dst_ref = 0; \ - for (i = 0; i < 2; i++) \ + for (i = 0; i < SSIZE; i++) \ { \ dst_ref = (((int) rel) << i) | dst_ref; \ } \ - source1.x = _mm_loadu_pd(s1); \ - source2.x = _mm_loadu_pd(s2); \ - dst1 = _mm_cmp_pd_mask(source1.x, source2.x, imm);\ - dst2 = _mm_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\ + source1.x = SUF(_loadu_pd)(s1); \ + source2.x = SUF(_loadu_pd)(s2); \ + dst1 = SUF(_cmp_pd_mask)(source1.x, source2.x, imm);\ + dst2 = SUF(_mask_cmp_pd_mask)(mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); -#endif + if ((dst_ref & mask) != dst2) abort(); \ + CHECK_CMP(imm) void TEST () { UNION_TYPE (AVX512F_LEN, d) source1, source2; 
- MASK_TYPE dst1, dst2, dst_ref; + MASK_TYPE dst1, dst2, dst3, dst4, dst_ref; MASK_TYPE mask = MASK_VALUE; int i; double s1[8]={2134.3343, 6678.346, 453.345635, 54646.464, diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c index 9812915..27be360 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512f" } */ -/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */ +/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */ /* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -17,4 +17,28 @@ avx512f_test (void) m = _mm512_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ); m = _mm512_cmp_round_ps_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); m = _mm512_mask_cmp_round_ps_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC); + + m = _mm512_cmpeq_ps_mask (x, x); + m = _mm512_mask_cmpeq_ps_mask (m, x, x); + + m = _mm512_cmplt_ps_mask (x, x); + m = _mm512_mask_cmplt_ps_mask (m, x, x); + + m = _mm512_cmple_ps_mask (x, x); + m = _mm512_mask_cmple_ps_mask (m, x, x); + + m = _mm512_cmpunord_ps_mask (x, x); + m = _mm512_mask_cmpunord_ps_mask (m, x, x); + + m = _mm512_cmpneq_ps_mask (x, x); + m = _mm512_mask_cmpneq_ps_mask (m, x, x); + + m = _mm512_cmpnlt_ps_mask (x, x); + m = _mm512_mask_cmpnlt_ps_mask (m, x, x); + + m = _mm512_cmpnle_ps_mask (x, x); + m = _mm512_mask_cmpnle_ps_mask (m, x, x); + + m = _mm512_cmpord_ps_mask (x, x); + m = _mm512_mask_cmpord_ps_mask (m, x, x); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c index 2ffa2ed..22e368f 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c @@ -11,59 +11,69 @@ #define SIZE (AVX512F_LEN / 32) #include "avx512f-mask-type.h" +#undef SUF +#undef SSIZE +#undef GEN_CMP +#undef CHECK_CMP + #if AVX512F_LEN == 512 -#undef CMP -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 16; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm512_loadu_ps(s1); \ - source2.x = _mm512_loadu_ps(s2); \ - dst1 = _mm512_cmp_ps_mask(source1.x, source2.x, imm);\ - dst2 = _mm512_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm512##fun +#define SSIZE 16 + +#define GEN_CMP(type) \ + { \ + dst3 = _mm512_cmp##type##_ps_mask(source1.x, source2.x);\ + dst4 = _mm512_mask_cmp##type##_ps_mask(mask, source1.x, source2.x);\ + if (dst3 != dst1) abort(); \ + if (dst4 != dst2) abort(); \ + } + +#define CHECK_CMP(imm) \ + if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \ + if (imm == _CMP_LT_OS) GEN_CMP(lt) \ + if (imm == _CMP_LE_OS) GEN_CMP(le) \ + if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \ + if (imm == 
_CMP_NEQ_UQ) GEN_CMP(neq) \ + if (imm == _CMP_NLT_US) GEN_CMP(nlt) \ + if (imm == _CMP_NLE_US) GEN_CMP(nle) \ + if (imm == _CMP_ORD_Q) GEN_CMP(ord) + #endif #if AVX512F_LEN == 256 -#undef CMP -#define CMP(imm, rel) \ - dst_ref = 0; \ - for (i = 0; i < 8; i++) \ - { \ - dst_ref = (((int) rel) << i) | dst_ref; \ - } \ - source1.x = _mm256_loadu_ps(s1); \ - source2.x = _mm256_loadu_ps(s2); \ - dst1 = _mm256_cmp_ps_mask(source1.x, source2.x, imm);\ - dst2 = _mm256_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\ - if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); +#define SUF(fun) _mm256##fun +#define SSIZE 8 +#define GEN_CMP(type) +#define CHECK_CMP(imm) #endif #if AVX512F_LEN == 128 +#define SUF(fun) _mm##fun +#define SSIZE 4 +#define GEN_CMP(type) +#define CHECK_CMP(imm) +#endif + #undef CMP #define CMP(imm, rel) \ dst_ref = 0; \ - for (i = 0; i < 4; i++) \ + for (i = 0; i < SSIZE; i++) \ { \ dst_ref = (((int) rel) << i) | dst_ref; \ } \ - source1.x = _mm_loadu_ps(s1); \ - source2.x = _mm_loadu_ps(s2); \ - dst1 = _mm_cmp_ps_mask(source1.x, source2.x, imm);\ - dst2 = _mm_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\ + source1.x = SUF(_loadu_ps)(s1); \ + source2.x = SUF(_loadu_ps)(s2); \ + dst1 = SUF(_cmp_ps_mask)(source1.x, source2.x, imm);\ + dst2 = SUF(_mask_cmp_ps_mask)(mask, source1.x, source2.x, imm);\ if (dst_ref != dst1) abort(); \ - if ((dst_ref & mask) != dst2) abort(); -#endif + if ((dst_ref & mask) != dst2) abort(); \ + CHECK_CMP(imm) void TEST () { UNION_TYPE (AVX512F_LEN,) source1, source2; - MASK_TYPE dst1, dst2, dst_ref; + MASK_TYPE dst1, dst2, dst3, dst4, dst_ref; MASK_TYPE mask = MASK_VALUE; int i; float s1[16] = {2134.3343, 6678.346, 453.345635, 54646.464, diff --git a/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c b/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c index 12ea9f4..8e54b4b 100644 --- a/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c +++ b/gcc/testsuite/gcc.target/i386/cet-sjlj-5.c @@ -2,7 +2,7 @@ /* { dg-options "-O -fcf-protection -mcet" } */ /* { dg-final { scan-assembler-times "endbr32" 2 { target ia32 } } } */ /* { dg-final { scan-assembler-times "endbr64" 2 { target { ! 
ia32 } } } } */ -/* { dg-final { scan-assembler-times "call _setjmp" 1 } } */ +/* { dg-final { scan-assembler-times "call _?setjmp" 1 } } */ /* { dg-final { scan-assembler-times "call longjmp" 1 } } */ #include <stdio.h> diff --git a/gcc/testsuite/gcc.target/i386/pr81706.c b/gcc/testsuite/gcc.target/i386/pr81706.c new file mode 100644 index 0000000..333fd15 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr81706.c @@ -0,0 +1,32 @@ +/* PR libstdc++/81706 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx2 -mno-avx512f" } */ +/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_cos" } } */ +/* { dg-final { scan-assembler "call\[^\n\r]_ZGVdN4v_sin" } } */ + +#ifdef __cplusplus +extern "C" { +#endif +extern double cos (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +extern double sin (double) __attribute__ ((nothrow, leaf, simd ("notinbranch"))); +#ifdef __cplusplus +} +#endif +double p[1024] = { 1.0 }; +double q[1024] = { 1.0 }; + +void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = cos (q[i]); +} + +void +bar (void) +{ + int i; + for (i = 0; i < 1024; i++) + p[i] = __builtin_sin (q[i]); +} diff --git a/gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c b/gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c new file mode 100644 index 0000000..1083fe6 --- /dev/null +++ b/gcc/testsuite/gcc.target/nios2/gpopt-gprel-sec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mgpopt=local -mgprel-sec=\\.frog.+" } */ + +extern int a __attribute__ ((section (".frog1"))); +static volatile int b __attribute__ ((section (".frog2"))) = 1; +extern int c __attribute__ ((section (".data"))); +static volatile int d __attribute__ ((section (".data"))) = 2; + +extern int e; +static volatile int f = 3; + +volatile int g __attribute__ ((weak)) = 4; + +extern int h[100]; +static int i[100]; +static int j[100] __attribute__ ((section (".sdata"))); + +typedef int (*ftype) (int); +extern int foo (int); + +extern int bar (int, int*, int*, int*, ftype); + +int baz (void) +{ + return bar (a + b + c + d + e + f + g, h, i, j, foo); +} + +/* { dg-final { scan-assembler "%gprel\\(a\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(b\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(c\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(d\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(e\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(f\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(g\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(h\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(i\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(j\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(foo\\)" } } */ diff --git a/gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c b/gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c new file mode 100644 index 0000000..5fda9e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/nios2/gpopt-r0rel-sec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mgpopt=local -mr0rel-sec=\\.frog.+" } */ + +extern int a __attribute__ ((section (".frog1"))); +static volatile int b __attribute__ ((section (".frog2"))) = 1; +extern int c __attribute__ ((section (".data"))); +static volatile int d __attribute__ ((section (".data"))) = 2; + +extern int e; +static volatile int f = 3; + +volatile int g __attribute__ ((weak)) = 4; + +extern int h[100]; +static int i[100]; +static int j[100] __attribute__ ((section (".sdata"))); + +typedef int (*ftype) (int); +extern int foo (int); + +extern int bar (int, 
int*, int*, int*, ftype); + +int baz (void) +{ + return bar (a + b + c + d + e + f + g, h, i, j, foo); +} + +/* { dg-final { scan-assembler "%lo\\(a\\)\\(r0\\)" } } */ +/* { dg-final { scan-assembler "%lo\\(b\\)\\(r0\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(c\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(d\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(e\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(f\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(g\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(h\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(i\\)" } } */ +/* { dg-final { scan-assembler "%gprel\\(j\\)" } } */ +/* { dg-final { scan-assembler-not "%gprel\\(foo\\)" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 56ac221..d7ef04f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4396,6 +4396,48 @@ proc check_effective_target_arm_v8_2a_fp16_neon_ok { } { check_effective_target_arm_v8_2a_fp16_neon_ok_nocache] } +# Return 1 if the target supports ARMv8.2 Adv.SIMD Dot Product +# instructions, 0 otherwise. The test is valid for ARM and for AArch64. +# Record the command line options needed. + +proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } { + global et_arm_v8_2a_dotprod_neon_flags + set et_arm_v8_2a_dotprod_neon_flags "" + + if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { + return 0; + } + + # Iterate through sets of options to find the compiler flags that + # need to be added to the -march option. + foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} { + if { [check_no_compiler_messages_nocache \ + arm_v8_2a_dotprod_neon_ok object { + #if !defined (__ARM_FEATURE_DOTPROD) + #error "__ARM_FEATURE_DOTPROD not defined" + #endif + } "$flags -march=armv8.2-a+dotprod"] } { + set et_arm_v8_2a_dotprod_neon_flags "$flags -march=armv8.2-a+dotprod" + return 1 + } + } + + return 0; +} + +proc check_effective_target_arm_v8_2a_dotprod_neon_ok { } { + return [check_cached_effective_target arm_v8_2a_dotprod_neon_ok \ + check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache] +} + +proc add_options_for_arm_v8_2a_dotprod_neon { flags } { + if { ! [check_effective_target_arm_v8_2a_dotprod_neon_ok] } { + return "$flags" + } + global et_arm_v8_2a_dotprod_neon_flags + return "$flags $et_arm_v8_2a_dotprod_neon_flags" +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -4533,6 +4575,42 @@ proc check_effective_target_arm_v8_2a_fp16_neon_hw { } { } [add_options_for_arm_v8_2a_fp16_neon ""]] } +# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2 +# with the Dot Product extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_dotprod_neon_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_dotprod_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + + uint32x2_t results = {0,0}; + uint8x8_t a = {1,1,1,1,2,2,2,2}; + uint8x8_t b = {2,2,2,2,3,3,3,3}; + + #ifdef __ARM_ARCH_ISA_A64 + asm ("udot %0.2s, %1.8b, %2.8b" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + + #else + asm ("vudot.u8 %P0, %P1, %P2" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + + return (results[0] == 8 && results[1] == 24) ? 
1 : 0; + } + } [add_options_for_arm_v8_2a_dotprod_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -5850,6 +5928,8 @@ proc check_effective_target_vect_sdot_qi { } { } else { set et_vect_sdot_qi_saved($et_index) 0 if { [istarget ia64-*-*] + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) } { set et_vect_udot_qi_saved 1 @@ -5874,6 +5954,8 @@ proc check_effective_target_vect_udot_qi { } { } else { set et_vect_udot_qi_saved($et_index) 0 if { [istarget powerpc*-*-*] + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] || [istarget ia64-*-*] || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) } { @@ -8290,7 +8372,7 @@ proc check_effective_target_aarch64_tiny { } { # Create functions to check that the AArch64 assembler supports the # various architecture extensions via the .arch_extension pseudo-op. -foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse"} { +foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod"} { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } { diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index 7bf8701..e4189d1 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -39,7 +39,6 @@ along with GCC; see the file COPYING3. If not see #include "gimplify.h" #include "gimple-iterator.h" #include "tree-cfg.h" -#include "tree-ssa-loop.h" #include "tree-into-ssa.h" #include "tree-dfa.h" #include "tree-ssa.h" @@ -50,9 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "dbgcnt.h" #include "domwalk.h" #include "tree-ssa-propagate.h" -#include "ipa-utils.h" #include "tree-cfgcleanup.h" -#include "langhooks.h" #include "alias.h" /* Even though this file is called tree-ssa-pre.c, we actually @@ -516,9 +513,6 @@ typedef struct bb_bitmap_sets optimization PRE was able to perform. */ static struct { - /* The number of RHS computations eliminated by PRE. */ - int eliminations; - /* The number of new expressions/temporaries generated by PRE. */ int insertions; @@ -551,12 +545,6 @@ static unsigned int get_expr_value_id (pre_expr); static object_allocator<bitmap_set> bitmap_set_pool ("Bitmap sets"); static bitmap_obstack grand_bitmap_obstack; -/* Set of blocks with statements that have had their EH properties changed. */ -static bitmap need_eh_cleanup; - -/* Set of blocks with statements that have had their AB properties changed. */ -static bitmap need_ab_cleanup; - /* A three tuple {e, pred, v} used to cache phi translations in the phi_translate_table. */ @@ -4042,810 +4030,6 @@ compute_avail (void) free (worklist); } - -/* Local state for the eliminate domwalk. */ -static vec<gimple *> el_to_remove; -static vec<gimple *> el_to_fixup; -static unsigned int el_todo; -static vec<tree> el_avail; -static vec<tree> el_avail_stack; - -/* Return a leader for OP that is available at the current point of the - eliminate domwalk. */ - -static tree -eliminate_avail (tree op) -{ - tree valnum = VN_INFO (op)->valnum; - if (TREE_CODE (valnum) == SSA_NAME) - { - if (SSA_NAME_IS_DEFAULT_DEF (valnum)) - return valnum; - if (el_avail.length () > SSA_NAME_VERSION (valnum)) - return el_avail[SSA_NAME_VERSION (valnum)]; - } - else if (is_gimple_min_invariant (valnum)) - return valnum; - return NULL_TREE; -} - -/* At the current point of the eliminate domwalk make OP available. 
*/ - -static void -eliminate_push_avail (tree op) -{ - tree valnum = VN_INFO (op)->valnum; - if (TREE_CODE (valnum) == SSA_NAME) - { - if (el_avail.length () <= SSA_NAME_VERSION (valnum)) - el_avail.safe_grow_cleared (SSA_NAME_VERSION (valnum) + 1); - tree pushop = op; - if (el_avail[SSA_NAME_VERSION (valnum)]) - pushop = el_avail[SSA_NAME_VERSION (valnum)]; - el_avail_stack.safe_push (pushop); - el_avail[SSA_NAME_VERSION (valnum)] = op; - } -} - -/* Insert the expression recorded by SCCVN for VAL at *GSI. Returns - the leader for the expression if insertion was successful. */ - -static tree -eliminate_insert (gimple_stmt_iterator *gsi, tree val) -{ - /* We can insert a sequence with a single assignment only. */ - gimple_seq stmts = VN_INFO (val)->expr; - if (!gimple_seq_singleton_p (stmts)) - return NULL_TREE; - gassign *stmt = dyn_cast <gassign *> (gimple_seq_first_stmt (stmts)); - if (!stmt - || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) - && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR - && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF - && (gimple_assign_rhs_code (stmt) != BIT_AND_EXPR - || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST))) - return NULL_TREE; - - tree op = gimple_assign_rhs1 (stmt); - if (gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR - || gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) - op = TREE_OPERAND (op, 0); - tree leader = TREE_CODE (op) == SSA_NAME ? eliminate_avail (op) : op; - if (!leader) - return NULL_TREE; - - tree res; - stmts = NULL; - if (gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) - res = gimple_build (&stmts, BIT_FIELD_REF, - TREE_TYPE (val), leader, - TREE_OPERAND (gimple_assign_rhs1 (stmt), 1), - TREE_OPERAND (gimple_assign_rhs1 (stmt), 2)); - else if (gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) - res = gimple_build (&stmts, BIT_AND_EXPR, - TREE_TYPE (val), leader, gimple_assign_rhs2 (stmt)); - else - res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), - TREE_TYPE (val), leader); - if (TREE_CODE (res) != SSA_NAME - || SSA_NAME_IS_DEFAULT_DEF (res) - || gimple_bb (SSA_NAME_DEF_STMT (res))) - { - gimple_seq_discard (stmts); - - /* During propagation we have to treat SSA info conservatively - and thus we can end up simplifying the inserted expression - at elimination time to sth not defined in stmts. */ - /* But then this is a redundancy we failed to detect. Which means - res now has two values. That doesn't play well with how - we track availability here, so give up. 
*/ - if (dump_file && (dump_flags & TDF_DETAILS)) - { - if (TREE_CODE (res) == SSA_NAME) - res = eliminate_avail (res); - if (res) - { - fprintf (dump_file, "Failed to insert expression for value "); - print_generic_expr (dump_file, val); - fprintf (dump_file, " which is really fully redundant to "); - print_generic_expr (dump_file, res); - fprintf (dump_file, "\n"); - } - } - - return NULL_TREE; - } - else - { - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - VN_INFO_GET (res)->valnum = val; - } - - pre_stats.insertions++; - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Inserted "); - print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (res), 0); - } - - return res; -} - -class eliminate_dom_walker : public dom_walker -{ -public: - eliminate_dom_walker (cdi_direction direction, bool do_pre_) - : dom_walker (direction), do_pre (do_pre_) {} - - virtual edge before_dom_children (basic_block); - virtual void after_dom_children (basic_block); - - bool do_pre; -}; - -/* Perform elimination for the basic-block B during the domwalk. */ - -edge -eliminate_dom_walker::before_dom_children (basic_block b) -{ - /* Mark new bb. */ - el_avail_stack.safe_push (NULL_TREE); - - /* Skip unreachable blocks marked unreachable during the SCCVN domwalk. */ - edge_iterator ei; - edge e; - FOR_EACH_EDGE (e, ei, b->preds) - if (e->flags & EDGE_EXECUTABLE) - break; - if (! e) - return NULL; - - for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);) - { - gphi *phi = gsi.phi (); - tree res = PHI_RESULT (phi); - - if (virtual_operand_p (res)) - { - gsi_next (&gsi); - continue; - } - - tree sprime = eliminate_avail (res); - if (sprime - && sprime != res) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced redundant PHI node defining "); - print_generic_expr (dump_file, res); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, "\n"); - } - - /* If we inserted this PHI node ourself, it's not an elimination. */ - if (inserted_exprs - && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (res))) - pre_stats.phis--; - else - pre_stats.eliminations++; - - /* If we will propagate into all uses don't bother to do - anything. */ - if (may_propagate_copy (res, sprime)) - { - /* Mark the PHI for removal. */ - el_to_remove.safe_push (phi); - gsi_next (&gsi); - continue; - } - - remove_phi_node (&gsi, false); - - if (!useless_type_conversion_p (TREE_TYPE (res), TREE_TYPE (sprime))) - sprime = fold_convert (TREE_TYPE (res), sprime); - gimple *stmt = gimple_build_assign (res, sprime); - gimple_stmt_iterator gsi2 = gsi_after_labels (b); - gsi_insert_before (&gsi2, stmt, GSI_NEW_STMT); - continue; - } - - eliminate_push_avail (res); - gsi_next (&gsi); - } - - for (gimple_stmt_iterator gsi = gsi_start_bb (b); - !gsi_end_p (gsi); - gsi_next (&gsi)) - { - tree sprime = NULL_TREE; - gimple *stmt = gsi_stmt (gsi); - tree lhs = gimple_get_lhs (stmt); - if (lhs && TREE_CODE (lhs) == SSA_NAME - && !gimple_has_volatile_ops (stmt) - /* See PR43491. Do not replace a global register variable when - it is a the RHS of an assignment. Do replace local register - variables since gcc does not guarantee a local variable will - be allocated in register. - ??? The fix isn't effective here. This should instead - be ensured by not value-numbering them the same but treating - them like volatiles? 
*/ - && !(gimple_assign_single_p (stmt) - && (TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL - && DECL_HARD_REGISTER (gimple_assign_rhs1 (stmt)) - && is_global_var (gimple_assign_rhs1 (stmt))))) - { - sprime = eliminate_avail (lhs); - if (!sprime) - { - /* If there is no existing usable leader but SCCVN thinks - it has an expression it wants to use as replacement, - insert that. */ - tree val = VN_INFO (lhs)->valnum; - if (val != VN_TOP - && TREE_CODE (val) == SSA_NAME - && VN_INFO (val)->needs_insertion - && VN_INFO (val)->expr != NULL - && (sprime = eliminate_insert (&gsi, val)) != NULL_TREE) - eliminate_push_avail (sprime); - } - - /* If this now constitutes a copy duplicate points-to - and range info appropriately. This is especially - important for inserted code. See tree-ssa-copy.c - for similar code. */ - if (sprime - && TREE_CODE (sprime) == SSA_NAME) - { - basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime)); - if (POINTER_TYPE_P (TREE_TYPE (lhs)) - && VN_INFO_PTR_INFO (lhs) - && ! VN_INFO_PTR_INFO (sprime)) - { - duplicate_ssa_name_ptr_info (sprime, - VN_INFO_PTR_INFO (lhs)); - if (b != sprime_b) - mark_ptr_info_alignment_unknown - (SSA_NAME_PTR_INFO (sprime)); - } - else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - && VN_INFO_RANGE_INFO (lhs) - && ! VN_INFO_RANGE_INFO (sprime) - && b == sprime_b) - duplicate_ssa_name_range_info (sprime, - VN_INFO_RANGE_TYPE (lhs), - VN_INFO_RANGE_INFO (lhs)); - } - - /* Inhibit the use of an inserted PHI on a loop header when - the address of the memory reference is a simple induction - variable. In other cases the vectorizer won't do anything - anyway (either it's loop invariant or a complicated - expression). */ - if (sprime - && TREE_CODE (sprime) == SSA_NAME - && do_pre - && (flag_tree_loop_vectorize || flag_tree_parallelize_loops > 1) - && loop_outer (b->loop_father) - && has_zero_uses (sprime) - && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)) - && gimple_assign_load_p (stmt)) - { - gimple *def_stmt = SSA_NAME_DEF_STMT (sprime); - basic_block def_bb = gimple_bb (def_stmt); - if (gimple_code (def_stmt) == GIMPLE_PHI - && def_bb->loop_father->header == def_bb) - { - loop_p loop = def_bb->loop_father; - ssa_op_iter iter; - tree op; - bool found = false; - FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) - { - affine_iv iv; - def_bb = gimple_bb (SSA_NAME_DEF_STMT (op)); - if (def_bb - && flow_bb_inside_loop_p (loop, def_bb) - && simple_iv (loop, loop, op, &iv, true)) - { - found = true; - break; - } - } - if (found) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Not replacing "); - print_gimple_expr (dump_file, stmt, 0); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, " which would add a loop" - " carried dependence to loop %d\n", - loop->num); - } - /* Don't keep sprime available. */ - sprime = NULL_TREE; - } - } - } - - if (sprime) - { - /* If we can propagate the value computed for LHS into - all uses don't bother doing anything with this stmt. */ - if (may_propagate_copy (lhs, sprime)) - { - /* Mark it for removal. */ - el_to_remove.safe_push (stmt); - - /* ??? Don't count copy/constant propagations. 
*/ - if (gimple_assign_single_p (stmt) - && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME - || gimple_assign_rhs1 (stmt) == sprime)) - continue; - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced "); - print_gimple_expr (dump_file, stmt, 0); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, " in all uses of "); - print_gimple_stmt (dump_file, stmt, 0); - } - - pre_stats.eliminations++; - continue; - } - - /* If this is an assignment from our leader (which - happens in the case the value-number is a constant) - then there is nothing to do. */ - if (gimple_assign_single_p (stmt) - && sprime == gimple_assign_rhs1 (stmt)) - continue; - - /* Else replace its RHS. */ - bool can_make_abnormal_goto - = is_gimple_call (stmt) - && stmt_can_make_abnormal_goto (stmt); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Replaced "); - print_gimple_expr (dump_file, stmt, 0); - fprintf (dump_file, " with "); - print_generic_expr (dump_file, sprime); - fprintf (dump_file, " in "); - print_gimple_stmt (dump_file, stmt, 0); - } - - pre_stats.eliminations++; - gimple *orig_stmt = stmt; - if (!useless_type_conversion_p (TREE_TYPE (lhs), - TREE_TYPE (sprime))) - sprime = fold_convert (TREE_TYPE (lhs), sprime); - tree vdef = gimple_vdef (stmt); - tree vuse = gimple_vuse (stmt); - propagate_tree_value_into_stmt (&gsi, sprime); - stmt = gsi_stmt (gsi); - update_stmt (stmt); - if (vdef != gimple_vdef (stmt)) - VN_INFO (vdef)->valnum = vuse; - - /* If we removed EH side-effects from the statement, clean - its EH information. */ - if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) - { - bitmap_set_bit (need_eh_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed EH side-effects.\n"); - } - - /* Likewise for AB side-effects. */ - if (can_make_abnormal_goto - && !stmt_can_make_abnormal_goto (stmt)) - { - bitmap_set_bit (need_ab_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed AB side-effects.\n"); - } - - continue; - } - } - - /* If the statement is a scalar store, see if the expression - has the same value number as its rhs. If so, the store is - dead. */ - if (gimple_assign_single_p (stmt) - && !gimple_has_volatile_ops (stmt) - && !is_gimple_reg (gimple_assign_lhs (stmt)) - && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME - || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))) - { - tree val; - tree rhs = gimple_assign_rhs1 (stmt); - vn_reference_t vnresult; - val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE, - &vnresult, false); - if (TREE_CODE (rhs) == SSA_NAME) - rhs = VN_INFO (rhs)->valnum; - if (val - && operand_equal_p (val, rhs, 0)) - { - /* We can only remove the later store if the former aliases - at least all accesses the later one does or if the store - was to readonly memory storing the same value. */ - alias_set_type set = get_alias_set (lhs); - if (! vnresult - || vnresult->set == set - || alias_set_subset_of (set, vnresult->set)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Deleted redundant store "); - print_gimple_stmt (dump_file, stmt, 0); - } - - /* Queue stmt for removal. */ - el_to_remove.safe_push (stmt); - continue; - } - } - } - - /* If this is a control statement value numbering left edges - unexecuted on force the condition in a way consistent with - that. 
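
When value numbering proves that only one successor edge of a conditional is executable, the statement is rewritten to a constant condition and CFG cleanup later deletes the dead edge. A minimal model of that decision, with a hypothetical cond_stmt standing in for gcond and plain bools for the EDGE_EXECUTABLE flags:

#include <cstdio>

struct cond_stmt { bool folded; bool value; };

/* If exactly one successor was proved executable, force the condition
   to the constant that keeps only that edge.  */
static void
fold_to_executable_edge (cond_stmt &c, bool true_edge_ok, bool false_edge_ok)
{
  if (true_edge_ok == false_edge_ok)
    return;                     /* nothing proved, leave it alone */
  c.folded = true;
  c.value = true_edge_ok;
}

int
main ()
{
  cond_stmt c = { false, false };
  fold_to_executable_edge (c, false, true);
  std::printf ("folded=%d value=%d\n", c.folded, c.value);
  return 0;
}
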
*/ - if (gcond *cond = dyn_cast <gcond *> (stmt)) - { - if ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) - ^ (EDGE_SUCC (b, 1)->flags & EDGE_EXECUTABLE)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Removing unexecutable edge from "); - print_gimple_stmt (dump_file, stmt, 0); - } - if (((EDGE_SUCC (b, 0)->flags & EDGE_TRUE_VALUE) != 0) - == ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) != 0)) - gimple_cond_make_true (cond); - else - gimple_cond_make_false (cond); - update_stmt (cond); - el_todo |= TODO_cleanup_cfg; - continue; - } - } - - bool can_make_abnormal_goto = stmt_can_make_abnormal_goto (stmt); - bool was_noreturn = (is_gimple_call (stmt) - && gimple_call_noreturn_p (stmt)); - tree vdef = gimple_vdef (stmt); - tree vuse = gimple_vuse (stmt); - - /* If we didn't replace the whole stmt (or propagate the result - into all uses), replace all uses on this stmt with their - leaders. */ - bool modified = false; - use_operand_p use_p; - ssa_op_iter iter; - FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) - { - tree use = USE_FROM_PTR (use_p); - /* ??? The call code above leaves stmt operands un-updated. */ - if (TREE_CODE (use) != SSA_NAME) - continue; - tree sprime = eliminate_avail (use); - if (sprime && sprime != use - && may_propagate_copy (use, sprime) - /* We substitute into debug stmts to avoid excessive - debug temporaries created by removed stmts, but we need - to avoid doing so for inserted sprimes as we never want - to create debug temporaries for them. */ - && (!inserted_exprs - || TREE_CODE (sprime) != SSA_NAME - || !is_gimple_debug (stmt) - || !bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)))) - { - propagate_value (use_p, sprime); - modified = true; - } - } - - /* Fold the stmt if modified, this canonicalizes MEM_REFs we propagated - into which is a requirement for the IPA devirt machinery. */ - gimple *old_stmt = stmt; - if (modified) - { - /* If a formerly non-invariant ADDR_EXPR is turned into an - invariant one it was on a separate stmt. */ - if (gimple_assign_single_p (stmt) - && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR) - recompute_tree_invariant_for_addr_expr (gimple_assign_rhs1 (stmt)); - gimple_stmt_iterator prev = gsi; - gsi_prev (&prev); - if (fold_stmt (&gsi)) - { - /* fold_stmt may have created new stmts inbetween - the previous stmt and the folded stmt. Mark - all defs created there as varying to not confuse - the SCCVN machinery as we're using that even during - elimination. */ - if (gsi_end_p (prev)) - prev = gsi_start_bb (b); - else - gsi_next (&prev); - if (gsi_stmt (prev) != gsi_stmt (gsi)) - do - { - tree def; - ssa_op_iter dit; - FOR_EACH_SSA_TREE_OPERAND (def, gsi_stmt (prev), - dit, SSA_OP_ALL_DEFS) - /* As existing DEFs may move between stmts - we have to guard VN_INFO_GET. */ - if (! has_VN_INFO (def)) - VN_INFO_GET (def)->valnum = def; - if (gsi_stmt (prev) == gsi_stmt (gsi)) - break; - gsi_next (&prev); - } - while (1); - } - stmt = gsi_stmt (gsi); - /* In case we folded the stmt away schedule the NOP for removal. */ - if (gimple_nop_p (stmt)) - el_to_remove.safe_push (stmt); - } - - /* Visit indirect calls and turn them into direct calls if - possible using the devirtualization machinery. Do this before - checking for required EH/abnormal/noreturn cleanup as devird - may expose more of those. 
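
The devirtualization step introduced here rewrites an indirect virtual call to a direct call once the analysis can enumerate every possible target: a singleton set yields a direct call, and an empty set degenerates to __builtin_unreachable. A sketch of that decision rule, with a hypothetical target vector standing in for the cgraph_node list:

#include <cstdio>
#include <vector>

struct target { const char *name; };

/* Mirror of the "final && targets.length () <= 1" test in the hunk.  */
static const char *
devirtualize (const std::vector<target> &targets, bool final)
{
  if (!final || targets.size () > 1)
    return nullptr;                     /* keep the indirect call */
  if (targets.empty ())
    return "__builtin_unreachable";     /* no target is possible */
  return targets[0].name;               /* unique target: call it directly */
}

int
main ()
{
  std::vector<target> t = { { "Derived::foo" } };
  if (const char *fn = devirtualize (t, true))
    std::printf ("converting indirect call to function %s\n", fn);
  return 0;
}
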
*/ - if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) - { - tree fn = gimple_call_fn (call_stmt); - if (fn - && flag_devirtualize - && virtual_method_call_p (fn)) - { - tree otr_type = obj_type_ref_class (fn); - unsigned HOST_WIDE_INT otr_tok - = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (fn)); - tree instance; - ipa_polymorphic_call_context context (current_function_decl, - fn, stmt, &instance); - context.get_dynamic_type (instance, OBJ_TYPE_REF_OBJECT (fn), - otr_type, stmt); - bool final; - vec <cgraph_node *> targets - = possible_polymorphic_call_targets (obj_type_ref_class (fn), - otr_tok, context, &final); - if (dump_file) - dump_possible_polymorphic_call_targets (dump_file, - obj_type_ref_class (fn), - otr_tok, context); - if (final && targets.length () <= 1 && dbg_cnt (devirt)) - { - tree fn; - if (targets.length () == 1) - fn = targets[0]->decl; - else - fn = builtin_decl_implicit (BUILT_IN_UNREACHABLE); - if (dump_enabled_p ()) - { - location_t loc = gimple_location (stmt); - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - "converting indirect call to " - "function %s\n", - lang_hooks.decl_printable_name (fn, 2)); - } - gimple_call_set_fndecl (call_stmt, fn); - /* If changing the call to __builtin_unreachable - or similar noreturn function, adjust gimple_call_fntype - too. */ - if (gimple_call_noreturn_p (call_stmt) - && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (fn))) - && TYPE_ARG_TYPES (TREE_TYPE (fn)) - && (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fn))) - == void_type_node)) - gimple_call_set_fntype (call_stmt, TREE_TYPE (fn)); - maybe_remove_unused_call_args (cfun, call_stmt); - modified = true; - } - } - } - - if (modified) - { - /* When changing a call into a noreturn call, cfg cleanup - is needed to fix up the noreturn call. */ - if (!was_noreturn - && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) - el_to_fixup.safe_push (stmt); - /* When changing a condition or switch into one we know what - edge will be executed, schedule a cfg cleanup. */ - if ((gimple_code (stmt) == GIMPLE_COND - && (gimple_cond_true_p (as_a <gcond *> (stmt)) - || gimple_cond_false_p (as_a <gcond *> (stmt)))) - || (gimple_code (stmt) == GIMPLE_SWITCH - && TREE_CODE (gimple_switch_index - (as_a <gswitch *> (stmt))) == INTEGER_CST)) - el_todo |= TODO_cleanup_cfg; - /* If we removed EH side-effects from the statement, clean - its EH information. */ - if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt)) - { - bitmap_set_bit (need_eh_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed EH side-effects.\n"); - } - /* Likewise for AB side-effects. */ - if (can_make_abnormal_goto - && !stmt_can_make_abnormal_goto (stmt)) - { - bitmap_set_bit (need_ab_cleanup, - gimple_bb (stmt)->index); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " Removed AB side-effects.\n"); - } - update_stmt (stmt); - if (vdef != gimple_vdef (stmt)) - VN_INFO (vdef)->valnum = vuse; - } - - /* Make new values available - for fully redundant LHS we - continue with the next stmt above and skip this. */ - def_operand_p defp; - FOR_EACH_SSA_DEF_OPERAND (defp, stmt, iter, SSA_OP_DEF) - eliminate_push_avail (DEF_FROM_PTR (defp)); - } - - /* Replace destination PHI arguments. 
*/ - FOR_EACH_EDGE (e, ei, b->succs) - if (e->flags & EDGE_EXECUTABLE) - for (gphi_iterator gsi = gsi_start_phis (e->dest); - !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gphi *phi = gsi.phi (); - use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, e); - tree arg = USE_FROM_PTR (use_p); - if (TREE_CODE (arg) != SSA_NAME - || virtual_operand_p (arg)) - continue; - tree sprime = eliminate_avail (arg); - if (sprime && may_propagate_copy (arg, sprime)) - propagate_value (use_p, sprime); - } - return NULL; -} - -/* Make no longer available leaders no longer available. */ - -void -eliminate_dom_walker::after_dom_children (basic_block) -{ - tree entry; - while ((entry = el_avail_stack.pop ()) != NULL_TREE) - { - tree valnum = VN_INFO (entry)->valnum; - tree old = el_avail[SSA_NAME_VERSION (valnum)]; - if (old == entry) - el_avail[SSA_NAME_VERSION (valnum)] = NULL_TREE; - else - el_avail[SSA_NAME_VERSION (valnum)] = entry; - } -} - -/* Eliminate fully redundant computations. */ - -static unsigned int -eliminate (bool do_pre) -{ - need_eh_cleanup = BITMAP_ALLOC (NULL); - need_ab_cleanup = BITMAP_ALLOC (NULL); - - el_to_remove.create (0); - el_to_fixup.create (0); - el_todo = 0; - el_avail.create (num_ssa_names); - el_avail_stack.create (0); - - eliminate_dom_walker (CDI_DOMINATORS, - do_pre).walk (cfun->cfg->x_entry_block_ptr); - - el_avail.release (); - el_avail_stack.release (); - - return el_todo; -} - -/* Perform CFG cleanups made necessary by elimination. */ - -static unsigned -fini_eliminate (void) -{ - gimple_stmt_iterator gsi; - gimple *stmt; - unsigned todo = 0; - - /* We cannot remove stmts during BB walk, especially not release SSA - names there as this confuses the VN machinery. The stmts ending - up in el_to_remove are either stores or simple copies. - Remove stmts in reverse order to make debug stmt creation possible. */ - while (!el_to_remove.is_empty ()) - { - stmt = el_to_remove.pop (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Removing dead stmt "); - print_gimple_stmt (dump_file, stmt, 0, 0); - } - - gsi = gsi_for_stmt (stmt); - if (gimple_code (stmt) == GIMPLE_PHI) - remove_phi_node (&gsi, true); - else - { - basic_block bb = gimple_bb (stmt); - unlink_stmt_vdef (stmt); - if (gsi_remove (&gsi, true)) - bitmap_set_bit (need_eh_cleanup, bb->index); - if (is_gimple_call (stmt) && stmt_can_make_abnormal_goto (stmt)) - bitmap_set_bit (need_ab_cleanup, bb->index); - release_defs (stmt); - } - - /* Removing a stmt may expose a forwarder block. */ - todo |= TODO_cleanup_cfg; - } - el_to_remove.release (); - - /* Fixup stmts that became noreturn calls. This may require splitting - blocks and thus isn't possible during the dominator walk. Do this - in reverse order so we don't inadvertedly remove a stmt we want to - fixup by visiting a dominating now noreturn call first. 
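
The reverse-order discipline described in this comment is easy to show in isolation. A sketch that assumes a plain vector as the worklist (the real queues hold gimple statements): work is only collected during the dominator walk and popped newest first afterwards, mirroring the el_to_remove loop above and the el_to_fixup loop below, so a definition being deleted can still see its not-yet-deleted uses when debug stmts are generated.

#include <cstdio>
#include <vector>

struct stmt { const char *desc; };

int
main ()
{
  std::vector<stmt *> to_remove;        /* filled during the walk */
  stmt a = { "copy #1" };
  stmt b = { "copy #2, uses copy #1" };
  to_remove.push_back (&a);
  to_remove.push_back (&b);

  /* Process in reverse (pop) order, newest first.  */
  while (!to_remove.empty ())
    {
      stmt *s = to_remove.back ();
      to_remove.pop_back ();
      std::printf ("Removing dead stmt %s\n", s->desc);
    }
  return 0;
}
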
*/ - while (!el_to_fixup.is_empty ()) - { - stmt = el_to_fixup.pop (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Fixing up noreturn call "); - print_gimple_stmt (dump_file, stmt, 0); - } - - if (fixup_noreturn_call (stmt)) - todo |= TODO_cleanup_cfg; - } - el_to_fixup.release (); - - bool do_eh_cleanup = !bitmap_empty_p (need_eh_cleanup); - bool do_ab_cleanup = !bitmap_empty_p (need_ab_cleanup); - - if (do_eh_cleanup) - gimple_purge_all_dead_eh_edges (need_eh_cleanup); - - if (do_ab_cleanup) - gimple_purge_all_dead_abnormal_call_edges (need_ab_cleanup); - - BITMAP_FREE (need_eh_cleanup); - BITMAP_FREE (need_ab_cleanup); - - if (do_eh_cleanup || do_ab_cleanup) - todo |= TODO_cleanup_cfg; - return todo; -} - /* Cheap DCE of a known set of possibly dead stmts. Because we don't follow exactly the standard PRE algorithm, and decide not @@ -5032,18 +4216,16 @@ pass_pre::execute (function *fun) gcc_assert (!need_ssa_update_p (fun)); /* Remove all the redundant expressions. */ - todo |= eliminate (true); + todo |= vn_eliminate (inserted_exprs); statistics_counter_event (fun, "Insertions", pre_stats.insertions); statistics_counter_event (fun, "PA inserted", pre_stats.pa_insert); statistics_counter_event (fun, "HOIST inserted", pre_stats.hoist_insert); statistics_counter_event (fun, "New PHIs", pre_stats.phis); - statistics_counter_event (fun, "Eliminated", pre_stats.eliminations); clear_expression_ids (); scev_finalize (); - todo |= fini_eliminate (); remove_dead_inserted_code (); fini_pre (); loop_optimizer_finalize (); @@ -5079,63 +4261,3 @@ make_pass_pre (gcc::context *ctxt) { return new pass_pre (ctxt); } - -namespace { - -const pass_data pass_data_fre = -{ - GIMPLE_PASS, /* type */ - "fre", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_TREE_FRE, /* tv_id */ - ( PROP_cfg | PROP_ssa ), /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_fre : public gimple_opt_pass -{ -public: - pass_fre (gcc::context *ctxt) - : gimple_opt_pass (pass_data_fre, ctxt) - {} - - /* opt_pass methods: */ - opt_pass * clone () { return new pass_fre (m_ctxt); } - virtual bool gate (function *) { return flag_tree_fre != 0; } - virtual unsigned int execute (function *); - -}; // class pass_fre - -unsigned int -pass_fre::execute (function *fun) -{ - unsigned int todo = 0; - - run_scc_vn (VN_WALKREWRITE); - - memset (&pre_stats, 0, sizeof (pre_stats)); - - /* Remove all the redundant expressions. */ - todo |= eliminate (false); - - todo |= fini_eliminate (); - - scc_vn_restore_ssa_info (); - free_scc_vn (); - - statistics_counter_event (fun, "Insertions", pre_stats.insertions); - statistics_counter_event (fun, "Eliminated", pre_stats.eliminations); - - return todo; -} - -} // anon namespace - -gimple_opt_pass * -make_pass_fre (gcc::context *ctxt) -{ - return new pass_fre (ctxt); -} diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index d27bcee..306080b 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -55,13 +55,21 @@ along with GCC; see the file COPYING3. 
If not see #include "cfgloop.h" #include "params.h" #include "tree-ssa-propagate.h" -#include "tree-ssa-sccvn.h" #include "tree-cfg.h" #include "domwalk.h" #include "gimple-iterator.h" #include "gimple-match.h" #include "stringpool.h" #include "attribs.h" +#include "tree-pass.h" +#include "statistics.h" +#include "langhooks.h" +#include "ipa-utils.h" +#include "dbgcnt.h" +#include "tree-cfgcleanup.h" +#include "tree-ssa-loop.h" +#include "tree-scalar-evolution.h" +#include "tree-ssa-sccvn.h" /* This algorithm is based on the SCC algorithm presented by Keith Cooper and L. Taylor Simpson in "SCC-Based Value numbering" @@ -5149,3 +5157,868 @@ vn_nary_may_trap (vn_nary_op_t nary) return false; } + + +class eliminate_dom_walker : public dom_walker +{ +public: + eliminate_dom_walker (cdi_direction, bitmap); + ~eliminate_dom_walker (); + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); + + tree eliminate_avail (tree op); + void eliminate_push_avail (tree op); + tree eliminate_insert (gimple_stmt_iterator *gsi, tree val); + + bool do_pre; + unsigned int el_todo; + unsigned int eliminations; + unsigned int insertions; + + /* SSA names that had their defs inserted by PRE if do_pre. */ + bitmap inserted_exprs; + + /* Blocks with statements that have had their EH properties changed. */ + bitmap need_eh_cleanup; + + /* Blocks with statements that have had their AB properties changed. */ + bitmap need_ab_cleanup; + + auto_vec<gimple *> to_remove; + auto_vec<gimple *> to_fixup; + auto_vec<tree> avail; + auto_vec<tree> avail_stack; +}; + +eliminate_dom_walker::eliminate_dom_walker (cdi_direction direction, + bitmap inserted_exprs_) + : dom_walker (direction), do_pre (inserted_exprs_ != NULL), + el_todo (0), eliminations (0), insertions (0), + inserted_exprs (inserted_exprs_) +{ + need_eh_cleanup = BITMAP_ALLOC (NULL); + need_ab_cleanup = BITMAP_ALLOC (NULL); +} + +eliminate_dom_walker::~eliminate_dom_walker () +{ + BITMAP_FREE (need_eh_cleanup); + BITMAP_FREE (need_ab_cleanup); +} + +/* Return a leader for OP that is available at the current point of the + eliminate domwalk. */ + +tree +eliminate_dom_walker::eliminate_avail (tree op) +{ + tree valnum = VN_INFO (op)->valnum; + if (TREE_CODE (valnum) == SSA_NAME) + { + if (SSA_NAME_IS_DEFAULT_DEF (valnum)) + return valnum; + if (avail.length () > SSA_NAME_VERSION (valnum)) + return avail[SSA_NAME_VERSION (valnum)]; + } + else if (is_gimple_min_invariant (valnum)) + return valnum; + return NULL_TREE; +} + +/* At the current point of the eliminate domwalk make OP available. */ + +void +eliminate_dom_walker::eliminate_push_avail (tree op) +{ + tree valnum = VN_INFO (op)->valnum; + if (TREE_CODE (valnum) == SSA_NAME) + { + if (avail.length () <= SSA_NAME_VERSION (valnum)) + avail.safe_grow_cleared (SSA_NAME_VERSION (valnum) + 1); + tree pushop = op; + if (avail[SSA_NAME_VERSION (valnum)]) + pushop = avail[SSA_NAME_VERSION (valnum)]; + avail_stack.safe_push (pushop); + avail[SSA_NAME_VERSION (valnum)] = op; + } +} + +/* Insert the expression recorded by SCCVN for VAL at *GSI. Returns + the leader for the expression if insertion was successful. */ + +tree +eliminate_dom_walker::eliminate_insert (gimple_stmt_iterator *gsi, tree val) +{ + /* We can insert a sequence with a single assignment only. 
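
The eliminate_avail and eliminate_push_avail pair above implement a scoped leader table: avail maps a value number to the SSA name currently usable as leader, and avail_stack is an undo log, unwound in after_dom_children further below, with a null entry marking each block. A self-contained sketch of that discipline, with a hypothetical name struct in place of SSA name trees:

#include <cstdio>
#include <vector>

struct name { unsigned valnum; const char *id; };

static std::vector<name *> avail;       /* value number -> current leader */
static std::vector<name *> avail_stack; /* undo log; null marks a block */

static void
enter_block ()
{
  avail_stack.push_back (nullptr);
}

/* Record either the new leader itself (nothing shadowed) or the leader
   it shadows, so unwinding can tell the two cases apart.  */
static void
push_avail (name *op)
{
  if (avail.size () <= op->valnum)
    avail.resize (op->valnum + 1, nullptr);
  name *pushop = op;
  if (avail[op->valnum])
    pushop = avail[op->valnum];
  avail_stack.push_back (pushop);
  avail[op->valnum] = op;
}

/* Unwind to the block marker: drop fresh entries, restore shadowed ones.  */
static void
leave_block ()
{
  while (name *entry = avail_stack.back ())
    {
      avail_stack.pop_back ();
      if (avail[entry->valnum] == entry)
        avail[entry->valnum] = nullptr;
      else
        avail[entry->valnum] = entry;
    }
  avail_stack.pop_back ();
}

int
main ()
{
  name x = { 1, "x_1" };
  name y = { 1, "y_2" };        /* same value number as x_1 */
  enter_block ();
  push_avail (&x);
  enter_block ();
  push_avail (&y);              /* shadows x_1 in the inner block */
  leave_block ();
  std::printf ("leader after unwind: %s\n", avail[1]->id);  /* x_1 */
  leave_block ();
  return 0;
}
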
*/ + gimple_seq stmts = VN_INFO (val)->expr; + if (!gimple_seq_singleton_p (stmts)) + return NULL_TREE; + gassign *stmt = dyn_cast <gassign *> (gimple_seq_first_stmt (stmts)); + if (!stmt + || (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) + && gimple_assign_rhs_code (stmt) != VIEW_CONVERT_EXPR + && gimple_assign_rhs_code (stmt) != BIT_FIELD_REF + && (gimple_assign_rhs_code (stmt) != BIT_AND_EXPR + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST))) + return NULL_TREE; + + tree op = gimple_assign_rhs1 (stmt); + if (gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR + || gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) + op = TREE_OPERAND (op, 0); + tree leader = TREE_CODE (op) == SSA_NAME ? eliminate_avail (op) : op; + if (!leader) + return NULL_TREE; + + tree res; + stmts = NULL; + if (gimple_assign_rhs_code (stmt) == BIT_FIELD_REF) + res = gimple_build (&stmts, BIT_FIELD_REF, + TREE_TYPE (val), leader, + TREE_OPERAND (gimple_assign_rhs1 (stmt), 1), + TREE_OPERAND (gimple_assign_rhs1 (stmt), 2)); + else if (gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) + res = gimple_build (&stmts, BIT_AND_EXPR, + TREE_TYPE (val), leader, gimple_assign_rhs2 (stmt)); + else + res = gimple_build (&stmts, gimple_assign_rhs_code (stmt), + TREE_TYPE (val), leader); + if (TREE_CODE (res) != SSA_NAME + || SSA_NAME_IS_DEFAULT_DEF (res) + || gimple_bb (SSA_NAME_DEF_STMT (res))) + { + gimple_seq_discard (stmts); + + /* During propagation we have to treat SSA info conservatively + and thus we can end up simplifying the inserted expression + at elimination time to sth not defined in stmts. */ + /* But then this is a redundancy we failed to detect. Which means + res now has two values. That doesn't play well with how + we track availability here, so give up. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (TREE_CODE (res) == SSA_NAME) + res = eliminate_avail (res); + if (res) + { + fprintf (dump_file, "Failed to insert expression for value "); + print_generic_expr (dump_file, val); + fprintf (dump_file, " which is really fully redundant to "); + print_generic_expr (dump_file, res); + fprintf (dump_file, "\n"); + } + } + + return NULL_TREE; + } + else + { + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + VN_INFO_GET (res)->valnum = val; + } + + insertions++; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Inserted "); + print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (res), 0); + } + + return res; +} + + + +/* Perform elimination for the basic-block B during the domwalk. */ + +edge +eliminate_dom_walker::before_dom_children (basic_block b) +{ + /* Mark new bb. */ + avail_stack.safe_push (NULL_TREE); + + /* Skip unreachable blocks marked unreachable during the SCCVN domwalk. */ + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, b->preds) + if (e->flags & EDGE_EXECUTABLE) + break; + if (! e) + return NULL; + + for (gphi_iterator gsi = gsi_start_phis (b); !gsi_end_p (gsi);) + { + gphi *phi = gsi.phi (); + tree res = PHI_RESULT (phi); + + if (virtual_operand_p (res)) + { + gsi_next (&gsi); + continue; + } + + tree sprime = eliminate_avail (res); + if (sprime + && sprime != res) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced redundant PHI node defining "); + print_generic_expr (dump_file, res); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, "\n"); + } + + /* If we inserted this PHI node ourself, it's not an elimination. */ + if (! inserted_exprs + || ! 
bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (res))) + eliminations++; + + /* If we will propagate into all uses don't bother to do + anything. */ + if (may_propagate_copy (res, sprime)) + { + /* Mark the PHI for removal. */ + to_remove.safe_push (phi); + gsi_next (&gsi); + continue; + } + + remove_phi_node (&gsi, false); + + if (!useless_type_conversion_p (TREE_TYPE (res), TREE_TYPE (sprime))) + sprime = fold_convert (TREE_TYPE (res), sprime); + gimple *stmt = gimple_build_assign (res, sprime); + gimple_stmt_iterator gsi2 = gsi_after_labels (b); + gsi_insert_before (&gsi2, stmt, GSI_NEW_STMT); + continue; + } + + eliminate_push_avail (res); + gsi_next (&gsi); + } + + for (gimple_stmt_iterator gsi = gsi_start_bb (b); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + tree sprime = NULL_TREE; + gimple *stmt = gsi_stmt (gsi); + tree lhs = gimple_get_lhs (stmt); + if (lhs && TREE_CODE (lhs) == SSA_NAME + && !gimple_has_volatile_ops (stmt) + /* See PR43491. Do not replace a global register variable when + it is a the RHS of an assignment. Do replace local register + variables since gcc does not guarantee a local variable will + be allocated in register. + ??? The fix isn't effective here. This should instead + be ensured by not value-numbering them the same but treating + them like volatiles? */ + && !(gimple_assign_single_p (stmt) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL + && DECL_HARD_REGISTER (gimple_assign_rhs1 (stmt)) + && is_global_var (gimple_assign_rhs1 (stmt))))) + { + sprime = eliminate_avail (lhs); + if (!sprime) + { + /* If there is no existing usable leader but SCCVN thinks + it has an expression it wants to use as replacement, + insert that. */ + tree val = VN_INFO (lhs)->valnum; + if (val != VN_TOP + && TREE_CODE (val) == SSA_NAME + && VN_INFO (val)->needs_insertion + && VN_INFO (val)->expr != NULL + && (sprime = eliminate_insert (&gsi, val)) != NULL_TREE) + eliminate_push_avail (sprime); + } + + /* If this now constitutes a copy duplicate points-to + and range info appropriately. This is especially + important for inserted code. See tree-ssa-copy.c + for similar code. */ + if (sprime + && TREE_CODE (sprime) == SSA_NAME) + { + basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime)); + if (POINTER_TYPE_P (TREE_TYPE (lhs)) + && VN_INFO_PTR_INFO (lhs) + && ! VN_INFO_PTR_INFO (sprime)) + { + duplicate_ssa_name_ptr_info (sprime, + VN_INFO_PTR_INFO (lhs)); + if (b != sprime_b) + mark_ptr_info_alignment_unknown + (SSA_NAME_PTR_INFO (sprime)); + } + else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && VN_INFO_RANGE_INFO (lhs) + && ! VN_INFO_RANGE_INFO (sprime) + && b == sprime_b) + duplicate_ssa_name_range_info (sprime, + VN_INFO_RANGE_TYPE (lhs), + VN_INFO_RANGE_INFO (lhs)); + } + + /* Inhibit the use of an inserted PHI on a loop header when + the address of the memory reference is a simple induction + variable. In other cases the vectorizer won't do anything + anyway (either it's loop invariant or a complicated + expression). 
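
The hazard this guard avoids is easiest to see in source form. A plain C++ illustration (not GCC output, function names invented): carrying the per-iteration load through a loop-header PHI turns independent loads into a cross-iteration dependence, which is what would stop the vectorizer even though each address is a simple induction variable.

#include <cstdio>

/* Independent loads each iteration: vectorizes.  */
static long
sum_plain (const long *a, int n)
{
  long s = 0;
  for (int i = 0; i < n; ++i)
    s += a[i];
  return s;
}

/* Same sum, but the loaded value is carried across iterations, the
   shape a PRE-inserted loop-header PHI would create.  */
static long
sum_carried (const long *a, int n)
{
  if (n <= 0)
    return 0;
  long s = 0;
  long carried = a[0];
  for (int i = 0; i + 1 < n; ++i)
    {
      s += carried;
      carried = a[i + 1];       /* next iteration's value: loop PHI */
    }
  return s + carried;
}

int
main ()
{
  const long a[4] = { 1, 2, 3, 4 };
  std::printf ("%ld %ld\n", sum_plain (a, 4), sum_carried (a, 4));
  return 0;
}
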
*/ + if (sprime + && TREE_CODE (sprime) == SSA_NAME + && do_pre + && (flag_tree_loop_vectorize || flag_tree_parallelize_loops > 1) + && loop_outer (b->loop_father) + && has_zero_uses (sprime) + && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)) + && gimple_assign_load_p (stmt)) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (sprime); + basic_block def_bb = gimple_bb (def_stmt); + if (gimple_code (def_stmt) == GIMPLE_PHI + && def_bb->loop_father->header == def_bb) + { + loop_p loop = def_bb->loop_father; + ssa_op_iter iter; + tree op; + bool found = false; + FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) + { + affine_iv iv; + def_bb = gimple_bb (SSA_NAME_DEF_STMT (op)); + if (def_bb + && flow_bb_inside_loop_p (loop, def_bb) + && simple_iv (loop, loop, op, &iv, true)) + { + found = true; + break; + } + } + if (found) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Not replacing "); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, " which would add a loop" + " carried dependence to loop %d\n", + loop->num); + } + /* Don't keep sprime available. */ + sprime = NULL_TREE; + } + } + } + + if (sprime) + { + /* If we can propagate the value computed for LHS into + all uses don't bother doing anything with this stmt. */ + if (may_propagate_copy (lhs, sprime)) + { + /* Mark it for removal. */ + to_remove.safe_push (stmt); + + /* ??? Don't count copy/constant propagations. */ + if (gimple_assign_single_p (stmt) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME + || gimple_assign_rhs1 (stmt) == sprime)) + continue; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced "); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, " in all uses of "); + print_gimple_stmt (dump_file, stmt, 0); + } + + eliminations++; + continue; + } + + /* If this is an assignment from our leader (which + happens in the case the value-number is a constant) + then there is nothing to do. */ + if (gimple_assign_single_p (stmt) + && sprime == gimple_assign_rhs1 (stmt)) + continue; + + /* Else replace its RHS. */ + bool can_make_abnormal_goto + = is_gimple_call (stmt) + && stmt_can_make_abnormal_goto (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replaced "); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, " with "); + print_generic_expr (dump_file, sprime); + fprintf (dump_file, " in "); + print_gimple_stmt (dump_file, stmt, 0); + } + + eliminations++; + gimple *orig_stmt = stmt; + if (!useless_type_conversion_p (TREE_TYPE (lhs), + TREE_TYPE (sprime))) + sprime = fold_convert (TREE_TYPE (lhs), sprime); + tree vdef = gimple_vdef (stmt); + tree vuse = gimple_vuse (stmt); + propagate_tree_value_into_stmt (&gsi, sprime); + stmt = gsi_stmt (gsi); + update_stmt (stmt); + if (vdef != gimple_vdef (stmt)) + VN_INFO (vdef)->valnum = vuse; + + /* If we removed EH side-effects from the statement, clean + its EH information. */ + if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) + { + bitmap_set_bit (need_eh_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed EH side-effects.\n"); + } + + /* Likewise for AB side-effects. 
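
Both cleanup paths here follow one pattern: dead EH or abnormal edges are never purged while the walk holds an iterator; only the block index is recorded, and a single purge pass runs after the walk (see vn_eliminate below). A sketch with a std::set standing in for the need_eh_cleanup bitmap:

#include <cstdio>
#include <set>

static std::set<int> need_eh_cleanup;   /* stand-in for the sbitmap */

static void
note_no_longer_throws (int bb_index)
{
  need_eh_cleanup.insert (bb_index);    /* recording is idempotent */
}

int
main ()
{
  note_no_longer_throws (7);
  note_no_longer_throws (7);
  for (int bb : need_eh_cleanup)
    std::printf ("purging dead EH edges in block %d\n", bb);
  return 0;
}
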
*/ + if (can_make_abnormal_goto + && !stmt_can_make_abnormal_goto (stmt)) + { + bitmap_set_bit (need_ab_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed AB side-effects.\n"); + } + + continue; + } + } + + /* If the statement is a scalar store, see if the expression + has the same value number as its rhs. If so, the store is + dead. */ + if (gimple_assign_single_p (stmt) + && !gimple_has_volatile_ops (stmt) + && !is_gimple_reg (gimple_assign_lhs (stmt)) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME + || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))) + { + tree val; + tree rhs = gimple_assign_rhs1 (stmt); + vn_reference_t vnresult; + val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE, + &vnresult, false); + if (TREE_CODE (rhs) == SSA_NAME) + rhs = VN_INFO (rhs)->valnum; + if (val + && operand_equal_p (val, rhs, 0)) + { + /* We can only remove the later store if the former aliases + at least all accesses the later one does or if the store + was to readonly memory storing the same value. */ + alias_set_type set = get_alias_set (lhs); + if (! vnresult + || vnresult->set == set + || alias_set_subset_of (set, vnresult->set)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Deleted redundant store "); + print_gimple_stmt (dump_file, stmt, 0); + } + + /* Queue stmt for removal. */ + to_remove.safe_push (stmt); + continue; + } + } + } + + /* If this is a control statement value numbering left edges + unexecuted on force the condition in a way consistent with + that. */ + if (gcond *cond = dyn_cast <gcond *> (stmt)) + { + if ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) + ^ (EDGE_SUCC (b, 1)->flags & EDGE_EXECUTABLE)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Removing unexecutable edge from "); + print_gimple_stmt (dump_file, stmt, 0); + } + if (((EDGE_SUCC (b, 0)->flags & EDGE_TRUE_VALUE) != 0) + == ((EDGE_SUCC (b, 0)->flags & EDGE_EXECUTABLE) != 0)) + gimple_cond_make_true (cond); + else + gimple_cond_make_false (cond); + update_stmt (cond); + el_todo |= TODO_cleanup_cfg; + continue; + } + } + + bool can_make_abnormal_goto = stmt_can_make_abnormal_goto (stmt); + bool was_noreturn = (is_gimple_call (stmt) + && gimple_call_noreturn_p (stmt)); + tree vdef = gimple_vdef (stmt); + tree vuse = gimple_vuse (stmt); + + /* If we didn't replace the whole stmt (or propagate the result + into all uses), replace all uses on this stmt with their + leaders. */ + bool modified = false; + use_operand_p use_p; + ssa_op_iter iter; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) + { + tree use = USE_FROM_PTR (use_p); + /* ??? The call code above leaves stmt operands un-updated. */ + if (TREE_CODE (use) != SSA_NAME) + continue; + tree sprime = eliminate_avail (use); + if (sprime && sprime != use + && may_propagate_copy (use, sprime) + /* We substitute into debug stmts to avoid excessive + debug temporaries created by removed stmts, but we need + to avoid doing so for inserted sprimes as we never want + to create debug temporaries for them. */ + && (!inserted_exprs + || TREE_CODE (sprime) != SSA_NAME + || !is_gimple_debug (stmt) + || !bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime)))) + { + propagate_value (use_p, sprime); + modified = true; + } + } + + /* Fold the stmt if modified, this canonicalizes MEM_REFs we propagated + into which is a requirement for the IPA devirt machinery. 
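
Re-folding after substitution matters because propagated leaders can turn operands into constants or expose simplifications that only appear once rewritten. The same idea in miniature, with a toy expression node in place of a gimple statement and fold1 in place of fold_stmt:

#include <cstdio>

/* op '+' is an addition; op 0 marks a constant with value val.  */
struct expr { char op; long val; expr *l, *r; };

static void
fold1 (expr &e)
{
  if (e.op == '+' && e.l->op == 0 && e.r->op == 0)
    {
      e.val = e.l->val + e.r->val;      /* both operands became constant */
      e.op = 0;
    }
}

int
main ()
{
  expr c1 = { 0, 2, nullptr, nullptr };
  expr c2 = { 0, 40, nullptr, nullptr };
  expr sum = { '+', 0, &c1, &c2 };      /* operands constant after propagation */
  fold1 (sum);
  std::printf ("folded to %ld\n", sum.val);
  return 0;
}
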
*/ + gimple *old_stmt = stmt; + if (modified) + { + /* If a formerly non-invariant ADDR_EXPR is turned into an + invariant one it was on a separate stmt. */ + if (gimple_assign_single_p (stmt) + && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (gimple_assign_rhs1 (stmt)); + gimple_stmt_iterator prev = gsi; + gsi_prev (&prev); + if (fold_stmt (&gsi)) + { + /* fold_stmt may have created new stmts inbetween + the previous stmt and the folded stmt. Mark + all defs created there as varying to not confuse + the SCCVN machinery as we're using that even during + elimination. */ + if (gsi_end_p (prev)) + prev = gsi_start_bb (b); + else + gsi_next (&prev); + if (gsi_stmt (prev) != gsi_stmt (gsi)) + do + { + tree def; + ssa_op_iter dit; + FOR_EACH_SSA_TREE_OPERAND (def, gsi_stmt (prev), + dit, SSA_OP_ALL_DEFS) + /* As existing DEFs may move between stmts + we have to guard VN_INFO_GET. */ + if (! has_VN_INFO (def)) + VN_INFO_GET (def)->valnum = def; + if (gsi_stmt (prev) == gsi_stmt (gsi)) + break; + gsi_next (&prev); + } + while (1); + } + stmt = gsi_stmt (gsi); + /* In case we folded the stmt away schedule the NOP for removal. */ + if (gimple_nop_p (stmt)) + to_remove.safe_push (stmt); + } + + /* Visit indirect calls and turn them into direct calls if + possible using the devirtualization machinery. Do this before + checking for required EH/abnormal/noreturn cleanup as devird + may expose more of those. */ + if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) + { + tree fn = gimple_call_fn (call_stmt); + if (fn + && flag_devirtualize + && virtual_method_call_p (fn)) + { + tree otr_type = obj_type_ref_class (fn); + unsigned HOST_WIDE_INT otr_tok + = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (fn)); + tree instance; + ipa_polymorphic_call_context context (current_function_decl, + fn, stmt, &instance); + context.get_dynamic_type (instance, OBJ_TYPE_REF_OBJECT (fn), + otr_type, stmt); + bool final; + vec <cgraph_node *> targets + = possible_polymorphic_call_targets (obj_type_ref_class (fn), + otr_tok, context, &final); + if (dump_file) + dump_possible_polymorphic_call_targets (dump_file, + obj_type_ref_class (fn), + otr_tok, context); + if (final && targets.length () <= 1 && dbg_cnt (devirt)) + { + tree fn; + if (targets.length () == 1) + fn = targets[0]->decl; + else + fn = builtin_decl_implicit (BUILT_IN_UNREACHABLE); + if (dump_enabled_p ()) + { + location_t loc = gimple_location (stmt); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, + "converting indirect call to " + "function %s\n", + lang_hooks.decl_printable_name (fn, 2)); + } + gimple_call_set_fndecl (call_stmt, fn); + /* If changing the call to __builtin_unreachable + or similar noreturn function, adjust gimple_call_fntype + too. */ + if (gimple_call_noreturn_p (call_stmt) + && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (fn))) + && TYPE_ARG_TYPES (TREE_TYPE (fn)) + && (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fn))) + == void_type_node)) + gimple_call_set_fntype (call_stmt, TREE_TYPE (fn)); + maybe_remove_unused_call_args (cfun, call_stmt); + modified = true; + } + } + } + + if (modified) + { + /* When changing a call into a noreturn call, cfg cleanup + is needed to fix up the noreturn call. */ + if (!was_noreturn + && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) + to_fixup.safe_push (stmt); + /* When changing a condition or switch into one we know what + edge will be executed, schedule a cfg cleanup. 
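
The el_todo accumulation seen throughout is the usual pass-manager protocol: follow-up work discovered during the walk is encoded as bits and handed back once at the end. A toy version (the flag values below are invented; GCC defines the real TODO_* constants elsewhere):

#include <cstdio>

enum
{
  TODO_cleanup_cfg = 1u << 0,   /* hypothetical values */
  TODO_update_ssa  = 1u << 1
};

int
main ()
{
  unsigned todo = 0;
  bool folded_condition = true;
  if (folded_condition)
    todo |= TODO_cleanup_cfg;   /* a successor edge just became dead */
  std::printf ("todo mask: %#x\n", todo);
  return 0;
}
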
*/ + if ((gimple_code (stmt) == GIMPLE_COND + && (gimple_cond_true_p (as_a <gcond *> (stmt)) + || gimple_cond_false_p (as_a <gcond *> (stmt)))) + || (gimple_code (stmt) == GIMPLE_SWITCH + && TREE_CODE (gimple_switch_index + (as_a <gswitch *> (stmt))) == INTEGER_CST)) + el_todo |= TODO_cleanup_cfg; + /* If we removed EH side-effects from the statement, clean + its EH information. */ + if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt)) + { + bitmap_set_bit (need_eh_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed EH side-effects.\n"); + } + /* Likewise for AB side-effects. */ + if (can_make_abnormal_goto + && !stmt_can_make_abnormal_goto (stmt)) + { + bitmap_set_bit (need_ab_cleanup, + gimple_bb (stmt)->index); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Removed AB side-effects.\n"); + } + update_stmt (stmt); + if (vdef != gimple_vdef (stmt)) + VN_INFO (vdef)->valnum = vuse; + } + + /* Make new values available - for fully redundant LHS we + continue with the next stmt above and skip this. */ + def_operand_p defp; + FOR_EACH_SSA_DEF_OPERAND (defp, stmt, iter, SSA_OP_DEF) + eliminate_push_avail (DEF_FROM_PTR (defp)); + } + + /* Replace destination PHI arguments. */ + FOR_EACH_EDGE (e, ei, b->succs) + if (e->flags & EDGE_EXECUTABLE) + for (gphi_iterator gsi = gsi_start_phis (e->dest); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gphi *phi = gsi.phi (); + use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, e); + tree arg = USE_FROM_PTR (use_p); + if (TREE_CODE (arg) != SSA_NAME + || virtual_operand_p (arg)) + continue; + tree sprime = eliminate_avail (arg); + if (sprime && may_propagate_copy (arg, sprime)) + propagate_value (use_p, sprime); + } + return NULL; +} + +/* Make no longer available leaders no longer available. */ + +void +eliminate_dom_walker::after_dom_children (basic_block) +{ + tree entry; + while ((entry = avail_stack.pop ()) != NULL_TREE) + { + tree valnum = VN_INFO (entry)->valnum; + tree old = avail[SSA_NAME_VERSION (valnum)]; + if (old == entry) + avail[SSA_NAME_VERSION (valnum)] = NULL_TREE; + else + avail[SSA_NAME_VERSION (valnum)] = entry; + } +} + +/* Eliminate fully redundant computations. */ + +unsigned int +vn_eliminate (bitmap inserted_exprs) +{ + eliminate_dom_walker el (CDI_DOMINATORS, inserted_exprs); + el.avail.reserve (num_ssa_names); + + el.walk (cfun->cfg->x_entry_block_ptr); + + /* We cannot remove stmts during BB walk, especially not release SSA + names there as this confuses the VN machinery. The stmts ending + up in to_remove are either stores or simple copies. + Remove stmts in reverse order to make debug stmt creation possible. */ + while (!el.to_remove.is_empty ()) + { + gimple *stmt = el.to_remove.pop (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Removing dead stmt "); + print_gimple_stmt (dump_file, stmt, 0, 0); + } + + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + if (gimple_code (stmt) == GIMPLE_PHI) + remove_phi_node (&gsi, true); + else + { + basic_block bb = gimple_bb (stmt); + unlink_stmt_vdef (stmt); + if (gsi_remove (&gsi, true)) + bitmap_set_bit (el.need_eh_cleanup, bb->index); + if (is_gimple_call (stmt) && stmt_can_make_abnormal_goto (stmt)) + bitmap_set_bit (el.need_ab_cleanup, bb->index); + release_defs (stmt); + } + + /* Removing a stmt may expose a forwarder block. */ + el.el_todo |= TODO_cleanup_cfg; + } + + /* Fixup stmts that became noreturn calls. 
This may require splitting + blocks and thus isn't possible during the dominator walk. Do this + in reverse order so we don't inadvertedly remove a stmt we want to + fixup by visiting a dominating now noreturn call first. */ + while (!el.to_fixup.is_empty ()) + { + gimple *stmt = el.to_fixup.pop (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Fixing up noreturn call "); + print_gimple_stmt (dump_file, stmt, 0); + } + + if (fixup_noreturn_call (stmt)) + el.el_todo |= TODO_cleanup_cfg; + } + + bool do_eh_cleanup = !bitmap_empty_p (el.need_eh_cleanup); + bool do_ab_cleanup = !bitmap_empty_p (el.need_ab_cleanup); + + if (do_eh_cleanup) + gimple_purge_all_dead_eh_edges (el.need_eh_cleanup); + + if (do_ab_cleanup) + gimple_purge_all_dead_abnormal_call_edges (el.need_ab_cleanup); + + if (do_eh_cleanup || do_ab_cleanup) + el.el_todo |= TODO_cleanup_cfg; + + statistics_counter_event (cfun, "Eliminated", el.eliminations); + statistics_counter_event (cfun, "Insertions", el.insertions); + + return el.el_todo; +} + + +namespace { + +const pass_data pass_data_fre = +{ + GIMPLE_PASS, /* type */ + "fre", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_TREE_FRE, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_fre : public gimple_opt_pass +{ +public: + pass_fre (gcc::context *ctxt) + : gimple_opt_pass (pass_data_fre, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_fre (m_ctxt); } + virtual bool gate (function *) { return flag_tree_fre != 0; } + virtual unsigned int execute (function *); + +}; // class pass_fre + +unsigned int +pass_fre::execute (function *) +{ + unsigned int todo = 0; + + run_scc_vn (VN_WALKREWRITE); + + /* Remove all the redundant expressions. */ + todo |= vn_eliminate (NULL); + + scc_vn_restore_ssa_info (); + free_scc_vn (); + + return todo; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_fre (gcc::context *ctxt) +{ + return new pass_fre (ctxt); +} diff --git a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h index 77d0183..38877bc 100644 --- a/gcc/tree-ssa-sccvn.h +++ b/gcc/tree-ssa-sccvn.h @@ -214,6 +214,7 @@ extern vn_ssa_aux_t VN_INFO (tree); extern vn_ssa_aux_t VN_INFO_GET (tree); tree vn_get_expr_for (tree); void run_scc_vn (vn_lookup_kind); +unsigned int vn_eliminate (bitmap); void free_scc_vn (void); void scc_vn_restore_ssa_info (void); tree vn_nary_op_lookup (tree, vn_nary_op_t *); diff --git a/gcc/varasm.c b/gcc/varasm.c index d324ca03..a139151 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -3783,11 +3783,8 @@ force_const_mem (machine_mode mode, rtx x) *slot = desc; /* Align the location counter as required by EXP's data type. */ - align = GET_MODE_ALIGNMENT (mode == VOIDmode ? word_mode : mode); - - tree type = lang_hooks.types.type_for_mode (mode, 0); - if (type != NULL_TREE) - align = targetm.constant_alignment (make_tree (type, x), align); + machine_mode align_mode = (mode == VOIDmode ? word_mode : mode); + align = targetm.static_rtx_alignment (align_mode); pool->offset += (align / BITS_PER_UNIT) - 1; pool->offset &= ~ ((align / BITS_PER_UNIT) - 1); @@ -3829,7 +3826,6 @@ force_const_mem (machine_mode mode, rtx x) /* Construct the MEM. 
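
The add-then-mask rounding used a few lines up to place pool entries (pool->offset) assumes the alignment is a power of two. The same arithmetic stand-alone, with BITS_PER_UNIT fixed at 8:

#include <cstdio>

static unsigned
align_offset (unsigned offset, unsigned align_bits)
{
  unsigned align_bytes = align_bits / 8;  /* BITS_PER_UNIT == 8 */
  offset += align_bytes - 1;              /* bump past the boundary */
  offset &= ~(align_bytes - 1);           /* then truncate back */
  return offset;
}

int
main ()
{
  std::printf ("%u\n", align_offset (13, 64));  /* 16: next 8-byte slot */
  return 0;
}
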
*/ desc->mem = def = gen_const_mem (mode, symbol); - set_mem_attributes (def, lang_hooks.types.type_for_mode (mode, 0), 1); set_mem_align (def, align); /* If we're dropping a label to the constant pool, make sure we diff --git a/gcc/wide-int-print.cc b/gcc/wide-int-print.cc index 36d8ad8..8874e81 100644 --- a/gcc/wide-int-print.cc +++ b/gcc/wide-int-print.cc @@ -103,30 +103,28 @@ print_decu (const wide_int_ref &wi, FILE *file) } void -print_hex (const wide_int_ref &wi, char *buf) +print_hex (const wide_int_ref &val, char *buf) { - int i = wi.get_len (); - - if (wi == 0) + if (val == 0) buf += sprintf (buf, "0x0"); else { - if (wi::neg_p (wi)) + buf += sprintf (buf, "0x"); + int start = ROUND_DOWN (val.get_precision (), HOST_BITS_PER_WIDE_INT); + int width = val.get_precision () - start; + bool first_p = true; + for (int i = start; i >= 0; i -= HOST_BITS_PER_WIDE_INT) { - int j; - /* If the number is negative, we may need to pad value with - 0xFFF... because the leading elements may be missing and - we do not print a '-' with hex. */ - buf += sprintf (buf, "0x"); - for (j = BLOCKS_NEEDED (wi.get_precision ()); j > i; j--) - buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, HOST_WIDE_INT_M1); - + unsigned HOST_WIDE_INT uhwi = wi::extract_uhwi (val, i, width); + if (!first_p) + buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, uhwi); + else if (uhwi != 0) + { + buf += sprintf (buf, HOST_WIDE_INT_PRINT_HEX_PURE, uhwi); + first_p = false; + } + width = HOST_BITS_PER_WIDE_INT; } - else - buf += sprintf (buf, "0x" HOST_WIDE_INT_PRINT_HEX_PURE, wi.elt (--i)); - - while (--i >= 0) - buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, wi.elt (i)); } } diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc index 1a1a68c..ba0fd25 100644 --- a/gcc/wide-int.cc +++ b/gcc/wide-int.cc @@ -2253,6 +2253,17 @@ test_printing () VALUE_TYPE a = from_int<VALUE_TYPE> (42); assert_deceq ("42", a, SIGNED); assert_hexeq ("0x2a", a); + assert_hexeq ("0x1fffffffffffffffff", wi::shwi (-1, 69)); + assert_hexeq ("0xffffffffffffffff", wi::mask (64, false, 69)); + assert_hexeq ("0xffffffffffffffff", wi::mask <widest_int> (64, false)); + if (WIDE_INT_MAX_PRECISION > 128) + { + assert_hexeq ("0x20000000000000000fffffffffffffffe", + wi::lshift (1, 129) + wi::lshift (1, 64) - 2); + assert_hexeq ("0x200000000000004000123456789abcdef", + wi::lshift (1, 129) + wi::lshift (1, 74) + + wi::lshift (0x1234567, 32) + 0x89abcdef); + } } /* Verify that various operations work correctly for VALUE_TYPE, |
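
The rewritten print_hex above walks the value from its most significant host-wide-int chunk down, printing the first nonzero chunk without padding and every following chunk zero-padded to a full 16 hex digits. A stand-alone model that assumes the precision is a multiple of 64 bits (the real code also handles a partial leading chunk via wi::extract_uhwi):

#include <cstdio>

/* VAL holds NCHUNKS 64-bit limbs, least significant first, like the
   HOST_WIDE_INT elements of a wide_int.  */
static void
print_hex_chunks (const unsigned long long *val, int nchunks, char *buf)
{
  buf += std::sprintf (buf, "0x");
  bool first_p = true;
  for (int i = nchunks - 1; i >= 0; --i)
    {
      if (first_p)
        {
          if (val[i] == 0 && i > 0)
            continue;                   /* skip leading zero chunks */
          buf += std::sprintf (buf, "%llx", val[i]);
          first_p = false;
        }
      else
        buf += std::sprintf (buf, "%016llx", val[i]);
    }
}

int
main ()
{
  unsigned long long v[2] = { 0x0123456789abcdefULL, 0x2ULL };
  char buf[64];
  print_hex_chunks (v, 2, buf);
  std::printf ("%s\n", buf);    /* 0x20123456789abcdef */
  return 0;
}
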