From 7ccf35ed170c496d99d670986e96e7c1d779cee3 Mon Sep 17 00:00:00 2001 From: Dorit Naishlos Date: Thu, 23 Sep 2004 14:34:35 +0000 Subject: tree.def (ALIGN_INDIRECT_REF, [...]): New tree-codes. 2004-09-23 Dorit Naishlos * tree.def (ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF): New tree-codes. * tree.h (REF_ORIGINAL): Consider ALIGN_INDIRECT_REF and MISALIGNED_INDIRECT_REF. * alias.c (get_alias_set, nonoverlapping_memrefs_p): Likewise. * emit-rtl.c (mem_expr_equal_p, set_mem_attributes_minus_bitpos): Likewise. * expr.c (safe_from_p, expand_expr_real_1, rewrite_address_base) (find_interesting_uses_address): Likewise. * fold-const.c (non_lvalue, operand_equal_p): Likewise. (build_fold_addr_expr_with_type): Likewise. * gimplify.c (gimplify_addr_expr, gimplify_expr): Likewise. * print-rtl.c (print_mem_expr): Likewise. * tree-dump.c (dequeue_and_dump): Likewise. * tree-eh.c (tree_could_trap_p): Likewise. * tree-gimple.c (is_gimple_addressable, get_base_address): Likewise. * tree-pretty-print.c (op_prio, op_symbol, dump_generic_node): Likewise. * tree-ssa-alias.c (find_ptr_dereference, ptr_is_dereferenced_by): Likewise. * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Likewise. * tree-ssa-dom.c (record_equivalences_from_stmt): Likewise. * tree-ssa-loop-im.c (for_each_index, is_call_clobbered_ref): Likewise. * tree-ssa-loop-ivopts.c (find_interesting_uses_address): Likewise. (add_address_candidates, rewrite_address_base): Likewise. * tree-ssa-operands.c (get_expr_operands, get_indirect_ref_operands): Likewise. * tree.c (staticp, build1_stat): Likewise. * tree.def (REALIGN_LOAD_EXPR, REALIGN_STORE_EXPR): New tree-codes. * tree-pretty-print.c (dump_generic_node): Consider REALIGN_LOAD_EXPR. * tree-ssa-operands.c (get_expr_operands): Likewise. * expr.c (expand_expr_real_1): Likewise. * optabs.h (vec_realign_store_optab, vec_realign_load_optab): New optabs. (OTI_vec_realign_store, OTI_vec_realign_load): New optab_index values for the new optabs. 
(expand_ternary_op): New function. * genopinit.c (optabs): Handle the new optabs. * optabs.c (optab_for_tree_code): Add cases for the new tree-codes. (init_optabs): Initialize vec_realign_load_optab. (expand_ternary_op): New function. * target-def.h (TARGET_VECTORIZE): New member for struct gcc_target. (TARGET_VECTORIZE_MISALIGNED_MEM_OK): New member for targetm.vectorize. (TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD): Likewise. (TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE): Likewise. * target.h (struct vectorize): New member for struct gcc_target. (misaligned_mem_ok): New member for targetm.vectorize. (builtin_mask_for_load): Likewise. (builtin_mask_for_store): Likewise. * targhooks.c (default_vect_misaligned_mem_ok): New function. * targhooks.h (default_vect_misaligned_mem_ok): Declare. * config/rs6000/altivec.md (build_vector_mask_for_load): New define_expand. (vec_realign_load_v4si, vec_realign_load_v4sf, vec_realign_load_v8hi) (vec_realign_load_v16qi): New define_insn. * config/rs6000/rs6000.h (ALTIVEC_BUILTIN_MASK_FOR_LOAD) (ALTIVEC_BUILTIN_MASK_FOR_STORE): New target builtins. * config/rs6000/rs6000.c (altivec_builtin_mask_for_load) (altivec_builtin_mask_for_store): New variables. (rs6000_builtin_mask_for_load): New function. Implements TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD. (rs6000_builtin_mask_for_store): New function. Implements TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE. (rs6000_expand_builtin): Expand the target builtins builtin_mask_for_load and builtin_mask_for_store. (altivec_init_builtins): Initialize the new target builtins. * config/i386/i386.c (ix86_misaligned_mem_ok): New function. Implements the target hook TARGET_VECTORIZE_MISALIGNED_MEM_OK. * tree-vectorizer.c (vect_create_data_ref): Renamed to vect_create_data_ref_ptr. Returns a pointer instead of an array-ref. (vect_create_addr_base_for_vector_ref): Additional argument (offset).
(vectorizable_store): Call vect_create_data_ref_ptr with additional arguments, and create an indirect_ref with its return value data_ref. Check aligned_access_p. (vectorizable_load): Handle misaligned loads, using software-pipelined scheme with REALIGN_LOAD_EXPR and ALIGN_INDIRECT_REF if vec_realign_load_optab is supported, or using a scheme without software-pipelining with MISALIGNED_INDIRECT_REF if the target hook misaligned_mem_ok is supported. (vect_finish_stmt_generation): Typo. (vect_enhance_data_refs_alignment): Rename loop_vinfo to loop_info. (vect_analyze_data_refs_alignment): Don't fail vectorization in the presence of misaligned loads. (vect_analyze_data_ref_access): Add check for constant init. (vect_get_symbl_and_dr): Remove duplicate line. * tree-vectorizer.h (DR_MISALIGNMENT): Add comment. From-SVN: r87948 --- gcc/ChangeLog | 94 +++++++++ gcc/alias.c | 12 +- gcc/config/i386/i386.c | 15 ++ gcc/config/rs6000/altivec.md | 58 ++++++ gcc/config/rs6000/rs6000.c | 96 +++++++++ gcc/config/rs6000/rs6000.h | 2 + gcc/emit-rtl.c | 33 ++- gcc/expr.c | 34 +++ gcc/fold-const.c | 8 +- gcc/genopinit.c | 4 +- gcc/gimplify.c | 3 + gcc/optabs.c | 90 ++++++++ gcc/optabs.h | 10 + gcc/print-rtl.c | 12 ++ gcc/target-def.h | 12 ++ gcc/target.h | 17 ++ gcc/targhooks.c | 6 + gcc/targhooks.h | 2 + gcc/testsuite/ChangeLog | 35 ++++ gcc/testsuite/gcc.dg/vect/vect-13.c | 22 +- gcc/testsuite/gcc.dg/vect/vect-26.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-27.c | 4 +- gcc/testsuite/gcc.dg/vect/vect-27a.c | 47 +++++ gcc/testsuite/gcc.dg/vect/vect-28.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-29.c | 4 +- gcc/testsuite/gcc.dg/vect/vect-29a.c | 50 +++++ gcc/testsuite/gcc.dg/vect/vect-40.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-41.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-42.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-43.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-44.c | 13 +- gcc/testsuite/gcc.dg/vect/vect-45.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-46.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-47.c | 2 +- 
gcc/testsuite/gcc.dg/vect/vect-48.c | 12 +- gcc/testsuite/gcc.dg/vect/vect-48a.c | 58 ++++++ gcc/testsuite/gcc.dg/vect/vect-49.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-50.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-51.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-52.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-53.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-54.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-55.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-56.c | 5 +- gcc/testsuite/gcc.dg/vect/vect-56a.c | 56 +++++ gcc/testsuite/gcc.dg/vect/vect-57.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-58.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-59.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-60.c | 4 +- gcc/testsuite/gcc.dg/vect/vect-61.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-72.c | 47 +++++ gcc/testsuite/gcc.dg/vect/vect-72a.c | 47 +++++ gcc/testsuite/gcc.dg/vect/vect-75.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-76.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-77.c | 4 +- gcc/testsuite/gcc.dg/vect/vect-77a.c | 47 +++++ gcc/testsuite/gcc.dg/vect/vect-78.c | 2 +- gcc/tree-dump.c | 2 + gcc/tree-eh.c | 2 + gcc/tree-gimple.c | 6 +- gcc/tree-pretty-print.c | 27 +++ gcc/tree-ssa-alias.c | 11 +- gcc/tree-ssa-dce.c | 4 +- gcc/tree-ssa-dom.c | 4 +- gcc/tree-ssa-loop-im.c | 6 +- gcc/tree-ssa-loop-ivopts.c | 29 ++- gcc/tree-ssa-operands.c | 16 +- gcc/tree-vectorizer.c | 391 ++++++++++++++++++++++++++--------- gcc/tree-vectorizer.h | 1 + gcc/tree.c | 4 + gcc/tree.def | 33 +++ gcc/tree.h | 10 +- 72 files changed, 1398 insertions(+), 167 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-27a.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-29a.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-48a.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-56a.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-72.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-72a.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-77a.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aa7bb17..65f8c94 100644 --- a/gcc/ChangeLog +++ 
b/gcc/ChangeLog @@ -1,3 +1,97 @@ +2004-09-23 Dorit Naishlos + + * tree.def (ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF): + New tree-codes. + * tree.h (REF_ORIGINAL): Consider ALIGN_INDIRECT_REF and + MISALIGNED_INDIRECT_REF. + * alias.c (get_alias_set, nonoverlapping_memrefs_p): Likewise. + * emit-rtl.c (mem_expr_equal_p, set_mem_attributes_minus_bitpos): + Likewise. + * expr.c (safe_from_p, expand_expr_real_1, rewrite_address_base) + (find_interesting_uses_address): Likewise. + * fold-const.c (non_lvalue, operand_equal_p): Likewise. + (build_fold_addr_expr_with_type): Likewise. + * gimplify.c (gimplify_addr_expr, gimplify_expr): Likewise. + * print-rtl.c (print_mem_expr): Likewise. + * tree-dump.c (dequeue_and_dump): Likewise. + * tree-eh.c (tree_could_trap_p): Likewise. + * tree-gimple.c (is_gimple_addressable, get_base_address): Likewise. + * tree-pretty-print.c (op_prio, op_symbol, dump_generic_node): Likewise. + * tree-ssa-alias.c (find_ptr_dereference, ptr_is_dereferenced_by): + Likewise. + * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Likewise. + * tree-ssa-dom.c (record_equivalences_from_stmt): Likewise. + * tree-ssa-loop-im.c (for_each_index, is_call_clobbered_ref): Likewise. + * tree-ssa-loop-ivopts.c (find_interesting_uses_address): Likewise. + (add_address_candidates, rewrite_address_base): Likewise. + * tree-ssa-operands.c (get_expr_operands, get_indirect_ref_operands): + Likewise. + * tree.c (staticp, build1_stat): Likewise. + + * tree.def (REALIGN_LOAD_EXPR, REALIGN_STORE_EXPR): New tree-codes. + * tree-pretty-print.c (dump_generic_node): Consider REALIGN_LOAD_EXPR. + * tree-ssa-operands.c (get_expr_operands): Likewise. + * expr.c (expand_expr_real_1): Likewise. + + * optabs.h (vec_realign_store_optab, vec_realign_load_optab): New + optabs. + (OTI_vec_realign_store, OTI_vec_realign_load): New optab_index values + for the new optabs. + (expand_ternary_op): New function. + * genopinit.c (optabs): Handle the new optabs. 
+ * optabs.c (optab_for_tree_code): Add cases for the new tree-codes. + (init_optabs): Initialize vec_realign_load_optab. + (expand_ternary_op): New function. + + * target-def.h (TARGET_VECTORIZE): New member for struct gcc_target. + (TARGET_VECTORIZE_MISALIGNED_MEM_OK): New member for targetm.vectorize. + (TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD): Likewise. + (TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE): Likewise. + * target.h (struct vectorize): New member for struct gcc_target. + (misaligned_mem_ok): New member for targetm.vectorize. + (builtin_mask_for_load): Likewise. + (builtin_mask_for_store): Likewise. + * targhooks.c (default_vect_misaligned_mem_ok): New function. + * targhooks.h (default_vect_misaligned_mem_ok): Declare. + + * config/rs6000/altivec.md (build_vector_mask_for_load): New + define_expand. + (vec_realign_load_v4si, vec_realign_load_v4sf, vec_realign_load_v8hi) + (vec_realign_load_v16qi): New define_insn. + * config/rs6000/rs6000.h (ALTIVEC_BUILTIN_MASK_FOR_LOAD) + (ALTIVEC_BUILTIN_MASK_FOR_STORE): New target builtins. + * config/rs6000/rs6000.c (altivec_builtin_mask_for_load) + (altivec_builtin_mask_for_store): New variables. + (rs6000_builtin_mask_for_load): New function. Implements + TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD. + (rs6000_builtin_mask_for_store): New function. Implements + TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE. + (rs6000_expand_builtin): Expand the target builtins + builtin_mask_for_load and builtin_mask_for_store. + (altivec_init_builtins): Initialize the new target builtins. + * config/i386/i386.c (ix86_misaligned_mem_ok): New function. + Implements the target hook TARGET_VECTORIZE_MISALIGNED_MEM_OK. + + * tree-vectorizer.c (vect_create_data_ref): Renamed to + vect_create_data_ref_ptr. Returns a pointer instead of an array-ref. + (vect_create_addr_base_for_vector_ref): Additional argument (offset).
+ (vectorizable_store): Call vect_create_data_ref_ptr with additional + arguments, and create an indirect_ref with its return value data_ref. + Check aligned_access_p. + (vectorizable_load): Handle misaligned loads, using software-pipelined + scheme with REALIGN_LOAD_EXPR and ALIGN_INDIRECT_REF if + vec_realign_load_optab is supported, or using a scheme without + software-pipelining with MISALIGNED_INDIRECT_REF if the target hook + misaligned_mem_ok is supported. + + (vect_finish_stmt_generation): Typo. + (vect_enhance_data_refs_alignment): Rename loop_vinfo to loop_info. + (vect_analyze_data_refs_alignment): Don't fail vectorization in the + presence of misaligned loads. + (vect_analyze_data_ref_access): Add check for constant init. + (vect_get_symbl_and_dr): Remove duplicate line. + * tree-vectorizer.h (DR_MISALIGNMENT): Add comment. + 2004-09-23 Kazu Hirata * builtins.c: Fix a comment typo. diff --git a/gcc/alias.c b/gcc/alias.c index b937cb1..e096cbf6 100644 --- a/gcc/alias.c +++ b/gcc/alias.c @@ -450,7 +450,9 @@ get_alias_set (tree t) } /* Check for accesses through restrict-qualified pointers. 
*/ - if (TREE_CODE (inner) == INDIRECT_REF) + if (TREE_CODE (inner) == INDIRECT_REF + || TREE_CODE (inner) == ALIGN_INDIRECT_REF + || TREE_CODE (inner) == MISALIGNED_INDIRECT_REF) { tree decl = find_base_decl (TREE_OPERAND (inner, 0)); @@ -2006,7 +2008,9 @@ nonoverlapping_memrefs_p (rtx x, rtx y) moffsetx = adjust_offset_for_component_ref (exprx, moffsetx); exprx = t; } - else if (TREE_CODE (exprx) == INDIRECT_REF) + else if (TREE_CODE (exprx) == INDIRECT_REF + || TREE_CODE (exprx) == ALIGN_INDIRECT_REF + || TREE_CODE (exprx) == MISALIGNED_INDIRECT_REF) { exprx = TREE_OPERAND (exprx, 0); if (flag_argument_noalias < 2 @@ -2023,7 +2027,9 @@ nonoverlapping_memrefs_p (rtx x, rtx y) moffsety = adjust_offset_for_component_ref (expry, moffsety); expry = t; } - else if (TREE_CODE (expry) == INDIRECT_REF) + else if (TREE_CODE (expry) == INDIRECT_REF + || TREE_CODE (expry) == ALIGN_INDIRECT_REF + || TREE_CODE (expry) == MISALIGNED_INDIRECT_REF) { expry = TREE_OPERAND (expry, 0); if (flag_argument_noalias < 2 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 09db92e..de8411e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -868,6 +868,7 @@ static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); static int ix86_issue_rate (void); static int ix86_adjust_cost (rtx, rtx, rtx, int); static int ia32_multipass_dfa_lookahead (void); +static bool ix86_misaligned_mem_ok (enum machine_mode); static void ix86_init_mmx_sse_builtins (void); static rtx x86_this_parameter (tree); static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, @@ -1014,6 +1015,9 @@ static void init_ext_80387_constants (void); #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ ia32_multipass_dfa_lookahead +#undef TARGET_VECTORIZE_MISALIGNED_MEM_OK +#define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok + #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall @@ -11616,6 +11620,17 @@ 
ia32_multipass_dfa_lookahead (void) } +/* Implement the target hook targetm.vectorize.misaligned_mem_ok. */ + +static bool +ix86_misaligned_mem_ok (enum machine_mode mode) +{ + if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) + return true; + else + return false; +} + /* Compute the alignment given to a constant that is being placed in memory. EXP is the constant and ALIGN is the alignment that the object would ordinarily have. diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index ff58d8e..0eb29b1 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1902,6 +1902,27 @@ "lvsr %0,%y1" [(set_attr "type" "vecload")]) +(define_expand "build_vector_mask_for_load" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand 1 "memory_operand" "m")] 195))] + "TARGET_ALTIVEC" + " +{ + rtx addr; + rtx temp; + + if (GET_CODE (operands[1]) != MEM) + abort (); + + addr = XEXP (operands[1], 0); + temp = gen_reg_rtx (GET_MODE (addr)); + emit_insn (gen_rtx_SET (VOIDmode, temp, + gen_rtx_NEG (GET_MODE (addr), addr))); + emit_insn (gen_altivec_lvsr (operands[0], + gen_rtx_MEM (GET_MODE (operands[1]), temp))); + DONE; +}") + ;; Parallel some of the LVE* and STV*'s with unspecs because some have ;; identical rtl but different instructions-- and gcc gets confused. 
@@ -2062,3 +2083,40 @@ "vspltisb %2,0\;vsubsws %3,%2,%1\;vmaxsw %0,%1,%3" [(set_attr "type" "vecsimple") (set_attr "length" "12")]) + +(define_insn "vec_realign_load_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 215))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "vec_realign_load_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 216))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "vec_realign_load_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 217))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + +(define_insn "vec_realign_load_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "v")] 218))] + "TARGET_ALTIVEC" + "vperm %0,%1,%2,%3" + [(set_attr "type" "vecperm")]) + diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index eba4a06..7f448ee 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -143,6 +143,11 @@ enum rs6000_dependence_cost rs6000_sched_costly_dep; const char *rs6000_sched_insert_nops_str; enum rs6000_nop_insertion rs6000_sched_insert_nops; +/* Support targetm.vectorize.builtin_mask_for_load. */ +tree altivec_builtin_mask_for_load; +/* Support targetm.vectorize.builtin_mask_for_store. 
*/ +tree altivec_builtin_mask_for_store; + /* Size of long double */ const char *rs6000_long_double_size_string; int rs6000_long_double_type_size; @@ -681,6 +686,8 @@ static int redefine_groups (FILE *, int, rtx, rtx); static int pad_groups (FILE *, int, rtx, rtx); static void rs6000_sched_finish (FILE *, int); static int rs6000_use_sched_lookahead (void); +static tree rs6000_builtin_mask_for_load (void); +static tree rs6000_builtin_mask_for_store (void); static void rs6000_init_builtins (void); static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx); @@ -905,6 +912,12 @@ static const char alt_reg_names[][8] = #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead +#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD +#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load + +#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE +#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE rs6000_builtin_mask_for_store + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins @@ -1536,6 +1549,26 @@ rs6000_override_options (const char *default_cpu) } } +/* Implement targetm.vectorize.builtin_mask_for_load. */ +static tree +rs6000_builtin_mask_for_load (void) +{ + if (TARGET_ALTIVEC) + return altivec_builtin_mask_for_load; + else + return 0; +} + +/* Implement targetm.vectorize.builtin_mask_for_store. */ +static tree +rs6000_builtin_mask_for_store (void) +{ + if (TARGET_ALTIVEC) + return altivec_builtin_mask_for_store; + else + return 0; +} + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. 
@@ -7202,6 +7235,48 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, rtx ret; bool success; + if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD + || fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE) + { + int icode = (int) CODE_FOR_altivec_lvsr; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode = insn_data[icode].operand[1].mode; + tree arg; + rtx op, addr, pat; + + if (!TARGET_ALTIVEC) + abort (); + + arg = TREE_VALUE (arglist); + if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE) + abort (); + op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL); + addr = memory_address (mode, op); + if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE) + op = addr; + else + { + /* For the load case need to negate the address. */ + op = gen_reg_rtx (GET_MODE (addr)); + emit_insn (gen_rtx_SET (VOIDmode, op, + gen_rtx_NEG (GET_MODE (addr), addr))); + } + op = gen_rtx_MEM (mode, op); + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + /*pat = gen_altivec_lvsr (target, op);*/ + pat = GEN_FCN (icode) (target, op); + if (!pat) + return 0; + emit_insn (pat); + + return target; + } + if (TARGET_ALTIVEC) { ret = altivec_expand_builtin (exp, target, &success); @@ -7691,6 +7766,9 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, pcchar_type_node, NULL_TREE); + tree id; + tree decl; + def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4sf", v4sf_ftype_pcfloat, ALTIVEC_BUILTIN_LD_INTERNAL_4sf); def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4sf", void_ftype_pfloat_v4sf, @@ -7792,6 +7870,24 @@ altivec_init_builtins (void) def_builtin (d->mask, d->name, type, d->code); } + + /* Initialize target builtin that implements + targetm.vectorize.builtin_mask_for_load. 
*/ + id = get_identifier ("__builtin_altivec_mask_for_load"); + decl = build_decl (FUNCTION_DECL, id, v16qi_ftype_long_pcvoid); + DECL_BUILT_IN_CLASS (decl) = BUILT_IN_MD; + DECL_FUNCTION_CODE (decl) = ALTIVEC_BUILTIN_MASK_FOR_LOAD; + /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */ + altivec_builtin_mask_for_load = decl; + + /* Initialize target builtin that implements + targetm.vectorize.builtin_mask_for_store. */ + id = get_identifier ("__builtin_altivec_mask_for_store"); + decl = build_decl (FUNCTION_DECL, id, v16qi_ftype_long_pcvoid); + DECL_BUILT_IN_CLASS (decl) = BUILT_IN_MD; + DECL_FUNCTION_CODE (decl) = ALTIVEC_BUILTIN_MASK_FOR_STORE; + /* Record the decl. Will be used by rs6000_builtin_mask_for_store. */ + altivec_builtin_mask_for_store = decl; } static void diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 801d54f..399f2e2 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2822,6 +2822,8 @@ enum rs6000_builtins ALTIVEC_BUILTIN_ABS_V8HI, ALTIVEC_BUILTIN_ABS_V16QI, ALTIVEC_BUILTIN_COMPILETIME_ERROR, + ALTIVEC_BUILTIN_MASK_FOR_LOAD, + ALTIVEC_BUILTIN_MASK_FOR_STORE, /* SPE builtins. */ SPE_BUILTIN_EVADDW, diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index ce597b0..2572b85 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -1485,7 +1485,9 @@ mem_expr_equal_p (tree expr1, tree expr2) && mem_expr_equal_p (TREE_OPERAND (expr1, 1), /* field decl */ TREE_OPERAND (expr2, 1)); - if (TREE_CODE (expr1) == INDIRECT_REF) + if (TREE_CODE (expr1) == INDIRECT_REF + || TREE_CODE (expr1) == ALIGN_INDIRECT_REF + || TREE_CODE (expr1) == MISALIGNED_INDIRECT_REF) return mem_expr_equal_p (TREE_OPERAND (expr1, 0), TREE_OPERAND (expr2, 0)); @@ -1546,8 +1548,19 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp, /* We can set the alignment from the type if we are making an object, this is an INDIRECT_REF, or if TYPE_ALIGN_OK. 
*/ - if (objectp || TREE_CODE (t) == INDIRECT_REF || TYPE_ALIGN_OK (type)) + if (objectp || TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == ALIGN_INDIRECT_REF + || TYPE_ALIGN_OK (type)) align = MAX (align, TYPE_ALIGN (type)); + else + if (TREE_CODE (t) == MISALIGNED_INDIRECT_REF) + { + if (integer_zerop (TREE_OPERAND (t, 1))) + /* We don't know anything about the alignment. */ + align = BITS_PER_UNIT; + else + align = tree_low_cst (TREE_OPERAND (t, 1), 1); + } /* If the size is known, we can set that. */ if (TYPE_SIZE_UNIT (type) && host_integerp (TYPE_SIZE_UNIT (type), 1)) @@ -1672,7 +1685,9 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp, the size we got from the type? */ } else if (flag_argument_noalias > 1 - && TREE_CODE (t2) == INDIRECT_REF + && (TREE_CODE (t2) == INDIRECT_REF + || TREE_CODE (t2) == ALIGN_INDIRECT_REF + || TREE_CODE (t2) == MISALIGNED_INDIRECT_REF) && TREE_CODE (TREE_OPERAND (t2, 0)) == PARM_DECL) { expr = t2; @@ -1683,7 +1698,9 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp, /* If this is a Fortran indirect argument reference, record the parameter decl. */ else if (flag_argument_noalias > 1 - && TREE_CODE (t) == INDIRECT_REF + && (TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == ALIGN_INDIRECT_REF + || TREE_CODE (t) == MISALIGNED_INDIRECT_REF) && TREE_CODE (TREE_OPERAND (t, 0)) == PARM_DECL) { expr = t; @@ -1701,6 +1718,14 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp, size = plus_constant (size, apply_bitpos / BITS_PER_UNIT); } + if (TREE_CODE (t) == ALIGN_INDIRECT_REF) + { + /* Force EXPR and OFFSET to NULL, since we don't know exactly what + we're overlapping. */ + offset = NULL; + expr = NULL; + } + /* Now set the attributes we computed above.
*/ MEM_ATTRS (ref) = get_mem_attrs (alias, expr, offset, size, align, GET_MODE (ref)); diff --git a/gcc/expr.c b/gcc/expr.c index da8304a..96cab62 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5829,6 +5829,8 @@ safe_from_p (rtx x, tree exp, int top_p) } break; + case MISALIGNED_INDIRECT_REF: + case ALIGN_INDIRECT_REF: case INDIRECT_REF: if (MEM_P (x) && alias_sets_conflict_p (MEM_ALIAS_SET (x), @@ -6745,11 +6747,17 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, return target; } + case MISALIGNED_INDIRECT_REF: + case ALIGN_INDIRECT_REF: case INDIRECT_REF: { tree exp1 = TREE_OPERAND (exp, 0); tree orig; + if (code == MISALIGNED_INDIRECT_REF + && !targetm.vectorize.misaligned_mem_ok (mode)) + abort (); + if (modifier != EXPAND_WRITE) { tree t; @@ -6761,6 +6769,14 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, op0 = expand_expr (exp1, NULL_RTX, VOIDmode, EXPAND_SUM); op0 = memory_address (mode, op0); + + if (code == ALIGN_INDIRECT_REF) + { + int align = TYPE_ALIGN_UNIT (type); + op0 = gen_rtx_AND (Pmode, op0, GEN_INT (-align)); + op0 = memory_address (mode, op0); + } + temp = gen_rtx_MEM (mode, op0); orig = REF_ORIGINAL (exp); @@ -8203,6 +8219,24 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, return expand_expr_real (TREE_OPERAND (exp, 0), original_target, tmode, modifier, alt_rtl); + case REALIGN_LOAD_EXPR: + { + tree oprnd0 = TREE_OPERAND (exp, 0); + tree oprnd1 = TREE_OPERAND (exp, 1); + tree oprnd2 = TREE_OPERAND (exp, 2); + rtx op2; + + this_optab = optab_for_tree_code (code, type); + expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, 0); + op2 = expand_expr (oprnd2, NULL_RTX, VOIDmode, 0); + temp = expand_ternary_op (mode, this_optab, op0, op1, op2, + target, unsignedp); + if (temp == 0) + abort (); + return temp; + } + + default: return lang_hooks.expand_expr (exp, original_target, tmode, modifier, alt_rtl); diff --git a/gcc/fold-const.c b/gcc/fold-const.c index c762fad..7049f4f 100644 --- 
a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -1990,6 +1990,8 @@ non_lvalue (tree x) case COMPONENT_REF: case INDIRECT_REF: + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: case ARRAY_REF: case ARRAY_RANGE_REF: case BIT_FIELD_REF: @@ -2466,6 +2468,8 @@ operand_equal_p (tree arg0, tree arg1, unsigned int flags) switch (TREE_CODE (arg0)) { case INDIRECT_REF: + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: case REALPART_EXPR: case IMAGPART_EXPR: return operand_equal_p (TREE_OPERAND (arg0, 0), @@ -10478,7 +10482,9 @@ build_fold_addr_expr_with_type (tree t, tree ptrtype) if (TREE_CODE (t) == WITH_SIZE_EXPR) t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == INDIRECT_REF) + /* Note: doesn't apply to ALIGN_INDIRECT_REF */ + if (TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == MISALIGNED_INDIRECT_REF) { t = TREE_OPERAND (t, 0); if (TREE_TYPE (t) != ptrtype) diff --git a/gcc/genopinit.c b/gcc/genopinit.c index 273a5fb..0d39f67 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -169,7 +169,9 @@ static const char * const optabs[] = "cmpmem_optab[$A] = CODE_FOR_$(cmpmem$a$)", "vec_set_optab->handlers[$A].insn_code = CODE_FOR_$(vec_set$a$)", "vec_extract_optab->handlers[$A].insn_code = CODE_FOR_$(vec_extract$a$)", - "vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)" }; + "vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)", + "vec_realign_store_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_store_$a$)", + "vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)" }; static void gen_insn (rtx); diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 1bf14a7..27744bc 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -3052,6 +3052,7 @@ gimplify_addr_expr (tree *expr_p, tree *pre_p, tree *post_p) switch (TREE_CODE (op0)) { case INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: do_indirect_ref: /* Check if we are dealing with an expression of the form '&*ptr'. 
While the front end folds away '&*ptr' into 'ptr', these @@ -3642,6 +3643,8 @@ gimplify_expr (tree *expr_p, tree *pre_p, tree *post_p, recalculate_side_effects (*expr_p); break; + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: case INDIRECT_REF: ret = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, post_p, is_gimple_reg, fb_rvalue); diff --git a/gcc/optabs.c b/gcc/optabs.c index 0b75a81..abb6970 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -286,6 +286,12 @@ optab_for_tree_code (enum tree_code code, tree type) case MIN_EXPR: return TYPE_UNSIGNED (type) ? umin_optab : smin_optab; + case REALIGN_STORE_EXPR: + return vec_realign_store_optab; + + case REALIGN_LOAD_EXPR: + return vec_realign_load_optab; + default: break; } @@ -313,6 +319,88 @@ optab_for_tree_code (enum tree_code code, tree type) } } + +/* Generate code to perform an operation specified by TERNARY_OPTAB + on operands OP0, OP1 and OP2, with result having machine-mode MODE. + + UNSIGNEDP is for the case where we have to widen the operands + to perform the operation. It says to use zero-extension. + + If TARGET is nonzero, the value + is generated there, if it is convenient to do so. + In all cases an rtx is returned for the locus of the value; + this may or may not be TARGET. */ + +rtx +expand_ternary_op (enum machine_mode mode, optab ternary_optab, rtx op0, + rtx op1, rtx op2, rtx target, int unsignedp) +{ + int icode = (int) ternary_optab->handlers[(int) mode].insn_code; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode mode2 = insn_data[icode].operand[3].mode; + rtx temp; + rtx pat; + rtx xop0 = op0, xop1 = op1, xop2 = op2; + + if (ternary_optab->handlers[(int) mode].insn_code == CODE_FOR_nothing) + abort (); + + if (!target + || ! 
(*insn_data[icode].operand[0].predicate) (target, mode)) + temp = gen_reg_rtx (mode); + else + temp = target; + + /* In case the insn wants input operands in modes different from + those of the actual operands, convert the operands. It would + seem that we don't need to convert CONST_INTs, but we do, so + that they're properly zero-extended, sign-extended or truncated + for their mode. */ + + if (GET_MODE (op0) != mode0 && mode0 != VOIDmode) + xop0 = convert_modes (mode0, + GET_MODE (op0) != VOIDmode + ? GET_MODE (op0) + : mode, + xop0, unsignedp); + + if (GET_MODE (op1) != mode1 && mode1 != VOIDmode) + xop1 = convert_modes (mode1, + GET_MODE (op1) != VOIDmode + ? GET_MODE (op1) + : mode, + xop1, unsignedp); + + if (GET_MODE (op2) != mode2 && mode2 != VOIDmode) + xop2 = convert_modes (mode2, + GET_MODE (op2) != VOIDmode + ? GET_MODE (op2) + : mode, + xop2, unsignedp); + + /* Now, if insn's predicates don't allow our operands, put them into + pseudo regs. */ + + if (! (*insn_data[icode].operand[1].predicate) (xop0, mode0) + && mode0 != VOIDmode) + xop0 = copy_to_mode_reg (mode0, xop0); + + if (! (*insn_data[icode].operand[2].predicate) (xop1, mode1) + && mode1 != VOIDmode) + xop1 = copy_to_mode_reg (mode1, xop1); + + if (! (*insn_data[icode].operand[3].predicate) (xop2, mode2) + && mode2 != VOIDmode) + xop2 = copy_to_mode_reg (mode2, xop2); + + pat = GEN_FCN (icode) (temp, xop0, xop1, xop2); + + emit_insn (pat); + return temp; +} + + /* Like expand_binop, but return a constant rtx if the result can be calculated at compile time. The arguments and return value are otherwise the same as for expand_binop. */ @@ -4657,6 +4745,8 @@ init_optabs (void) vec_extract_optab = init_optab (UNKNOWN); vec_set_optab = init_optab (UNKNOWN); vec_init_optab = init_optab (UNKNOWN); + vec_realign_load_optab = init_optab (UNKNOWN); + /* Conversions. 
*/ sext_optab = init_convert_optab (SIGN_EXTEND); zext_optab = init_convert_optab (ZERO_EXTEND); diff --git a/gcc/optabs.h b/gcc/optabs.h index b5632c7..8e895f2 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -228,6 +228,10 @@ enum optab_index OTI_vec_extract, /* Initialize vector operand. */ OTI_vec_init, + /* Extract specified elements from vectors, for vector store. */ + OTI_vec_realign_store, + /* Extract specified elements from vectors, for vector load. */ + OTI_vec_realign_load, OTI_MAX }; @@ -330,6 +334,8 @@ extern GTY(()) optab optab_table[OTI_MAX]; #define vec_set_optab (optab_table[OTI_vec_set]) #define vec_extract_optab (optab_table[OTI_vec_extract]) #define vec_init_optab (optab_table[OTI_vec_init]) +#define vec_realign_store_optab (optab_table[OTI_vec_realign_store]) +#define vec_realign_load_optab (optab_table[OTI_vec_realign_load]) /* Conversion optabs have their own table and indexes. */ enum convert_optab_index @@ -405,6 +411,10 @@ extern enum insn_code cmpmem_optab[NUM_MACHINE_MODES]; /* Define functions given in optabs.c. */ +extern rtx expand_ternary_op (enum machine_mode mode, optab ternary_optab, + rtx op0, rtx op1, rtx op2, rtx target, + int unsignedp); + /* Expand a binary operation given optab and rtx operands. 
*/ extern rtx expand_binop (enum machine_mode, optab, rtx, rtx, rtx, int, enum optab_methods); diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c index b4b2ca3..32ef01c 100644 --- a/gcc/print-rtl.c +++ b/gcc/print-rtl.c @@ -102,6 +102,18 @@ print_mem_expr (FILE *outfile, tree expr) print_mem_expr (outfile, TREE_OPERAND (expr, 0)); fputs (")", outfile); } + else if (TREE_CODE (expr) == ALIGN_INDIRECT_REF) + { + fputs (" (A*", outfile); + print_mem_expr (outfile, TREE_OPERAND (expr, 0)); + fputs (")", outfile); + } + else if (TREE_CODE (expr) == MISALIGNED_INDIRECT_REF) + { + fputs (" (M*", outfile); + print_mem_expr (outfile, TREE_OPERAND (expr, 0)); + fputs (")", outfile); + } else if (TREE_CODE (expr) == RESULT_DECL) fputs (" <result>", outfile); else diff --git a/gcc/target-def.h b/gcc/target-def.h index a77c3be..61adf14 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -273,6 +273,17 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. TARGET_SCHED_DFA_NEW_CYCLE, \ TARGET_SCHED_IS_COSTLY_DEPENDENCE} +#ifndef TARGET_VECTORIZE_MISALIGNED_MEM_OK +#define TARGET_VECTORIZE_MISALIGNED_MEM_OK default_vect_misaligned_mem_ok +#endif +#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0 +#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE 0 + +#define TARGET_VECTORIZE \ + {TARGET_VECTORIZE_MISALIGNED_MEM_OK, \ + TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \ + TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE} + /* In except.c */ #define TARGET_EH_RETURN_FILTER_MODE default_eh_return_filter_mode @@ -466,6 +477,7 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
{ \ TARGET_ASM_OUT, \ TARGET_SCHED, \ + TARGET_VECTORIZE, \ TARGET_EH_RETURN_FILTER_MODE, \ TARGET_MERGE_DECL_ATTRIBUTES, \ TARGET_MERGE_TYPE_ATTRIBUTES, \ diff --git a/gcc/target.h b/gcc/target.h index 6824c13..9f766ae 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -282,6 +282,23 @@ struct gcc_target bool (* is_costly_dependence) (rtx, rtx, rtx, int, int); } sched; + /* Functions relating to vectorization. */ + struct vectorize + { + /* The following member value is a pointer to a function called + by the vectorizer, and when expanding a MISALIGNED_INDIRECT_REF + expression. If the hook returns true (false) then a move* pattern + to/from memory can (cannot) be generated for this mode even if the + memory location is unaligned. */ + bool (* misaligned_mem_ok) (enum machine_mode); + + /* The following member values are pointers to functions called + by the vectorizer, and return the decl of the target builtin + function. */ + tree (* builtin_mask_for_load) (void); + tree (* builtin_mask_for_store) (void); + } vectorize; + /* Return machine mode for filter value.
*/ enum machine_mode (* eh_return_filter_mode) (void); diff --git a/gcc/targhooks.c b/gcc/targhooks.c index c717075..48690a1 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -269,6 +269,12 @@ default_scalar_mode_supported_p (enum machine_mode mode) } bool +default_vect_misaligned_mem_ok (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return !STRICT_ALIGNMENT; +} + +bool hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false ( CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED, enum machine_mode mode ATTRIBUTE_UNUSED, diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 4ff0ea3..024a080 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -47,6 +47,8 @@ extern void default_unwind_emit (FILE *, rtx); extern bool default_scalar_mode_supported_p (enum machine_mode); +extern bool default_vect_misaligned_mem_ok (enum machine_mode); + /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index fbd7608..536ccf5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,38 @@ +2004-09-23 Dorit Naishlos + + * gcc.dg/vect/vect-27.c: Now vectorized on altivec. + * gcc.dg/vect/vect-29.c: Now vectorized on altivec. + * gcc.dg/vect/vect-48.c: Now vectorized on altivec. + * gcc.dg/vect/vect-56.c: Now vectorized on altivec. + * gcc.dg/vect/vect-72.c: New test for altivec and sse2. + * gcc.dg/vect/vect-77.c: Now vectorized on altivec. + + * gcc.dg/vect/vect-27a.c: New test for altivec and mmx. + * gcc.dg/vect/vect-29a.c: New test for altivec and mmx. + * gcc.dg/vect/vect-48a.c: New test for altivec and mmx. + * gcc.dg/vect/vect-56a.c: New test for altivec and mmx. + * gcc.dg/vect/vect-72a.c: New test for altivec and mmx. + * gcc.dg/vect/vect-77a.c: New test for altivec and mmx. + + * gcc.dg/vect/vect-13.c: Change to run test instead of compile. + + * gcc.dg/vect/vect-44.c: Check additional cases. 
+ * gcc.dg/vect/vect-48.c: Check additional cases. + + * gcc.dg/vect/vect-26.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-27.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-28.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-29.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-4?.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-75.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-76.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-77.c: Use sse2 instead of sse. + * gcc.dg/vect/vect-78.c: Use sse2 instead of sse. + + * gcc.dg/vect/vect-5?.c: Use sse2 instead of sse. Add return 0. + * gcc.dg/vect/vect-60.c: Use sse2 instead of sse. Add return 0. + * gcc.dg/vect/vect-61.c: Use sse2 instead of sse. Add return 0. + 2004-09-23 Zdenek Dvorak * gcc.dg/tree-ssa/loop-6.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/vect-13.c b/gcc/testsuite/gcc.dg/vect/vect-13.c index 21385b4..052abe1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-13.c +++ b/gcc/testsuite/gcc.dg/vect/vect-13.c @@ -1,25 +1,41 @@ -/* { dg-do compile { target powerpc*-*-* i?86-*-* x86_64-*-* } } */ +/* { dg-do run { target powerpc*-*-* i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ +#include <stdarg.h> +#include "tree-vect.h" #define N 16 int a[N]; int results[N] = {0,1,2,3,0,0,0,0,0,0,0,0,12,13,14,15}; -int main () +int main1() { int i; int b[N] = {0,1,2,3,-4,-5,-6,-7,-8,-9,-10,-11,12,13,14,15}; - /* Not vectorizable yet (condition in loop). */ + /* Max pattern. */ for (i = 0; i < N; i++) { a[i] = (b[i] >= 0 ?
b[i] : 0); } + /* Check results */ + for (i = 0; i < N; i++) + { + if (a[i] != results[i]) + abort (); + } + return 0; } +int main (void) +{ + check_vect (); + + return main1 (); +} + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c index bfeb76e..fd21bed 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-26.c +++ b/gcc/testsuite/gcc.dg/vect/vect-26.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c index 35e8f41..d2e8c94 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-27.c +++ b/gcc/testsuite/gcc.dg/vect/vect-27.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -43,5 +43,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-27a.c b/gcc/testsuite/gcc.dg/vect/vect-27a.c new file mode 100644 index 0000000..9dd75498 --- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-27a.c @@ -0,0 +1,47 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 128 + +/* unaligned load. */ + +int main1 () +{ + int i; + int ia[N]; + int ib[N+1]; + + for (i=0; i < N; i++) + { + ib[i] = i; + } + + for (i = 1; i <= N; i++) + { + ia[i-1] = ib[i]; + } + + /* check results: */ + for (i = 1; i <= N; i++) + { + if (ia[i-1] != ib[i]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-28.c b/gcc/testsuite/gcc.dg/vect/vect-28.c index b10cf13..cb8d7cb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-28.c +++ b/gcc/testsuite/gcc.dg/vect/vect-28.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c index 80754f5..c0383c7 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-29.c +++ b/gcc/testsuite/gcc.dg/vect/vect-29.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" {
target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -46,5 +46,5 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-29a.c b/gcc/testsuite/gcc.dg/vect/vect-29a.c new file mode 100644 index 0000000..13cd5c9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-29a.c @@ -0,0 +1,50 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 128 +#define OFF 3 + +/* unaligned load.
*/ + +int main1 (int off) +{ + int i; + int ia[N]; + int ib[N+OFF]; + + for (i = 0; i < N+OFF; i++) + { + ib[i] = i; + } + + for (i = 0; i < N; i++) + { + ia[i] = ib[i+off]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != ib[i+off]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (0); /* aligned */ + main1 (OFF); /* unaligned */ + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-40.c b/gcc/testsuite/gcc.dg/vect/vect-40.c index e3dd277..2507575 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-40.c +++ b/gcc/testsuite/gcc.dg/vect/vect-40.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-41.c b/gcc/testsuite/gcc.dg/vect/vect-41.c index 0f9cae4..f54258f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-41.c +++ b/gcc/testsuite/gcc.dg/vect/vect-41.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c index 7f79d42..9d40a37 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-42.c +++ b/gcc/testsuite/gcc.dg/vect/vect-42.c
@@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-43.c b/gcc/testsuite/gcc.dg/vect/vect-43.c index 370b5dd..eedef26 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-43.c +++ b/gcc/testsuite/gcc.dg/vect/vect-43.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c index 4068ab1..8aab2fe 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-44.c +++ b/gcc/testsuite/gcc.dg/vect/vect-44.c @@ -1,13 +1,14 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" #define N 256 +typedef float afloat __attribute__ ((__aligned__(16))); void bar (float *pa, float *pb, float *pc) { @@ -42,13 +43,17 @@ main1 (float * __restrict__ pa, float * __restrict__ pb, float * __restrict__ pc int main
(void) { int i; - float a[N]; - float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; - float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; + afloat a[N+4]; + afloat b[N+4] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60,63,66,69}; + afloat c[N+4] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23}; check_vect (); main1 (a,b,c); + main1 (&a[1],b,c); + main1 (a,&b[1],c); + main1 (&a[1],&b[1],&c[1]); + return 0; } diff --git a/gcc/testsuite/gcc.dg/vect/vect-45.c b/gcc/testsuite/gcc.dg/vect/vect-45.c index c0b0029..bc5d5c8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-45.c +++ b/gcc/testsuite/gcc.dg/vect/vect-45.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-46.c b/gcc/testsuite/gcc.dg/vect/vect-46.c index 1fb08a7..7710c03 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-46.c +++ b/gcc/testsuite/gcc.dg/vect/vect-46.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-47.c b/gcc/testsuite/gcc.dg/vect/vect-47.c index e1e1d38..ccbcccf 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-47.c +++ b/gcc/testsuite/gcc.dg/vect/vect-47.c @@ -1,7
+1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c index c7485be..3dfdb95 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-48.c +++ b/gcc/testsuite/gcc.dg/vect/vect-48.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -43,14 +43,16 @@ main1 (afloat * __restrict__ pa, float * __restrict__ pb, float * __restrict__ p int main (void) { int i; - float a[N]; - float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; - float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; + afloat a[N]; + afloat b[N+1] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60}; + afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; check_vect (); main1 (a,b,c); + main1 (a,&b[1],c); + return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-48a.c b/gcc/testsuite/gcc.dg/vect/vect-48a.c new file mode 100644 index 0000000..0422edc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-48a.c
@@ -0,0 +1,58 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 256 + +typedef short ashort __attribute__ ((__aligned__(16))); + +void bar (short *pa, short *pb, short *pc) +{ + int i; + + /* check results: */ + for (i = 0; i < N; i++) + { + if (pa[i] != (pb[i] + pc[i])) + abort (); + } + + return; +} + + +int +main1 (ashort * __restrict__ pa, short * __restrict__ pb, short * __restrict__ pc) +{ + int i; + + for (i = 0; i < N; i++) + { + pa[i] = pb[i] + pc[i]; + } + + bar (pa,pb,pc); + + return 0; +} + +int main (void) +{ + int i; + ashort a[N]; + ashort b[N+1] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60}; + ashort c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; + + check_vect (); + + main1 (a,b,c); + main1 (a,&b[1],c); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-49.c b/gcc/testsuite/gcc.dg/vect/vect-49.c index 8d8b212..6a26699 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-49.c +++ b/gcc/testsuite/gcc.dg/vect/vect-49.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c index cd7cf5d..3b952a3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-50.c +++
b/gcc/testsuite/gcc.dg/vect/vect-50.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -49,6 +49,7 @@ int main (void) check_vect (); main1 (N,a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-51.c b/gcc/testsuite/gcc.dg/vect/vect-51.c index 65c0f40..7804a4c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-51.c +++ b/gcc/testsuite/gcc.dg/vect/vect-51.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -49,6 +49,7 @@ int main (void) check_vect (); main1 (N,a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c index 32a584b..60d9b79 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-52.c +++ b/gcc/testsuite/gcc.dg/vect/vect-52.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-*
} } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -51,6 +51,7 @@ int main (void) main1 (N,a,&b[1],c); main1 (N,a,&b[1],&c[1]); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-53.c b/gcc/testsuite/gcc.dg/vect/vect-53.c index 4b99304..e8f1747 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-53.c +++ b/gcc/testsuite/gcc.dg/vect/vect-53.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -51,6 +51,7 @@ int main (void) main1 (N,a,&b[1],c); main1 (N,a,&b[1],&c[1]); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c index 5ab9e02..172feae 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-54.c +++ b/gcc/testsuite/gcc.dg/vect/vect-54.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -50,6 +50,7 @@ int main (void) check_vect (); main1 (a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff
--git a/gcc/testsuite/gcc.dg/vect/vect-55.c b/gcc/testsuite/gcc.dg/vect/vect-55.c index 2257a23..e5c4ba2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-55.c +++ b/gcc/testsuite/gcc.dg/vect/vect-55.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -50,6 +50,7 @@ int main (void) check_vect (); main1 (a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c index 0e20137..9170e49 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-56.c +++ b/gcc/testsuite/gcc.dg/vect/vect-56.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -50,6 +50,7 @@ int main (void) check_vect (); main1 (a,b,c); + return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-56a.c b/gcc/testsuite/gcc.dg/vect/vect-56a.c new file mode 100644 index 0000000..5d8ed11 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-56a.c @@ -0,0 +1,56 @@ +/* { dg-do run { target powerpc*-*-* } }
*/ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 256 + +typedef short ashort __attribute__ ((__aligned__(16))); + +void bar (ashort *pa, ashort *pb, ashort *pc) +{ + int i; + + /* check results: */ + for (i = 0; i < N/2; i++) + { + if (pa[i] != (pb[i+1] + pc[i+1])) + abort (); + } + + return; +} + + +int +main1 (ashort * __restrict__ pa, ashort * __restrict__ pb, ashort * __restrict__ pc) +{ + int i; + + for (i = 0; i < N/2; i++) + { + pa[i] = pb[i+1] + pc[i+1]; + } + + bar (pa,pb,pc); + + return 0; +} + +int main (void) +{ + int i; + ashort a[N]; + ashort b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; + ashort c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; + + check_vect (); + + main1 (a,b,c); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-57.c b/gcc/testsuite/gcc.dg/vect/vect-57.c index 0e675d7..ce2e740 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-57.c +++ b/gcc/testsuite/gcc.dg/vect/vect-57.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -50,6 +50,7 @@ int main (void) check_vect (); main1 (a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c
b/gcc/testsuite/gcc.dg/vect/vect-58.c index 57c2779..c080c90 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-58.c +++ b/gcc/testsuite/gcc.dg/vect/vect-58.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -51,6 +51,7 @@ int main (void) check_vect (); main1 (n,a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-59.c b/gcc/testsuite/gcc.dg/vect/vect-59.c index 3dfbe19..4bdd7b4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-59.c +++ b/gcc/testsuite/gcc.dg/vect/vect-59.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include <stdarg.h> #include "tree-vect.h" @@ -51,6 +51,7 @@ int main (void) check_vect (); main1 (n,a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c index 276b7e2..e19d36e 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-60.c +++ b/gcc/testsuite/gcc.dg/vect/vect-60.c @@ -1,7 +1,8 @@ + /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target
powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include #include "tree-vect.h" @@ -51,6 +52,7 @@ int main (void) check_vect (); main1 (n,a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-61.c b/gcc/testsuite/gcc.dg/vect/vect-61.c index 257ef38..6df22a6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-61.c +++ b/gcc/testsuite/gcc.dg/vect/vect-61.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include #include "tree-vect.h" @@ -51,6 +51,7 @@ int main (void) check_vect (); main1 (n,a,b,c); + return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c new file mode 100644 index 0000000..1a2ad07 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-72.c @@ -0,0 +1,47 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ + +#include +#include "tree-vect.h" + +#define N 128 + +/* unaligned load. 
*/ + +int main1 () +{ + int i; + char ia[N]; + char ib[N+1]; + + for (i=0; i < N+1; i++) + { + ib[i] = i; + } + + for (i = 1; i < N+1; i++) + { + ia[i-1] = ib[i]; + } + + /* check results: */ + for (i = 1; i <= N; i++) + { + if (ia[i-1] != ib[i]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-72a.c b/gcc/testsuite/gcc.dg/vect/vect-72a.c new file mode 100644 index 0000000..71fda70 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-72a.c @@ -0,0 +1,47 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */ + +#include +#include "tree-vect.h" + +#define N 128 + +/* unaligned load. 
*/ + +int main1 () +{ + int i; + char ia[N]; + char ib[N+1]; + + for (i=0; i < N+1; i++) + { + ib[i] = i; + } + + for (i = 1; i < N+1; i++) + { + ia[i-1] = ib[i]; + } + + /* check results: */ + for (i = 1; i <= N; i++) + { + if (ia[i-1] != ib[i]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-75.c b/gcc/testsuite/gcc.dg/vect/vect-75.c index 5fcb3ab..f5fee58 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-75.c +++ b/gcc/testsuite/gcc.dg/vect/vect-75.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-76.c b/gcc/testsuite/gcc.dg/vect/vect-76.c index 11b87e3..17d6ff7 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-76.c +++ b/gcc/testsuite/gcc.dg/vect/vect-76.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-77.c b/gcc/testsuite/gcc.dg/vect/vect-77.c index c5dacc5..9f5697d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-77.c +++ b/gcc/testsuite/gcc.dg/vect/vect-77.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { 
dg-do run { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include #include "tree-vect.h" @@ -43,5 +43,5 @@ int main (void) } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-77a.c b/gcc/testsuite/gcc.dg/vect/vect-77a.c new file mode 100644 index 0000000..afa0c49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-77a.c @@ -0,0 +1,47 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */ + +#include +#include "tree-vect.h" + +#define N 8 +#define OFF 8 + +typedef int aint __attribute__ ((__aligned__(16))); + +aint ib[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10, 14, 22, 26, 34}; + +int main1 (aint *ib, int off) +{ + int i; + int ia[N]; + + for (i = 0; i < N; i++) + { + ia[i] = ib[i+off]; + } + + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != ib[i+off]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (ib, 8); + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-78.c b/gcc/testsuite/gcc.dg/vect/vect-78.c index 6a4eb8c..75ad3c2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-78.c +++ b/gcc/testsuite/gcc.dg/vect/vect-78.c @@ -1,7 +1,7 @@ /* { dg-do run { target powerpc*-*-* } } */ /* { dg-do run { 
target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */ #include #include "tree-vect.h" diff --git a/gcc/tree-dump.c b/gcc/tree-dump.c index d8d370f..fed3bfc 100644 --- a/gcc/tree-dump.c +++ b/gcc/tree-dump.c @@ -527,6 +527,8 @@ dequeue_and_dump (dump_info_p di) case TRUTH_NOT_EXPR: case ADDR_EXPR: case INDIRECT_REF: + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: case CLEANUP_POINT_EXPR: case SAVE_EXPR: case REALPART_EXPR: diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c index b7bc7c3..b7ed2ed 100644 --- a/gcc/tree-eh.c +++ b/gcc/tree-eh.c @@ -1767,6 +1767,8 @@ tree_could_trap_p (tree expr) return !in_array_bounds_p (expr); case INDIRECT_REF: + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: return !TREE_THIS_NOTRAP (expr); case ASM_EXPR: diff --git a/gcc/tree-gimple.c b/gcc/tree-gimple.c index d9fe020..fe707d5 100644 --- a/gcc/tree-gimple.c +++ b/gcc/tree-gimple.c @@ -323,7 +323,9 @@ is_gimple_addressable (tree t) return (is_gimple_id (t) || handled_component_p (t) || TREE_CODE (t) == REALPART_EXPR || TREE_CODE (t) == IMAGPART_EXPR - || TREE_CODE (t) == INDIRECT_REF); + || TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == ALIGN_INDIRECT_REF + || TREE_CODE (t) == MISALIGNED_INDIRECT_REF); } /* Return true if T is function invariant. 
Or rather a restricted @@ -563,6 +565,8 @@ get_base_address (tree t) if (SSA_VAR_P (t) || TREE_CODE (t) == STRING_CST || TREE_CODE (t) == CONSTRUCTOR + || TREE_CODE (t) == MISALIGNED_INDIRECT_REF + || TREE_CODE (t) == ALIGN_INDIRECT_REF || TREE_CODE (t) == INDIRECT_REF) return t; else diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 71196fb..0bb2072 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1018,6 +1018,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, case ADDR_EXPR: case PREDECREMENT_EXPR: case PREINCREMENT_EXPR: + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: case INDIRECT_REF: if (TREE_CODE (node) == ADDR_EXPR && (TREE_CODE (TREE_OPERAND (node, 0)) == STRING_CST @@ -1034,6 +1036,13 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, } else dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + + if (TREE_CODE (node) == MISALIGNED_INDIRECT_REF) + { + pp_string (buffer, "{misalignment: "); + dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); + pp_character (buffer, '}'); + } break; case POSTDECREMENT_EXPR: @@ -1451,6 +1460,16 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, is_stmt = false; break; + case REALIGN_LOAD_EXPR: + pp_string (buffer, "REALIGN_LOAD <"); + dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (buffer, ", "); + dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false); + pp_string (buffer, ", "); + dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false); + pp_string (buffer, ">"); + break; + default: NIY; } @@ -1710,6 +1729,8 @@ op_prio (tree op) case PREINCREMENT_EXPR: case PREDECREMENT_EXPR: case NEGATE_EXPR: + case ALIGN_INDIRECT_REF: + case MISALIGNED_INDIRECT_REF: case INDIRECT_REF: case ADDR_EXPR: case FLOAT_EXPR: @@ -1838,6 +1859,12 @@ op_symbol (tree op) case INDIRECT_REF: return "*"; + case 
ALIGN_INDIRECT_REF: + return "A*"; + + case MISALIGNED_INDIRECT_REF: + return "M*"; + case TRUNC_DIV_EXPR: case RDIV_EXPR: return "/"; diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 3d99d94..b0cdddc 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -495,14 +495,16 @@ collect_points_to_info_for (struct alias_info *ai, tree ptr) /* Helper for ptr_is_dereferenced_by. Called by walk_tree to look for - INDIRECT_REF nodes for the pointer passed in DATA. */ + (ALIGN/MISALIGNED_)INDIRECT_REF nodes for the pointer passed in DATA. */ static tree find_ptr_dereference (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data) { tree ptr = (tree) data; - if (TREE_CODE (*tp) == INDIRECT_REF + if ((TREE_CODE (*tp) == INDIRECT_REF + || TREE_CODE (*tp) == ALIGN_INDIRECT_REF + || TREE_CODE (*tp) == MISALIGNED_INDIRECT_REF) && TREE_OPERAND (*tp, 0) == ptr) return *tp; @@ -510,8 +512,9 @@ find_ptr_dereference (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data) } -/* Return true if STMT contains INDIRECT_REF <PTR>. *IS_STORE is set - to 'true' if the dereference is on the LHS of an assignment. */ +/* Return true if STMT contains (ALIGN/MISALIGNED_)INDIRECT_REF <PTR>. + *IS_STORE is set to 'true' if the dereference is on the LHS of an + assignment. 
*/ static bool ptr_is_dereferenced_by (tree ptr, tree stmt, bool *is_store) diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c index 80357dc..937a301 100644 --- a/gcc/tree-ssa-dce.c +++ b/gcc/tree-ssa-dce.c @@ -418,7 +418,9 @@ mark_stmt_if_obviously_necessary (tree stmt, bool aggressive) if (is_global_var (lhs)) mark_stmt_necessary (stmt, true); } - else if (TREE_CODE (lhs) == INDIRECT_REF) + else if (TREE_CODE (lhs) == INDIRECT_REF + || TREE_CODE (lhs) == ALIGN_INDIRECT_REF + || TREE_CODE (lhs) == MISALIGNED_INDIRECT_REF) { tree ptr = TREE_OPERAND (lhs, 0); struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr); diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c index 057e72a..aed07e2 100644 --- a/gcc/tree-ssa-dom.c +++ b/gcc/tree-ssa-dom.c @@ -2414,7 +2414,9 @@ record_equivalences_from_stmt (tree stmt, t = TREE_OPERAND (t, 0); /* Now see if this is a pointer dereference. */ - if (TREE_CODE (t) == INDIRECT_REF) + if (TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == ALIGN_INDIRECT_REF + || TREE_CODE (t) == MISALIGNED_INDIRECT_REF) { tree op = TREE_OPERAND (t, 0); diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index de86491..4aafc81 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -137,6 +137,8 @@ for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data) case SSA_NAME: return cbck (*addr_p, addr_p, data); + case MISALIGNED_INDIRECT_REF: + case ALIGN_INDIRECT_REF: case INDIRECT_REF: nxt = &TREE_OPERAND (*addr_p, 0); return cbck (*addr_p, nxt, data); @@ -1101,7 +1103,9 @@ is_call_clobbered_ref (tree ref) if (DECL_P (base)) return is_call_clobbered (base); - if (TREE_CODE (base) == INDIRECT_REF) + if (TREE_CODE (base) == INDIRECT_REF + || TREE_CODE (base) == ALIGN_INDIRECT_REF + || TREE_CODE (base) == MISALIGNED_INDIRECT_REF) { /* Check whether the alias tags associated with the pointer are call clobbered. 
*/ diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 078e8a3..08a962d 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -1269,7 +1269,9 @@ find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p) || zero_p (step)) goto fail; - if (TREE_CODE (base) == INDIRECT_REF) + if (TREE_CODE (base) == INDIRECT_REF + || TREE_CODE (base) == ALIGN_INDIRECT_REF + || TREE_CODE (base) == MISALIGNED_INDIRECT_REF) base = TREE_OPERAND (base, 0); else base = build_addr (base); @@ -1699,7 +1701,9 @@ add_address_candidates (struct ivopts_data *data, if (base != TREE_OPERAND (iv->base, 0)) { - if (TREE_CODE (base) == INDIRECT_REF) + if (TREE_CODE (base) == INDIRECT_REF + || TREE_CODE (base) == ALIGN_INDIRECT_REF + || TREE_CODE (base) == MISALIGNED_INDIRECT_REF) base = TREE_OPERAND (base, 0); else base = build_addr (base); @@ -3826,13 +3830,16 @@ unshare_and_remove_ssa_names (tree ref) static void rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with) { - tree var = get_base_address (*op), new_var, new_name, copy, name; + tree bvar, var, new_var, new_name, copy, name; tree orig; + var = bvar = get_base_address (*op); + if (!var || TREE_CODE (with) != SSA_NAME) goto do_rewrite; - - if (TREE_CODE (var) == INDIRECT_REF) + if (TREE_CODE (var) == INDIRECT_REF + || TREE_CODE (var) == ALIGN_INDIRECT_REF + || TREE_CODE (var) == MISALIGNED_INDIRECT_REF) var = TREE_OPERAND (var, 0); if (TREE_CODE (var) == SSA_NAME) { @@ -3869,12 +3876,20 @@ rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with) do_rewrite: orig = NULL_TREE; - if (TREE_CODE (*op) == INDIRECT_REF) + if (TREE_CODE (*op) == INDIRECT_REF + || TREE_CODE (*op) == ALIGN_INDIRECT_REF + || TREE_CODE (*op) == MISALIGNED_INDIRECT_REF) orig = REF_ORIGINAL (*op); if (!orig) orig = unshare_and_remove_ssa_names (*op); - *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with); + if (TREE_CODE (bvar) == ALIGN_INDIRECT_REF) + *op = build1 
(ALIGN_INDIRECT_REF, TREE_TYPE (*op), with); + else if (TREE_CODE (bvar) == MISALIGNED_INDIRECT_REF) + *op = build2 (MISALIGNED_INDIRECT_REF, TREE_TYPE (*op), with, TREE_OPERAND (*op, 1)); + else + *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with); + /* Record the original reference, for purposes of alias analysis. */ REF_ORIGINAL (*op) = orig; } diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c index 8c05169..55e6f8a 100644 --- a/gcc/tree-ssa-operands.c +++ b/gcc/tree-ssa-operands.c @@ -1009,6 +1009,11 @@ get_expr_operands (tree stmt, tree *expr_p, int flags) add_stmt_operand (expr_p, stmt, flags); return; + case MISALIGNED_INDIRECT_REF: + get_expr_operands (stmt, &TREE_OPERAND (expr, 1), flags); + /* fall through */ + + case ALIGN_INDIRECT_REF: case INDIRECT_REF: get_indirect_ref_operands (stmt, expr, flags); return; @@ -1162,6 +1167,14 @@ get_expr_operands (tree stmt, tree *expr_p, int flags) return; } + case REALIGN_LOAD_EXPR: + { + get_expr_operands (stmt, &TREE_OPERAND (expr, 0), flags); + get_expr_operands (stmt, &TREE_OPERAND (expr, 1), flags); + get_expr_operands (stmt, &TREE_OPERAND (expr, 2), flags); + return; + } + case BLOCK: case FUNCTION_DECL: case EXC_PTR_EXPR: @@ -1274,7 +1287,8 @@ get_asm_expr_operands (tree stmt) } } -/* A subroutine of get_expr_operands to handle INDIRECT_REF. */ +/* A subroutine of get_expr_operands to handle INDIRECT_REF, + ALIGN_INDIRECT_REF and MISALIGNED_INDIRECT_REF. 
*/ static void get_indirect_ref_operands (tree stmt, tree expr, int flags) diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 33f68ad..b158278 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -179,7 +179,8 @@ static bool vect_compute_data_ref_alignment static bool vect_analyze_data_ref_access (struct data_reference *); static bool vect_get_first_index (tree, tree *); static bool vect_can_force_dr_alignment_p (tree, unsigned int); -static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool); +static struct data_reference * vect_analyze_pointer_ref_access + (tree, tree, bool); static tree vect_get_base_and_bit_offset (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*); static struct data_reference * vect_analyze_pointer_ref_access @@ -193,9 +194,11 @@ static tree vect_get_symbl_and_dr /* Utility functions for the code transformation. */ static tree vect_create_destination_var (tree, tree); -static tree vect_create_data_ref (tree, block_stmt_iterator *); -static tree vect_create_index_for_vector_ref (struct loop *, block_stmt_iterator *); -static tree vect_create_addr_base_for_vector_ref (tree, tree *); +static tree vect_create_data_ref_ptr + (tree, block_stmt_iterator *, tree, tree *, bool); +static tree vect_create_index_for_vector_ref + (struct loop *, block_stmt_iterator *); +static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree); static tree get_vectype_for_scalar_type (tree); static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); static tree vect_get_vec_def_for_operand (tree, tree); @@ -702,8 +705,8 @@ vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi) Input: STMT: The statement containing the data reference. - NEW_STMT_LIST: Must be initialized to NULL_TREE or a - statement list. + NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. + OFFSET: Optional. If supplied, it is added to the initial address. 
Output: 1. Return an SSA_NAME whose value is the address of the memory location of the @@ -715,7 +718,8 @@ vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi) static tree vect_create_addr_base_for_vector_ref (tree stmt, - tree *new_stmt_list) + tree *new_stmt_list, + tree offset) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); struct loop *loop = STMT_VINFO_LOOP (stmt_info); @@ -776,7 +780,8 @@ vect_create_addr_base_for_vector_ref (tree stmt, dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref"); add_referenced_tmp_var (dest); - data_ref_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest); + data_ref_base = + force_gimple_operand (data_ref_base, &new_stmt, false, dest); append_to_statement_list_force (new_stmt, new_stmt_list); vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base); @@ -794,6 +799,17 @@ vect_create_addr_base_for_vector_ref (tree stmt, init_val = force_gimple_operand (init_oval, &new_stmt, false, dest); append_to_statement_list_force (new_stmt, new_stmt_list); + if (offset) + { + tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset"); + add_referenced_tmp_var (tmp); + vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset); + vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt); + init_val = make_ssa_name (tmp, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = init_val; + append_to_statement_list_force (vec_stmt, new_stmt_list); + } + array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val, NULL_TREE, NULL_TREE); addr_base = build_fold_addr_expr (array_ref); @@ -806,6 +822,7 @@ vect_create_addr_base_for_vector_ref (tree stmt, new_temp = make_ssa_name (addr_expr, vec_stmt); TREE_OPERAND (vec_stmt, 0) = new_temp; append_to_statement_list_force (vec_stmt, new_stmt_list); + return new_temp; } @@ -856,31 +873,56 @@ vect_align_data_ref (tree stmt) } -/* Function vect_create_data_ref. +/* Function vect_create_data_ref_ptr. 
Create a memory reference expression for vector access, to be used in a - vector load/store stmt. + vector load/store stmt. The reference is based on a new pointer to vector + type (vp). Input: - STMT: a stmt that references memory. expected to be of the form - MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. - BSI: block_stmt_iterator where new stmts can be added. + 1. STMT: a stmt that references memory. Expected to be of the form + MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>. + 2. BSI: block_stmt_iterator where new stmts can be added. + 3. OFFSET (optional): an offset to be added to the initial address accessed + by the data-ref in STMT. + 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain + pointing to the initial address. Output: - 1. Declare a new ptr to vector_type, and have it point to the array base. - For example, for vector of type V8HI: - v8hi *p0; - p0 = (v8hi *)&a; - 2. Create a data-reference based on the new vector pointer p0, and using - a new index variable 'idx'. Return the expression '(*p0)[idx]'. + 1. Declare a new ptr to vector_type, and have it point to the base of the + data reference (initial address accessed by the data reference). + For example, for vector of type V8HI, the following code is generated: + + v8hi *vp; + vp = (v8hi *)initial_address; + + if OFFSET is not supplied: + initial_address = &a[init]; + if OFFSET is supplied: + initial_address = &a[init + OFFSET]; + + Return the initial_address in INITIAL_ADDRESS. + + 2. Create a data-reference in the loop based on the new vector pointer vp, + and using a new index variable 'idx' as follows: + + vp' = vp + update + + where if ONLY_INIT is true: + update = zero + and otherwise + update = idx * vector_type_size + + Return the pointer vp'. + FORNOW: handle only aligned and consecutive accesses. 
*/ static tree -vect_create_data_ref (tree stmt, block_stmt_iterator *bsi) +vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset, + tree *initial_address, bool only_init) { - tree base_name, data_ref_base, data_ref_base_type; - tree array_type; + tree base_name; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); struct loop *loop = STMT_VINFO_LOOP (stmt_info); @@ -897,55 +939,44 @@ vect_create_data_ref (tree stmt, block_stmt_iterator *bsi) tree vec_stmt; tree new_stmt_list = NULL_TREE; tree idx; - tree new_base; - tree data_ref; - edge pe; + edge pe = loop_preheader_edge (loop); basic_block new_bb; - - /* FORNOW: make sure the data reference is aligned. */ - vect_align_data_ref (stmt); + tree vect_ptr_init; + tree vectype_size; + tree ptr_update; + tree data_ref_ptr; base_name = unshare_expr (DR_BASE_NAME (dr)); - data_ref_base = STMT_VINFO_VECT_DR_BASE (stmt_info); - data_ref_base_type = TREE_TYPE (data_ref_base); - - array_type = build_array_type (vectype, 0); - TYPE_ALIGN (array_type) = TYPE_ALIGN (data_ref_base_type); - vect_ptr_type = build_pointer_type (array_type); - if (vect_debug_details (NULL)) { + tree data_ref_base = base_name; fprintf (dump_file, "create array_ref of type: "); print_generic_expr (dump_file, vectype, TDF_SLIM); - } - - /* Create: vectype *p; */ - vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, - get_name (base_name)); - add_referenced_tmp_var (vect_ptr); - - if (vect_debug_details (NULL)) - { if (TREE_CODE (data_ref_base) == VAR_DECL) - fprintf (dump_file, "vectorizing a one dimensional array ref: "); + fprintf (dump_file, "vectorizing a one dimensional array ref: "); else if (TREE_CODE (data_ref_base) == ARRAY_REF) - fprintf (dump_file, "vectorizing a multidimensional array ref: "); + fprintf (dump_file, "vectorizing a multidimensional array ref: "); else if (TREE_CODE (data_ref_base) == COMPONENT_REF) - fprintf (dump_file, "vectorizing 
a record based array ref: "); + fprintf (dump_file, "vectorizing a record based array ref: "); else if (TREE_CODE (data_ref_base) == SSA_NAME) - fprintf (dump_file, "vectorizing a pointer ref: "); - else if (TREE_CODE (data_ref_base) == ADDR_EXPR - || TREE_CODE (data_ref_base) == PLUS_EXPR - || TREE_CODE (data_ref_base) == MINUS_EXPR) - fprintf (dump_file, "vectorizing an address expr: "); + fprintf (dump_file, "vectorizing a pointer ref: "); print_generic_expr (dump_file, base_name, TDF_SLIM); } - /* Handle aliasing: */ + /** (1) Create the new vector-pointer variable: **/ + + vect_ptr_type = build_pointer_type (vectype); + vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, + get_name (base_name)); + add_referenced_tmp_var (vect_ptr); + + + /** (2) Handle aliasing information of the new vector-pointer: **/ + tag = STMT_VINFO_MEMTAG (stmt_info); gcc_assert (tag); get_var_ann (vect_ptr)->type_mem_tag = tag; - + /* Mark for renaming all aliased variables (i.e, the may-aliases of the type-mem-tag). */ nvuses = NUM_VUSES (vuses); @@ -969,36 +1000,56 @@ vect_create_data_ref (tree stmt, block_stmt_iterator *bsi) if (TREE_CODE (def) == SSA_NAME) bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid); } - - pe = loop_preheader_edge (loop); - /* Create: (&(base[init_val]) */ - new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list); - pe = loop_preheader_edge (loop); - new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); - gcc_assert (!new_bb); + /** (3) Calculate the initial address of the vector-pointer, and set + the vector-pointer to point to it before the loop: **/ + + /* Create: (&(base[init_val+offset]) in the loop preheader. 
*/ + new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, + offset); + pe = loop_preheader_edge (loop); + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); + gcc_assert (!new_bb); + *initial_address = new_temp; - /* p = (vectype_array *) addr_base */ + /* Create: p = (vectype *) initial_base */ vec_stmt = fold_convert (vect_ptr_type, new_temp); vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); new_temp = make_ssa_name (vect_ptr, vec_stmt); TREE_OPERAND (vec_stmt, 0) = new_temp; - new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); - gcc_assert (!new_bb); + new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt); + gcc_assert (!new_bb); + vect_ptr_init = TREE_OPERAND (vec_stmt, 0); + + + /** (4) Handle the updating of the vector-pointer inside the loop: **/ + + if (only_init) /* No update in loop is required. */ + return vect_ptr_init; - /*** create data ref: '(*p)[idx]' ***/ idx = vect_create_index_for_vector_ref (loop, bsi); - new_base = build_fold_indirect_ref (new_temp); - data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE); - if (vect_debug_details (NULL)) - { - fprintf (dump_file, "created new data-ref: "); - print_generic_expr (dump_file, data_ref, TDF_SLIM); - } + /* Create: update = idx * vectype_size */ + ptr_update = create_tmp_var (integer_type_node, "update"); + add_referenced_tmp_var (ptr_update); + vectype_size = build_int_cst (integer_type_node, + GET_MODE_SIZE (TYPE_MODE (vectype))); + vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt); + new_temp = make_ssa_name (ptr_update, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); - return data_ref; + /* Create: data_ref_ptr = vect_ptr_init + update */ + vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp); + vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt); + 
new_temp = make_ssa_name (vect_ptr, vec_stmt); + TREE_OPERAND (vec_stmt, 0) = new_temp; + bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); + data_ref_ptr = TREE_OPERAND (vec_stmt, 0); + + return data_ref_ptr; } @@ -1220,8 +1271,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) /* Make sure bsi points to the stmt that is being vectorized. */ - /* Assumption: any stmts created for the vectorization of smtmt S are - inserted before S. BSI may point to S or some new stmt before it. */ + /* Assumption: any stmts created for the vectorization of stmt S were + inserted before S. BSI is expected to point to S or some new stmt before S. */ while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) bsi_next (bsi); @@ -1424,6 +1475,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) tree vectype = STMT_VINFO_VECTYPE (stmt_info); struct loop *loop = STMT_VINFO_LOOP (stmt_info); enum machine_mode vec_mode; + tree dummy; /* Is vectorizable store? */ @@ -1452,6 +1504,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (!STMT_VINFO_DATA_REF (stmt_info)) return false; + if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info))) + return false; + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; @@ -1467,7 +1522,10 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); /* Handle def. */ - data_ref = vect_create_data_ref (stmt, bsi); + /* FORNOW: make sure the data reference is aligned. */ + vect_align_data_ref (stmt); + data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); + data_ref = build_fold_indirect_ref (data_ref); /* Arguments are ready. create the new vector stmt. 
*/ *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1); @@ -1493,9 +1551,17 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) tree data_ref = NULL; tree op; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree new_temp; - enum machine_mode vec_mode; + int mode; + tree init_addr; + tree new_stmt; + tree dummy; + basic_block new_bb; + struct loop *loop = STMT_VINFO_LOOP (stmt_info); + edge pe = loop_preheader_edge (loop); + bool software_pipeline_loads_p = false; /* Is vectorizable load? */ @@ -1513,11 +1579,31 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - vec_mode = TYPE_MODE (vectype); + mode = (int) TYPE_MODE (vectype); + /* FORNOW. In some cases can vectorize even if data-type not supported - (e.g. - data copies). */ - if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing) - return false; + (e.g. - data copies). */ + if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing) + { + if (vect_debug_details (loop)) + fprintf (dump_file, "Aligned load, but unsupported type."); + return false; + } + + if (!aligned_access_p (dr)) + { + if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing + && (!targetm.vectorize.builtin_mask_for_load + || targetm.vectorize.builtin_mask_for_load ())) + software_pipeline_loads_p = true; + else if (!targetm.vectorize.misaligned_mem_ok (mode)) + { + /* Possibly unaligned access, and can't software pipeline the loads */ + if (vect_debug_details (loop)) + fprintf (dump_file, "Arbitrary load not supported."); + return false; + } + } if (!vec_stmt) /* transformation not required. */ { @@ -1530,19 +1616,130 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (vect_debug_details (NULL)) fprintf (dump_file, "transform load."); - /* Handle def. 
*/ - vec_dest = vect_create_destination_var (scalar_dest, vectype); + if (!software_pipeline_loads_p) + { + /* Create: + p = initial_addr; + indx = 0; + loop { + vec_dest = *(p); + indx = indx + 1; + } + */ + + vec_dest = vect_create_destination_var (scalar_dest, vectype); + data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false); + if (aligned_access_p (dr)) + data_ref = build_fold_indirect_ref (data_ref); + else + { + int mis = DR_MISALIGNMENT (dr); + tree tmis = (mis == -1 ? + integer_zero_node : + build_int_cst (integer_type_node, mis)); + tmis = int_const_binop (MULT_EXPR, tmis, + build_int_cst (integer_type_node, BITS_PER_UNIT), 1); + data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis); + } + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + } + else /* software-pipeline the loads */ + { + /* Create: + p1 = initial_addr; + msq_init = *(floor(p1)) + p2 = initial_addr + VS - 1; + magic = have_builtin ? 
builtin_result : initial_address; + indx = 0; + loop { + p2' = p2 + indx * vectype_size + lsq = *(floor(p2')) + vec_dest = realign_load (msq, lsq, magic) + indx = indx + 1; + msq = lsq; + } + */ + + tree offset; + tree magic; + tree phi_stmt; + tree msq_init; + tree msq, lsq; + tree dataref_ptr; + tree params; + + /* <1> Create msq_init = *(floor(p1)) in the loop preheader */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, + &init_addr, true); + data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); + gcc_assert (!new_bb); + msq_init = TREE_OPERAND (new_stmt, 0); + + + /* <2> Create lsq = *(floor(p2')) in the loop */ + offset = build_int_cst (integer_type_node, + GET_MODE_NUNITS (TYPE_MODE (vectype))); + offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false); + data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + lsq = TREE_OPERAND (new_stmt, 0); + + + /* <3> */ + if (targetm.vectorize.builtin_mask_for_load) + { + /* Create permutation mask, if required, in loop preheader. 
*/ + tree builtin_decl; + params = build_tree_list (NULL_TREE, init_addr); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + builtin_decl = targetm.vectorize.builtin_mask_for_load (); + new_stmt = build_function_call_expr (builtin_decl, params); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + new_bb = bsi_insert_on_edge_immediate (pe, new_stmt); + gcc_assert (!new_bb); + magic = TREE_OPERAND (new_stmt, 0); + } + else + { + /* Use current address instead of init_addr for reduced reg pressure. */ + magic = dataref_ptr; + } - /* Handle use. */ - op = TREE_OPERAND (stmt, 1); - data_ref = vect_create_data_ref (stmt, bsi); - /* Arguments are ready. create the new vector stmt. */ - *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - TREE_OPERAND (*vec_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + /* <4> Create msq = phi in loop */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + msq = make_ssa_name (vec_dest, NULL_TREE); + phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ + SSA_NAME_DEF_STMT (msq) = phi_stmt; + add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop)); + add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop)); + + /* <5> Create in loop */ + vec_dest = vect_create_destination_var (scalar_dest, vectype); + new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic); + new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, new_stmt); + TREE_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + } + + *vec_stmt = new_stmt; return true; } @@ -2726,7 +2923,7 @@ vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) FOR NOW: No transformation is actually performed. TODO. 
*/ static void -vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED) +vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED) { /* This pass will require a cost model to guide it whether to apply peeling @@ -2824,7 +3021,8 @@ static bool vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) { varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); - varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); + /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/ + unsigned int i; if (vect_debug_details (NULL)) @@ -2864,6 +3062,11 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) } } + /* The vectorizer now supports misaligned loads, so we don't fail anymore + in the presence of a misaligned read dataref. For some targets however + it may be preferable not to vectorize in such a case as misaligned + accesses are very costly. This should be considered in the future. */ +/* for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); @@ -2875,6 +3078,7 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) return false; } } +*/ return true; } @@ -3158,7 +3362,6 @@ vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read, case ARRAY_REF: offset = size_zero_node; - array_base = TREE_OPERAND (memref, 0); /* Store the array base in the stmt info. For one dimensional array ref a[i], the base is a, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 405ecb2..37c5706 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -119,6 +119,7 @@ vinfo_for_stmt (tree stmt) /* Info on data references alignment. */ /*-----------------------------------------------------------------*/ +/* The misalignment of the memory access in bytes. 
*/ #define DR_MISALIGNMENT(DR) (DR)->aux static inline bool diff --git a/gcc/tree.c b/gcc/tree.c index 98ab924..a0cb800 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -1493,6 +1493,8 @@ staticp (tree arg) case BIT_FIELD_REF: return NULL; + case MISALIGNED_INDIRECT_REF: + case ALIGN_INDIRECT_REF: case INDIRECT_REF: return TREE_CONSTANT (TREE_OPERAND (arg, 0)) ? arg : NULL; @@ -2412,6 +2414,8 @@ build1_stat (enum tree_code code, tree type, tree node MEM_STAT_DECL) TREE_READONLY (t) = 0; break; + case MISALIGNED_INDIRECT_REF: + case ALIGN_INDIRECT_REF: case INDIRECT_REF: /* Whether a dereference is readonly has nothing to do with whether its operand is readonly. */ diff --git a/gcc/tree.def b/gcc/tree.def index fa58aba..4c1f986 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -366,6 +366,17 @@ DEFTREECODE (BIT_FIELD_REF, "bit_field_ref", tcc_reference, 3) /* C unary `*' or Pascal `^'. One operand, an expression for a pointer. */ DEFTREECODE (INDIRECT_REF, "indirect_ref", tcc_reference, 1) +/* Like above, but aligns the referenced address (i.e., if the address + in P is not aligned on TYPE_ALIGN boundary, then &(*P) != P). */ +DEFTREECODE (ALIGN_INDIRECT_REF, "align_indirect_ref", tcc_reference, 1) + +/* Same as INDIRECT_REF, but also specifies the misalignment of the referenced + address: + Operand 0 is the referenced address (a pointer); + Operand 1 is an INTEGER_CST which represents the misalignment of the address + in bits, or 0 if the misalignment is unknown. */ +DEFTREECODE (MISALIGNED_INDIRECT_REF, "misaligned_indirect_ref", tcc_reference, 2) + /* Array indexing. Operand 0 is the array; operand 1 is a (single) array index. Operand 2, if present, is a copy of TYPE_MIN_VALUE of the index. @@ -886,6 +897,28 @@ DEFTREECODE (TREE_BINFO, "tree_binfo", tcc_exceptional, 0) Operand 1 is the size of the type in the expression.
*/ DEFTREECODE (WITH_SIZE_EXPR, "with_size_expr", tcc_expression, 2) +/* Extract elements from two input vectors Operand 0 and Operand 1 + of size VS, according to the offset OFF defined by Operand 2 as + follows: + If OFF > 0, the last VS - OFF elements of vector OP0 are concatenated to + the first OFF elements of the vector OP1. + If OFF == 0, then the returned vector is OP1. + On different targets OFF may take different forms; it can be an address, in + which case its low log2(VS)-1 bits define the offset, or it can be a mask + generated by the builtin targetm.vectorize.builtin_mask_for_load. */ +DEFTREECODE (REALIGN_LOAD_EXPR, "realign_load", tcc_expression, 3) + +/* Extract elements from two input vectors Operand 0 and Operand 1 + of size VS, according to the offset OFF defined by Operand 2 as + follows: + If OFF > 0, the last OFF elements of vector OP0 are concatenated to + the first VS - OFF elements of the vector OP1. + If OFF == 0, then the returned vector is OP0. + On different targets OFF may take different forms; it can be an address, in + which case its low log2(VS)-1 bits define the offset, or it can be a mask + generated by the builtin targetm.vectorize.builtin_mask_for_store. */ +DEFTREECODE (REALIGN_STORE_EXPR, "realign_store", tcc_expression, 3) + /* Local variables: mode:c diff --git a/gcc/tree.h b/gcc/tree.h index db2a4fb..a913d74 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -390,7 +390,7 @@ struct tree_common GTY(()) ..._TYPE TREE_THIS_NOTRAP in - INDIRECT_REF, ARRAY_REF, ARRAY_RANGE_REF + (ALIGN/MISALIGNED_)INDIRECT_REF, ARRAY_REF, ARRAY_RANGE_REF deprecated_flag: @@ -901,7 +901,8 @@ extern void tree_operand_check_failed (int, enum tree_code, /* Nonzero means this node will not trap. In an INDIRECT_REF, means accessing the memory pointed to won't generate a trap. However, this only applies to an object when used appropriately: it doesn't - mean that writing a READONLY mem won't trap. + mean that writing a READONLY mem won't trap.
Similarly for + ALIGN_INDIRECT_REF and MISALIGNED_INDIRECT_REF. In ARRAY_REF and ARRAY_RANGE_REF means that we know that the index (or slice of the array) always belongs to the range of the array. @@ -1145,8 +1146,9 @@ struct tree_vec GTY(()) #define TREE_OPERAND(NODE, I) TREE_OPERAND_CHECK (NODE, I) #define TREE_COMPLEXITY(NODE) (EXPR_CHECK (NODE)->exp.complexity) -/* In INDIRECT_REF. */ -#define REF_ORIGINAL(NODE) TREE_CHAIN (TREE_CHECK (NODE, INDIRECT_REF)) +/* In INDIRECT_REF, ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF. */ +#define REF_ORIGINAL(NODE) TREE_CHAIN (TREE_CHECK3 (NODE, \ + INDIRECT_REF, ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF)) /* In a LABELED_BLOCK_EXPR node. */ #define LABELED_BLOCK_LABEL(NODE) \ -- cgit v1.1