Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog           |  19
-rw-r--r--  gcc/Makefile.in         |  21
-rw-r--r--  gcc/tree-complex.c      | 489
-rw-r--r--  gcc/tree-optimize.c     |   3
-rw-r--r--  gcc/tree-pass.h         |   3
-rw-r--r--  gcc/tree-vect-generic.c | 531
6 files changed, 573 insertions(+), 493 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 34089ef..5dba801 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,24 @@ 2005-06-02 Richard Henderson <rth@redhat.com> + * Makefile.in (tree-vect-generic.o): New. + (OBJS-common, GTFILES, s-gtype): Add it. + * tree-complex.c (build_replicated_const, vector_inner_type, + vector_last_type, vector_last_nunits, build_word_mode_vector_type, + elem_op_func, tree_vec_extract, do_unop, do_binop, do_plus_minus, + do_negate, expand_vector_piecewise, expand_vector_parallel, + expand_vector_addition, expand_vector_operation, + type_for_widest_vector_mode, expand_vector_operations_1, + gate_expand_vector_operations, expand_vector_operations, + pass_lower_vector_ssa): Move to tree-vect-generic.c. + (tree_lower_complex): Rename from tree_lower_operations. + (pass_lower_complex): Rename from pass_pre_expand. + * tree-vect-generic.c: New file. + * tree-pass.h (pass_lower_complex): Rename from pass_pre_expand. + (pass_lower_vector): New. + * tree-optimize.c (init_tree_optimization_passes): Update to match. + +2005-06-02 Richard Henderson <rth@redhat.com> + * modulo-sched.c (doloop_register_get): Protect against doloop_end not defined. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 716e47c..30bdf4f 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -925,14 +925,15 @@ C_OBJS = c-lang.o stub-objc.o $(C_AND_OBJC_OBJS) OBJS-common = \ tree-chrec.o tree-scalar-evolution.o tree-data-ref.o \ tree-cfg.o tree-dfa.o tree-eh.o tree-ssa.o tree-optimize.o tree-gimple.o \ - gimplify.o tree-pretty-print.o tree-into-ssa.o \ + gimplify.o tree-pretty-print.o tree-into-ssa.o \ tree-outof-ssa.o tree-ssa-ccp.o tree-vn.o tree-ssa-uncprop.o \ - tree-ssa-dce.o tree-ssa-copy.o tree-nrv.o tree-ssa-copyrename.o \ + tree-ssa-dce.o tree-ssa-copy.o tree-nrv.o tree-ssa-copyrename.o \ tree-ssa-pre.o tree-ssa-live.o tree-ssa-operands.o tree-ssa-alias.o \ tree-ssa-phiopt.o tree-ssa-forwprop.o tree-nested.o tree-ssa-dse.o \ tree-ssa-dom.o domwalk.o tree-tailcall.o gimple-low.o tree-iterator.o \ - tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o tree-ssa-loop.o \ - tree-ssa-loop-niter.o tree-ssa-loop-manip.o tree-ssa-threadupdate.o \ + tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o \ + tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \ + tree-ssa-loop-manip.o tree-ssa-threadupdate.o \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-math-opts.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ @@ -2115,7 +2116,12 @@ tree-sra.o : tree-sra.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(RTL_H) \ tree-complex.o : tree-complex.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \ $(TM_H) $(TREE_FLOW_H) $(TREE_GIMPLE_H) tree-iterator.h tree-pass.h \ $(FLAGS_H) $(OPTABS_H) $(RTL_H) $(MACHMODE_H) $(EXPR_H) \ - langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) gt-tree-complex.h $(GGC_H) \ + langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) $(GGC_H) \ + coretypes.h insn-codes.h +tree-vect-generic.o : tree-vect-generic.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \ + $(TM_H) $(TREE_FLOW_H) $(TREE_GIMPLE_H) tree-iterator.h tree-pass.h \ + $(FLAGS_H) $(OPTABS_H) $(RTL_H) $(MACHMODE_H) $(EXPR_H) \ + langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) gt-tree-vect-generic.h $(GGC_H) \ coretypes.h insn-codes.h df.o : df.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ insn-config.h $(RECOG_H) function.h $(REGS_H) alloc-pool.h hard-reg-set.h \ @@ -2611,7 +2617,7 @@ GTFILES = $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/tree-phinodes.c 
$(srcdir)/tree-cfg.c \ $(srcdir)/tree-dfa.c $(srcdir)/tree-ssa-propagate.c \ $(srcdir)/tree-iterator.c $(srcdir)/gimplify.c \ - $(srcdir)/tree-chrec.h $(srcdir)/tree-complex.c \ + $(srcdir)/tree-chrec.h $(srcdir)/tree-vect-generic.c \ $(srcdir)/tree-ssa-operands.h $(srcdir)/tree-ssa-operands.c \ $(srcdir)/tree-profile.c $(srcdir)/rtl-profile.c $(srcdir)/tree-nested.c \ $(out_file) \ @@ -2630,8 +2636,7 @@ gt-expr.h gt-sdbout.h gt-optabs.h gt-bitmap.h gt-dojump.h \ gt-dwarf2out.h gt-reg-stack.h gt-dwarf2asm.h \ gt-dbxout.h gt-c-common.h gt-c-decl.h gt-c-parser.h \ gt-c-pragma.h gtype-c.h gt-cfglayout.h \ -gt-tree-mudflap.h gt-tree-complex.h \ -gt-tree-profile.h \ +gt-tree-mudflap.h gt-tree-vect-generic.h gt-tree-profile.h \ gt-tree-ssanames.h gt-tree-iterator.h gt-gimplify.h \ gt-tree-phinodes.h gt-tree-nested.h \ gt-tree-ssa-operands.h gt-tree-ssa-propagate.h \ diff --git a/gcc/tree-complex.c b/gcc/tree-complex.c index 7d7312d..4dd217d 100644 --- a/gcc/tree-complex.c +++ b/gcc/tree-complex.c @@ -1,4 +1,4 @@ -/* Lower complex number and vector operations to scalar operations. +/* Lower complex number operations to scalar operations. Copyright (C) 2004, 2005 Free Software Foundation, Inc. This file is part of GCC. @@ -563,462 +563,9 @@ expand_complex_operations_1 (block_stmt_iterator *bsi) } update_stmt_if_modified (stmt); } - -/* Build a constant of type TYPE, made of VALUE's bits replicated - every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ -static tree -build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) -{ - int width = tree_low_cst (TYPE_SIZE (inner_type), 1); - int n = HOST_BITS_PER_WIDE_INT / width; - unsigned HOST_WIDE_INT low, high, mask; - tree ret; - - gcc_assert (n); - - if (width == HOST_BITS_PER_WIDE_INT) - low = value; - else - { - mask = ((HOST_WIDE_INT)1 << width) - 1; - low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); - } - - if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) - low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; - else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) - high = 0; - else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) - high = low; - else - gcc_unreachable (); - - ret = build_int_cst_wide (type, low, high); - return ret; -} - -static GTY(()) tree vector_inner_type; -static GTY(()) tree vector_last_type; -static GTY(()) int vector_last_nunits; - -/* Return a suitable vector types made of SUBPARTS units each of mode - "word_mode" (the global variable). */ -static tree -build_word_mode_vector_type (int nunits) -{ - if (!vector_inner_type) - vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); - else if (vector_last_nunits == nunits) - { - gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); - return vector_last_type; - } - - /* We build a new type, but we canonicalize it nevertheless, - because it still saves some memory. */ - vector_last_nunits = nunits; - vector_last_type = type_hash_canon (nunits, - build_vector_type (vector_inner_type, - nunits)); - return vector_last_type; -} - -typedef tree (*elem_op_func) (block_stmt_iterator *, - tree, tree, tree, tree, tree, enum tree_code); - -static inline tree -tree_vec_extract (block_stmt_iterator *bsi, tree type, - tree t, tree bitsize, tree bitpos) -{ - if (bitpos) - return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos); - - /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will - anyway be stored in memory, so prefer NOP_EXPR. 
*/ - else if (TYPE_MODE (type) == BLKmode) - return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t); - else - return gimplify_build1 (bsi, NOP_EXPR, type, t); -} - -static tree -do_unop (block_stmt_iterator *bsi, tree inner_type, tree a, - tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, - enum tree_code code) -{ - a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); - return gimplify_build1 (bsi, code, inner_type, a); -} - -static tree -do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b, - tree bitpos, tree bitsize, enum tree_code code) -{ - a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); - b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos); - return gimplify_build2 (bsi, code, inner_type, a, b); -} - -/* Expand vector addition to scalars. This does bit twiddling - in order to increase parallelism: - - a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ - (a ^ b) & 0x80808080 - - a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ - (a ^ ~b) & 0x80808080 - - -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) - - This optimization should be done only if 4 vector items or more - fit into a word. */ -static tree -do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b, - tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, - enum tree_code code) -{ - tree inner_type = TREE_TYPE (TREE_TYPE (a)); - unsigned HOST_WIDE_INT max; - tree low_bits, high_bits, a_low, b_low, result_low, signs; - - max = GET_MODE_MASK (TYPE_MODE (inner_type)); - low_bits = build_replicated_const (word_type, inner_type, max >> 1); - high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); - - a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos); - b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); - - signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b); - b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); - if (code == PLUS_EXPR) - a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits); - else - { - a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits); - signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs); - } - - signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); - result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low); - return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); -} - -static tree -do_negate (block_stmt_iterator *bsi, tree word_type, tree b, - tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, - tree bitsize ATTRIBUTE_UNUSED, - enum tree_code code ATTRIBUTE_UNUSED) -{ - tree inner_type = TREE_TYPE (TREE_TYPE (b)); - HOST_WIDE_INT max; - tree low_bits, high_bits, b_low, result_low, signs; - - max = GET_MODE_MASK (TYPE_MODE (inner_type)); - low_bits = build_replicated_const (word_type, inner_type, max >> 1); - high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); - - b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); - - b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); - signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b); - signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); - result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low); - return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); -} - -/* Expand a vector operation to scalars, by using many operations - whose type is the vector type's inner type. 
*/ -static tree -expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f, - tree type, tree inner_type, - tree a, tree b, enum tree_code code) -{ - tree head, *chain = &head; - tree part_width = TYPE_SIZE (inner_type); - tree index = bitsize_int (0); - int nunits = TYPE_VECTOR_SUBPARTS (type); - int delta = tree_low_cst (part_width, 1) - / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); - int i; - - for (i = 0; i < nunits; - i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) - { - tree result = f (bsi, inner_type, a, b, index, part_width, code); - *chain = tree_cons (NULL_TREE, result, NULL_TREE); - chain = &TREE_CHAIN (*chain); - } - - return build1 (CONSTRUCTOR, type, head); -} - -/* Expand a vector operation to scalars with the freedom to use - a scalar integer type, or to use a different size for the items - in the vector type. */ -static tree -expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type, - tree a, tree b, - enum tree_code code) -{ - tree result, compute_type; - enum machine_mode mode; - int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; - - /* We have three strategies. If the type is already correct, just do - the operation an element at a time. Else, if the vector is wider than - one word, do it a word at a time; finally, if the vector is smaller - than one word, do it as a scalar. */ - if (TYPE_MODE (TREE_TYPE (type)) == word_mode) - return expand_vector_piecewise (bsi, f, - type, TREE_TYPE (type), - a, b, code); - else if (n_words > 1) - { - tree word_type = build_word_mode_vector_type (n_words); - result = expand_vector_piecewise (bsi, f, - word_type, TREE_TYPE (word_type), - a, b, code); - result = gimplify_val (bsi, word_type, result); - } - else - { - /* Use a single scalar operation with a mode no wider than word_mode. */ - mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); - compute_type = lang_hooks.types.type_for_mode (mode, 1); - result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); - } - - return result; -} - -/* Expand a vector operation to scalars; for integer types we can use - special bit twiddling tricks to do the sums a word at a time, using - function F_PARALLEL instead of F. These tricks are done only if - they can process at least four items, that is, only if the vector - holds at least four items and if a word can hold four items. */ -static tree -expand_vector_addition (block_stmt_iterator *bsi, - elem_op_func f, elem_op_func f_parallel, - tree type, tree a, tree b, enum tree_code code) -{ - int parts_per_word = UNITS_PER_WORD - / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); - - if (INTEGRAL_TYPE_P (TREE_TYPE (type)) - && parts_per_word >= 4 - && TYPE_VECTOR_SUBPARTS (type) >= 4) - return expand_vector_parallel (bsi, f_parallel, - type, a, b, code); - else - return expand_vector_piecewise (bsi, f, - type, TREE_TYPE (type), - a, b, code); -} - -static tree -expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type, - tree rhs, enum tree_code code) -{ - enum machine_mode compute_mode = TYPE_MODE (compute_type); - - /* If the compute mode is not a vector mode (hence we are not decomposing - a BLKmode vector to smaller, hardware-supported vectors), we may want - to expand the operations in parallel. 
*/ - if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT - && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT) - switch (code) - { - case PLUS_EXPR: - case MINUS_EXPR: - if (!TYPE_TRAP_SIGNED (type)) - return expand_vector_addition (bsi, do_binop, do_plus_minus, type, - TREE_OPERAND (rhs, 0), - TREE_OPERAND (rhs, 1), code); - break; - - case NEGATE_EXPR: - if (!TYPE_TRAP_SIGNED (type)) - return expand_vector_addition (bsi, do_unop, do_negate, type, - TREE_OPERAND (rhs, 0), - NULL_TREE, code); - break; - - case BIT_AND_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - return expand_vector_parallel (bsi, do_binop, type, - TREE_OPERAND (rhs, 0), - TREE_OPERAND (rhs, 1), code); - - case BIT_NOT_EXPR: - return expand_vector_parallel (bsi, do_unop, type, - TREE_OPERAND (rhs, 0), - NULL_TREE, code); - - default: - break; - } - - if (TREE_CODE_CLASS (code) == tcc_unary) - return expand_vector_piecewise (bsi, do_unop, type, compute_type, - TREE_OPERAND (rhs, 0), - NULL_TREE, code); - else - return expand_vector_piecewise (bsi, do_binop, type, compute_type, - TREE_OPERAND (rhs, 0), - TREE_OPERAND (rhs, 1), code); -} - -/* Return a type for the widest vector mode whose components are of mode - INNER_MODE, or NULL_TREE if none is found. */ -static tree -type_for_widest_vector_mode (enum machine_mode inner_mode, optab op) -{ - enum machine_mode best_mode = VOIDmode, mode; - int best_nunits = 0; - - if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) - mode = MIN_MODE_VECTOR_FLOAT; - else - mode = MIN_MODE_VECTOR_INT; - - for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) - if (GET_MODE_INNER (mode) == inner_mode - && GET_MODE_NUNITS (mode) > best_nunits - && op->handlers[mode].insn_code != CODE_FOR_nothing) - best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); - - if (best_mode == VOIDmode) - return NULL_TREE; - else - return lang_hooks.types.type_for_mode (best_mode, 1); -} - -/* Process one statement. If we identify a vector operation, expand it. */ static void -expand_vector_operations_1 (block_stmt_iterator *bsi) -{ - tree stmt = bsi_stmt (*bsi); - tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type; - enum tree_code code; - enum machine_mode compute_mode; - optab op; - - switch (TREE_CODE (stmt)) - { - case RETURN_EXPR: - stmt = TREE_OPERAND (stmt, 0); - if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR) - return; - - /* FALLTHRU */ - - case MODIFY_EXPR: - p_lhs = &TREE_OPERAND (stmt, 0); - p_rhs = &TREE_OPERAND (stmt, 1); - lhs = *p_lhs; - rhs = *p_rhs; - break; - - default: - return; - } - - type = TREE_TYPE (rhs); - if (TREE_CODE (type) != VECTOR_TYPE) - return; - - code = TREE_CODE (rhs); - if (TREE_CODE_CLASS (code) != tcc_unary - && TREE_CODE_CLASS (code) != tcc_binary) - return; - - if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR) - return; - - gcc_assert (code != CONVERT_EXPR); - op = optab_for_tree_code (code, type); - - /* Optabs will try converting a negation into a subtraction, so - look for it as well. TODO: negation of floating-point vectors - might be turned into an exclusive OR toggling the sign bit. */ - if (op == NULL - && code == NEGATE_EXPR - && INTEGRAL_TYPE_P (TREE_TYPE (type))) - op = optab_for_tree_code (MINUS_EXPR, type); - - /* For very wide vectors, try using a smaller vector mode. 
*/ - compute_type = type; - if (TYPE_MODE (type) == BLKmode && op) - { - tree vector_compute_type - = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op); - if (vector_compute_type != NULL_TREE) - compute_type = vector_compute_type; - } - - /* If we are breaking a BLKmode vector into smaller pieces, - type_for_widest_vector_mode has already looked into the optab, - so skip these checks. */ - if (compute_type == type) - { - compute_mode = TYPE_MODE (compute_type); - if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT - || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT) - && op != NULL - && op->handlers[compute_mode].insn_code != CODE_FOR_nothing) - return; - else - /* There is no operation in hardware, so fall back to scalars. */ - compute_type = TREE_TYPE (type); - } - - rhs = expand_vector_operation (bsi, type, compute_type, rhs, code); - if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) - *p_rhs = rhs; - else - { - /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will - be stored in memory anyway, so prefer NOP_EXPR. We should also try - performing the VIEW_CONVERT_EXPR on the left side of the - assignment. */ - if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode) - *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs); - else - *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs); - } - - mark_stmt_modified (bsi_stmt (*bsi)); -} - -/* Use this to lower vector operations introduced by the vectorizer, - if it may need the bit-twiddling tricks implemented in this file. */ - -static bool -gate_expand_vector_operations (void) -{ - return flag_tree_vectorize != 0; -} - -static void -expand_vector_operations (void) -{ - block_stmt_iterator bsi; - basic_block bb; - - FOR_EACH_BB (bb) - { - for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) - { - expand_vector_operations_1 (&bsi); - update_stmt_if_modified (bsi_stmt (bsi)); - } - } -} - -static void -tree_lower_operations (void) +tree_lower_complex (void) { int old_last_basic_block = last_basic_block; block_stmt_iterator bsi; @@ -1029,38 +576,16 @@ tree_lower_operations (void) if (bb->index >= old_last_basic_block) continue; for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) - { - expand_complex_operations_1 (&bsi); - expand_vector_operations_1 (&bsi); - } + expand_complex_operations_1 (&bsi); } } -struct tree_opt_pass pass_lower_vector_ssa = -{ - "veclower", /* name */ - gate_expand_vector_operations, /* gate */ - expand_vector_operations, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - 0, /* tv_id */ - PROP_cfg, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ - | TODO_verify_ssa - | TODO_verify_stmts | TODO_verify_flow, - 0 /* letter */ -}; - -struct tree_opt_pass pass_pre_expand = +struct tree_opt_pass pass_lower_complex = { - "oplower", /* name */ + "cplxlower", /* name */ 0, /* gate */ - tree_lower_operations, /* execute */ + tree_lower_complex, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ @@ -1073,5 +598,3 @@ struct tree_opt_pass pass_pre_expand = | TODO_verify_stmts, /* todo_flags_finish */ 0 /* letter */ }; - -#include "gt-tree-complex.h" diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c index 4d02e55..8e02f45 100644 --- a/gcc/tree-optimize.c +++ b/gcc/tree-optimize.c @@ -375,7 +375,8 @@ init_tree_optimization_passes (void) NEXT_PASS 
(pass_lower_cf); NEXT_PASS (pass_lower_eh); NEXT_PASS (pass_build_cfg); - NEXT_PASS (pass_pre_expand); + NEXT_PASS (pass_lower_complex); + NEXT_PASS (pass_lower_vector); NEXT_PASS (pass_warn_function_return); NEXT_PASS (pass_tree_profile); *p = NULL; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 8af9f83..2c255ab 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -192,7 +192,8 @@ extern struct tree_opt_pass pass_may_alias; extern struct tree_opt_pass pass_split_crit_edges; extern struct tree_opt_pass pass_pre; extern struct tree_opt_pass pass_profile; -extern struct tree_opt_pass pass_pre_expand; +extern struct tree_opt_pass pass_lower_complex; +extern struct tree_opt_pass pass_lower_vector; extern struct tree_opt_pass pass_lower_vector_ssa; extern struct tree_opt_pass pass_fold_builtins; extern struct tree_opt_pass pass_stdarg; diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c new file mode 100644 index 0000000..2da1ed2 --- /dev/null +++ b/gcc/tree-vect-generic.c @@ -0,0 +1,531 @@ +/* Lower vector operations to scalar operations. + Copyright (C) 2004, 2005 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "tm.h" +#include "rtl.h" +#include "expr.h" +#include "insn-codes.h" +#include "diagnostic.h" +#include "optabs.h" +#include "machmode.h" +#include "langhooks.h" +#include "tree-flow.h" +#include "tree-gimple.h" +#include "tree-iterator.h" +#include "tree-pass.h" +#include "flags.h" +#include "ggc.h" + + +/* Build a constant of type TYPE, made of VALUE's bits replicated + every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ +static tree +build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) +{ + int width = tree_low_cst (TYPE_SIZE (inner_type), 1); + int n = HOST_BITS_PER_WIDE_INT / width; + unsigned HOST_WIDE_INT low, high, mask; + tree ret; + + gcc_assert (n); + + if (width == HOST_BITS_PER_WIDE_INT) + low = value; + else + { + mask = ((HOST_WIDE_INT)1 << width) - 1; + low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); + } + + if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) + low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; + else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) + high = 0; + else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) + high = low; + else + gcc_unreachable (); + + ret = build_int_cst_wide (type, low, high); + return ret; +} + +static GTY(()) tree vector_inner_type; +static GTY(()) tree vector_last_type; +static GTY(()) int vector_last_nunits; + +/* Return a suitable vector types made of SUBPARTS units each of mode + "word_mode" (the global variable). 
*/ +static tree +build_word_mode_vector_type (int nunits) +{ + if (!vector_inner_type) + vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); + else if (vector_last_nunits == nunits) + { + gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); + return vector_last_type; + } + + /* We build a new type, but we canonicalize it nevertheless, + because it still saves some memory. */ + vector_last_nunits = nunits; + vector_last_type = type_hash_canon (nunits, + build_vector_type (vector_inner_type, + nunits)); + return vector_last_type; +} + +typedef tree (*elem_op_func) (block_stmt_iterator *, + tree, tree, tree, tree, tree, enum tree_code); + +static inline tree +tree_vec_extract (block_stmt_iterator *bsi, tree type, + tree t, tree bitsize, tree bitpos) +{ + if (bitpos) + return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos); + + /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will + anyway be stored in memory, so prefer NOP_EXPR. */ + else if (TYPE_MODE (type) == BLKmode) + return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t); + else + return gimplify_build1 (bsi, NOP_EXPR, type, t); +} + +static tree +do_unop (block_stmt_iterator *bsi, tree inner_type, tree a, + tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, + enum tree_code code) +{ + a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); + return gimplify_build1 (bsi, code, inner_type, a); +} + +static tree +do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b, + tree bitpos, tree bitsize, enum tree_code code) +{ + a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); + b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos); + return gimplify_build2 (bsi, code, inner_type, a, b); +} + +/* Expand vector addition to scalars. This does bit twiddling + in order to increase parallelism: + + a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ + (a ^ b) & 0x80808080 + + a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ + (a ^ ~b) & 0x80808080 + + -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) + + This optimization should be done only if 4 vector items or more + fit into a word. 
*/ +static tree +do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b, + tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, + enum tree_code code) +{ + tree inner_type = TREE_TYPE (TREE_TYPE (a)); + unsigned HOST_WIDE_INT max; + tree low_bits, high_bits, a_low, b_low, result_low, signs; + + max = GET_MODE_MASK (TYPE_MODE (inner_type)); + low_bits = build_replicated_const (word_type, inner_type, max >> 1); + high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); + + a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos); + b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); + + signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b); + b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); + if (code == PLUS_EXPR) + a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits); + else + { + a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits); + signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs); + } + + signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); + result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low); + return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); +} + +static tree +do_negate (block_stmt_iterator *bsi, tree word_type, tree b, + tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, + tree bitsize ATTRIBUTE_UNUSED, + enum tree_code code ATTRIBUTE_UNUSED) +{ + tree inner_type = TREE_TYPE (TREE_TYPE (b)); + HOST_WIDE_INT max; + tree low_bits, high_bits, b_low, result_low, signs; + + max = GET_MODE_MASK (TYPE_MODE (inner_type)); + low_bits = build_replicated_const (word_type, inner_type, max >> 1); + high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); + + b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); + + b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); + signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b); + signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); + result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low); + return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); +} + +/* Expand a vector operation to scalars, by using many operations + whose type is the vector type's inner type. */ +static tree +expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f, + tree type, tree inner_type, + tree a, tree b, enum tree_code code) +{ + tree head, *chain = &head; + tree part_width = TYPE_SIZE (inner_type); + tree index = bitsize_int (0); + int nunits = TYPE_VECTOR_SUBPARTS (type); + int delta = tree_low_cst (part_width, 1) + / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); + int i; + + for (i = 0; i < nunits; + i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) + { + tree result = f (bsi, inner_type, a, b, index, part_width, code); + *chain = tree_cons (NULL_TREE, result, NULL_TREE); + chain = &TREE_CHAIN (*chain); + } + + return build1 (CONSTRUCTOR, type, head); +} + +/* Expand a vector operation to scalars with the freedom to use + a scalar integer type, or to use a different size for the items + in the vector type. */ +static tree +expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type, + tree a, tree b, + enum tree_code code) +{ + tree result, compute_type; + enum machine_mode mode; + int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; + + /* We have three strategies. 
If the type is already correct, just do + the operation an element at a time. Else, if the vector is wider than + one word, do it a word at a time; finally, if the vector is smaller + than one word, do it as a scalar. */ + if (TYPE_MODE (TREE_TYPE (type)) == word_mode) + return expand_vector_piecewise (bsi, f, + type, TREE_TYPE (type), + a, b, code); + else if (n_words > 1) + { + tree word_type = build_word_mode_vector_type (n_words); + result = expand_vector_piecewise (bsi, f, + word_type, TREE_TYPE (word_type), + a, b, code); + result = gimplify_val (bsi, word_type, result); + } + else + { + /* Use a single scalar operation with a mode no wider than word_mode. */ + mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); + compute_type = lang_hooks.types.type_for_mode (mode, 1); + result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); + } + + return result; +} + +/* Expand a vector operation to scalars; for integer types we can use + special bit twiddling tricks to do the sums a word at a time, using + function F_PARALLEL instead of F. These tricks are done only if + they can process at least four items, that is, only if the vector + holds at least four items and if a word can hold four items. */ +static tree +expand_vector_addition (block_stmt_iterator *bsi, + elem_op_func f, elem_op_func f_parallel, + tree type, tree a, tree b, enum tree_code code) +{ + int parts_per_word = UNITS_PER_WORD + / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); + + if (INTEGRAL_TYPE_P (TREE_TYPE (type)) + && parts_per_word >= 4 + && TYPE_VECTOR_SUBPARTS (type) >= 4) + return expand_vector_parallel (bsi, f_parallel, + type, a, b, code); + else + return expand_vector_piecewise (bsi, f, + type, TREE_TYPE (type), + a, b, code); +} + +static tree +expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type, + tree rhs, enum tree_code code) +{ + enum machine_mode compute_mode = TYPE_MODE (compute_type); + + /* If the compute mode is not a vector mode (hence we are not decomposing + a BLKmode vector to smaller, hardware-supported vectors), we may want + to expand the operations in parallel. */ + if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT + && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT) + switch (code) + { + case PLUS_EXPR: + case MINUS_EXPR: + if (!TYPE_TRAP_SIGNED (type)) + return expand_vector_addition (bsi, do_binop, do_plus_minus, type, + TREE_OPERAND (rhs, 0), + TREE_OPERAND (rhs, 1), code); + break; + + case NEGATE_EXPR: + if (!TYPE_TRAP_SIGNED (type)) + return expand_vector_addition (bsi, do_unop, do_negate, type, + TREE_OPERAND (rhs, 0), + NULL_TREE, code); + break; + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + return expand_vector_parallel (bsi, do_binop, type, + TREE_OPERAND (rhs, 0), + TREE_OPERAND (rhs, 1), code); + + case BIT_NOT_EXPR: + return expand_vector_parallel (bsi, do_unop, type, + TREE_OPERAND (rhs, 0), + NULL_TREE, code); + + default: + break; + } + + if (TREE_CODE_CLASS (code) == tcc_unary) + return expand_vector_piecewise (bsi, do_unop, type, compute_type, + TREE_OPERAND (rhs, 0), + NULL_TREE, code); + else + return expand_vector_piecewise (bsi, do_binop, type, compute_type, + TREE_OPERAND (rhs, 0), + TREE_OPERAND (rhs, 1), code); +} + +/* Return a type for the widest vector mode whose components are of mode + INNER_MODE, or NULL_TREE if none is found. 
*/ +static tree +type_for_widest_vector_mode (enum machine_mode inner_mode, optab op) +{ + enum machine_mode best_mode = VOIDmode, mode; + int best_nunits = 0; + + if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) + mode = MIN_MODE_VECTOR_FLOAT; + else + mode = MIN_MODE_VECTOR_INT; + + for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) + if (GET_MODE_INNER (mode) == inner_mode + && GET_MODE_NUNITS (mode) > best_nunits + && op->handlers[mode].insn_code != CODE_FOR_nothing) + best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); + + if (best_mode == VOIDmode) + return NULL_TREE; + else + return lang_hooks.types.type_for_mode (best_mode, 1); +} + +/* Process one statement. If we identify a vector operation, expand it. */ + +static void +expand_vector_operations_1 (block_stmt_iterator *bsi) +{ + tree stmt = bsi_stmt (*bsi); + tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type; + enum tree_code code; + enum machine_mode compute_mode; + optab op; + + switch (TREE_CODE (stmt)) + { + case RETURN_EXPR: + stmt = TREE_OPERAND (stmt, 0); + if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR) + return; + + /* FALLTHRU */ + + case MODIFY_EXPR: + p_lhs = &TREE_OPERAND (stmt, 0); + p_rhs = &TREE_OPERAND (stmt, 1); + lhs = *p_lhs; + rhs = *p_rhs; + break; + + default: + return; + } + + type = TREE_TYPE (rhs); + if (TREE_CODE (type) != VECTOR_TYPE) + return; + + code = TREE_CODE (rhs); + if (TREE_CODE_CLASS (code) != tcc_unary + && TREE_CODE_CLASS (code) != tcc_binary) + return; + + if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR) + return; + + gcc_assert (code != CONVERT_EXPR); + op = optab_for_tree_code (code, type); + + /* Optabs will try converting a negation into a subtraction, so + look for it as well. TODO: negation of floating-point vectors + might be turned into an exclusive OR toggling the sign bit. */ + if (op == NULL + && code == NEGATE_EXPR + && INTEGRAL_TYPE_P (TREE_TYPE (type))) + op = optab_for_tree_code (MINUS_EXPR, type); + + /* For very wide vectors, try using a smaller vector mode. */ + compute_type = type; + if (TYPE_MODE (type) == BLKmode && op) + { + tree vector_compute_type + = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op); + if (vector_compute_type != NULL_TREE) + compute_type = vector_compute_type; + } + + /* If we are breaking a BLKmode vector into smaller pieces, + type_for_widest_vector_mode has already looked into the optab, + so skip these checks. */ + if (compute_type == type) + { + compute_mode = TYPE_MODE (compute_type); + if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT) + && op != NULL + && op->handlers[compute_mode].insn_code != CODE_FOR_nothing) + return; + else + /* There is no operation in hardware, so fall back to scalars. */ + compute_type = TREE_TYPE (type); + } + + rhs = expand_vector_operation (bsi, type, compute_type, rhs, code); + if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) + *p_rhs = rhs; + else + { + /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will + be stored in memory anyway, so prefer NOP_EXPR. We should also try + performing the VIEW_CONVERT_EXPR on the left side of the + assignment. 
*/ + if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode) + *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs); + else + *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs); + } + + mark_stmt_modified (bsi_stmt (*bsi)); +} + +/* Use this to lower vector operations introduced by the vectorizer, + if it may need the bit-twiddling tricks implemented in this file. */ + +static bool +gate_expand_vector_operations (void) +{ + return flag_tree_vectorize != 0; +} + +static void +expand_vector_operations (void) +{ + block_stmt_iterator bsi; + basic_block bb; + + FOR_EACH_BB (bb) + { + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + expand_vector_operations_1 (&bsi); + update_stmt_if_modified (bsi_stmt (bsi)); + } + } +} + +struct tree_opt_pass pass_lower_vector = +{ + "veclower", /* name */ + 0, /* gate */ + expand_vector_operations, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_ggc_collect + | TODO_verify_stmts, /* todo_flags_finish */ + 0 /* letter */ +}; + +struct tree_opt_pass pass_lower_vector_ssa = +{ + "veclower2", /* name */ + gate_expand_vector_operations, /* gate */ + expand_vector_operations, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ + | TODO_verify_ssa + | TODO_verify_stmts | TODO_verify_flow, + 0 /* letter */ +}; + +#include "gt-tree-vect-generic.h" |
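
/* Illustrative sketch, not part of the commit above.  It demonstrates the
   word-parallel addition identity quoted in the do_plus_minus comment,
   assuming four 8-bit elements packed into a 32-bit word; the function name
   swar_add_u8x4 and the test values are made up for this example only.  */

#include <stdint.h>
#include <stdio.h>

/* Add four packed bytes at once without letting carries cross lanes:
   a + b = (((a & 0x7f7f7f7f) + (b & 0x7f7f7f7f)) ^ ((a ^ b) & 0x80808080)).
   The low 7 bits of each lane are summed normally; the top bit of each lane
   is reconstructed as sign(a) ^ sign(b) ^ carry-into-bit-7.  */
static uint32_t
swar_add_u8x4 (uint32_t a, uint32_t b)
{
  uint32_t low = (a & 0x7f7f7f7fu) + (b & 0x7f7f7f7fu);
  uint32_t sign = (a ^ b) & 0x80808080u;
  return low ^ sign;
}

int
main (void)
{
  uint32_t a = 0x80fe0103u, b = 0x017f02ffu;
  uint32_t got = swar_add_u8x4 (a, b);

  /* Reference result: add each byte independently, modulo 256.  */
  uint32_t want = 0;
  for (int i = 0; i < 4; i++)
    {
      uint32_t ai = (a >> (8 * i)) & 0xff;
      uint32_t bi = (b >> (8 * i)) & 0xff;
      want |= ((ai + bi) & 0xff) << (8 * i);
    }

  printf ("swar=%08x ref=%08x %s\n", (unsigned) got, (unsigned) want,
	  got == want ? "ok" : "MISMATCH");
  return got != want;
}

/* As in expand_vector_addition, the trick only pays off when a machine word
   holds several elements (at least four in the pass above); otherwise the
   piecewise element-at-a-time expansion is used instead.  */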