aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSameera Deshpande <sameerad@gcc.gnu.org>2017-03-31 14:39:57 +0530
committerSameera Deshpande <sameerad@gcc.gnu.org>2017-03-31 14:39:57 +0530
commit066fa0ff72450cd7b6489af28893ae7eb35a59ce (patch)
tree571a410686dd9d3018b1bff95ed142beccbb7466
parent08a23481b346fa7ad671a6b5ec8351ac04117fcb (diff)
downloadgcc-066fa0ff72450cd7b6489af28893ae7eb35a59ce.zip
gcc-066fa0ff72450cd7b6489af28893ae7eb35a59ce.tar.gz
gcc-066fa0ff72450cd7b6489af28893ae7eb35a59ce.tar.bz2
stage 2...
stage 2: implementation of k-arity promotion/reduction in the series "Improving effectiveness and generality of autovectorization using unified representation". The permute nodes within primitive reorder tree(PRT) generated from input program can have any arity depending upon stride of accesses. However, the target cannot have instructions to support all arities. Hence, we need to promote or reduce the arity of PRT to enable successful tree tiling. In classic autovectorization, if vectorization stride > 2, arity reduction is performed by generating cascaded extract and interleave instructions as described by "Auto-vectorization of Interleaved Data for SIMD" by D. Nuzman, I. Rosen and A. Zaks. Moreover, to enable SLP across loop, "Loop-aware SLP in GCC" by D. Nuzman, I. Rosen and A. Zaks unrolls loop till stride = vector size. k-arity reduction/promotion algorithm makes use of modulo arithmetic to generate PRT of desired arity for both above-mentioned cases. Single ILV node of arity k can be reduced into cascaded ILV nodes with single node of arity m with children of arity k/m such that ith child of original ILV node becomes floor (i/m) th child of (i%m) th child of new parent. Single EXTR node with k parts and i selector can be reduced into cascaded EXTR nodes such that parent EXTR node has m parts and i/(k/m) selection on child EXTR node with k/m parts and i % (k/m) selection. Similarly, loop unrolling to get desired arity m can be represented as arity promotion from k to m. Single ILV node of arity k can be promoted to single ILV node of arity m by adding extraction with m/k parts and selection i/k of i%k the child of original tree as ith child of new ILV node. To enable loop-aware SLP, we first promote arity of input PRT to maximum vector size permissible on the architecture. This can have impact on vector code size, though performance will be the same. To eliminate redundant ILV and EXTR operations, thereby undoing unneccessary unrolling, we can perform unity reduction optimization: - EXTR_m,x (ILV_M(S1, S2, ... Sm)) => Sx - ILV_m (EXTR_0(S), EXTR_1(S),...EXTR_m-1(S)) => S Later we apply arity promotion reduction algorithm on the output tree to get tree with desired arity. For now, we are supporting target arity = 2, as most of the architectures have support for that. However, the code can be extended for additional arity supports as well. We have also implemented unity reduction optimization which eliminates redundant ILV and EXTR nodes thereby undoing unneccessary unrolling - which can bloat up the code size otherwise. From-SVN: r246610
-rw-r--r--gcc/Makefile.in2
-rw-r--r--gcc/config/mips/mips.h2
-rw-r--r--gcc/tree-vect-unified-common.c277
-rw-r--r--gcc/tree-vect-unified-opts.c656
-rw-r--r--gcc/tree-vect-unified.c414
-rw-r--r--gcc/tree-vect-unified.h132
-rw-r--r--gcc/tree-vectorizer.h4
-rw-r--r--gcc/tree.c14
-rw-r--r--gcc/tree.h2
9 files changed, 1245 insertions, 258 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index fd9e3fa..b399008 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1557,7 +1557,9 @@ OBJS = \
tree-vect-loop-manip.o \
tree-vect-slp.o \
tree-vectorizer.o \
+ tree-vect-unified-common.o \
tree-vect-unified.o \
+ tree-vect-unified-opts.o \
tree-vrp.o \
tree.o \
typed-splay-tree.o \
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 23e1672..68b8b30 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -3467,3 +3467,5 @@ struct GTY(()) machine_function {
#define ENABLE_LD_ST_PAIRS \
(TARGET_LOAD_STORE_PAIRS && (TUNE_P5600 || TUNE_I6400) \
&& !TARGET_MICROMIPS && !TARGET_FIX_24K)
+
+#define MAX_VECTOR_SIZE 16
diff --git a/gcc/tree-vect-unified-common.c b/gcc/tree-vect-unified-common.c
new file mode 100644
index 0000000..90ca6df7c
--- /dev/null
+++ b/gcc/tree-vect-unified-common.c
@@ -0,0 +1,277 @@
+/* lOOP Vectorization using unified representation
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Loop autovectorization using unified representation for permute
+ instructions. */
+#if 1
+
+#ifndef GENERATOR_FILE
+#include "config.h"
+#else
+#include "bconfig.h"
+#endif
+
+#include "system.h"
+#include "coretypes.h"
+
+#ifndef GENERATOR_FILE
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-cfg.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "target.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "optabs-tree.h"
+#include "dumpfile.h"
+#include "alias.h"
+#include "tree-eh.h"
+#include "gimplify.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop.h"
+#include "expr.h"
+#include "builtins.h"
+#include "params.h"
+#include "pretty-print.h"
+#include "pretty-print.h"
+#else
+#include "errors.h"
+#include "machmode.h"
+#include "signop.h"
+#include "wide-int.h"
+#include "double-int.h"
+#include "real.h"
+#include "fixed-value.h"
+#include "statistics.h"
+#include "vec.h"
+#include "hash-table.h"
+#include "hash-set.h"
+#include "input.h"
+#include "is-a.h"
+#include "target.h"
+#include "tree-core.h"
+#include "tree-vect-unified.h"
+
+
+#endif
+
+#include "tree-vect-unified.h"
+
+/***** Helper functions for prim-tree creation *****/
+
+/* Function init_primop_node.
+
+ This function creates PRIMOP_TREE node and initializes all its fields to 0.
+*/
+
+struct primop_tree *
+init_primop_node (void)
+{
+ static int pid = 0;
+ struct primop_tree *ptree;
+ ptree = (struct primop_tree *) xcalloc (1, sizeof (struct primop_tree));
+
+ PT_PID (ptree) = pid++;
+ PT_NODE_OP (ptree) = 0;
+ PT_ARITY (ptree) = 0;
+ ptree->children = vNULL;
+ PT_PARENT (ptree) = NULL;
+ PT_ITER_COUNT (ptree) = NULL;
+ PT_VEC_SIZE (ptree) = 0;
+ PT_VEC_TYPE (ptree) = NULL;
+ PT_VEC_INST (ptree) = vNULL;
+ PT_TARGET_COST (ptree) = 0;
+ PT_NUM_INSTANCES (ptree) = 0;
+ PT_LOOP_DEPENDENCES (ptree) = vNULL;
+#ifndef GENERATOR_FILE
+ PT_DEP (ptree) =vNULL;
+#endif
+ PT_DEPTH (ptree) = 0;
+ PT_ATTR_NO (ptree) = 0;
+ PT_AUX (ptree) = -1;
+ memset (&ptree->u, 0, sizeof (ptree->u));
+ return ptree;
+}
+
+/* Function populate_prim_node.
+
+ This function returns PRIMOP_TREE node initialized with given information.
+*/
+
+struct primop_tree *
+populate_prim_node (enum primop_code pcode, tree iter_count,
+ struct primop_tree *parent, gimple *stmt)
+{
+ struct primop_tree *ptree;
+ ptree = init_primop_node ();
+
+ PT_NODE_OP (ptree) = (int) pcode;
+ PT_PARENT (ptree) = parent;
+ PT_ITER_COUNT (ptree) = iter_count;
+
+#ifndef GENERATOR_FILE
+ if (stmt)
+ {
+ PT_VEC_TYPE (ptree) = STMT_ATTR_VECTYPE (stmt);
+ PT_ATTR_NO (ptree) = gimple_uid (stmt);
+ STMT_ATTR_TREE (stmt) = ptree;
+ }
+#endif
+ return ptree;
+}
+
+/* Function create_primTree_combine.
+
+ Create primtree with PCODE as interleave or concat. STMT is statement for
+ which primtree is being created. */
+struct primop_tree *
+create_primTree_combine (enum primop_code pcode, gimple *stmt, int parts,
+ tree iter_count, struct primop_tree *parent)
+{
+ struct primop_tree * ptree;
+
+ ptree = populate_prim_node (pcode, iter_count, parent, stmt);
+ PT_OPERAND_SELECTOR (ptree) = -1;
+ PT_DIVISION (ptree) = parts;
+ PT_VAR_STRIDE (ptree) = NULL;
+
+#ifndef GENERATOR_FILE
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " create_primTree_combine %d : parts - %d\n",
+ PT_PID (ptree), parts);
+ }
+#endif
+
+ return ptree;
+}
+
+/* Function create_primTree_partition.
+
+ Create primtree with PCODE as split or extract. STMT is statement for which
+ primtree is being created. PARTS is number of partitions to be created.
+ SELECTOR is the part being selected. */
+struct primop_tree *
+create_primTree_partition (enum primop_code pcode, gimple *stmt, int parts,
+ int selector, tree iter_count,
+ struct primop_tree *parent)
+{
+ struct primop_tree * ptree;
+
+ ptree = populate_prim_node (pcode, iter_count, parent, stmt);
+ PT_OPERAND_SELECTOR (ptree) = selector;
+ PT_DIVISION (ptree) = parts;
+ PT_VAR_STRIDE (ptree) = NULL;
+
+#ifndef GENERATOR_FILE
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " create_primTree_partition %d : parts - %d selector - %d\n",
+ PT_PID (ptree), parts, selector);
+ }
+#endif
+
+ return ptree;
+}
+
+/* Function add_child_at_index.
+
+ Attach PCHILD node as idx^th child of PNODE. */
+void
+add_child_at_index (struct primop_tree *ptree,
+ struct primop_tree *pchild, int idx)
+{
+ (PT_ARITY (ptree))++;
+ while (idx >= ptree->children.length ())
+ {
+ ptree->children.safe_push (NULL);
+ }
+ PT_CHILD (ptree, idx) = pchild;
+}
+
+/* Function get_child_at_index.
+
+ Get idx^th child of PNODE. */
+struct primop_tree *
+get_child_at_index (struct primop_tree *ptree, int idx)
+{
+#ifndef GENERATOR_FILE
+ gcc_assert (idx < PT_ARITY (ptree));
+#endif
+ return PT_CHILD (ptree, idx);
+}
+
+/* Function duplicate_prim_node.
+
+ This function copies contents of SRC node into new node, and returns pointer
+ to it.
+*/
+
+struct primop_tree *
+duplicate_prim_node (struct primop_tree *src)
+{
+ struct primop_tree *ptree;
+ int i;
+
+ ptree = init_primop_node ();
+
+ PT_NODE_OP (ptree) = PT_NODE_OP (src);
+ PT_PARENT (ptree) = PT_PARENT (src);
+ PT_ITER_COUNT (ptree) = PT_ITER_COUNT (src);
+ PT_VEC_SIZE (ptree) = PT_VEC_SIZE (src);
+ PT_VEC_TYPE (ptree) = PT_VEC_TYPE (src);
+
+ PT_ATTR_NO (ptree) = PT_ATTR_NO (src);
+
+ for (i = 0; i < src->children.length (); i++)
+ {
+ add_child_at_index (ptree, PT_CHILD (src, i), i);
+ }
+
+ memcpy (&ptree->u, &src->u, sizeof (ptree->u));
+
+#ifndef GENERATOR_FILE
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " duplicate_prim_node : pid - %d node_op - %s\n",
+ PT_PID (src), tree_code_name[PT_NODE_OP (ptree)]);
+ }
+#endif
+
+ return ptree;
+}
+
+
+
+#endif
diff --git a/gcc/tree-vect-unified-opts.c b/gcc/tree-vect-unified-opts.c
new file mode 100644
index 0000000..59922ec
--- /dev/null
+++ b/gcc/tree-vect-unified-opts.c
@@ -0,0 +1,656 @@
+/* lOOP Vectorization using unified representation
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Loop autovectorization using unified representation for permute
+ instructions. */
+#if 1
+
+#ifndef GENERATOR_FILE
+#include "config.h"
+#else
+#include "bconfig.h"
+#endif
+
+#include "system.h"
+#include "coretypes.h"
+
+#ifndef GENERATOR_FILE
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-cfg.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "target.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "optabs-tree.h"
+#include "dumpfile.h"
+#include "alias.h"
+#include "tree-eh.h"
+#include "gimplify.h"
+#include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-ssa-loop.h"
+#include "expr.h"
+#include "builtins.h"
+#include "params.h"
+#include "pretty-print.h"
+#include "pretty-print.h"
+#else
+# include "errors.h"
+#include "machmode.h"
+#include "signop.h"
+#include "wide-int.h"
+#include "double-int.h"
+#include "real.h"
+#include "fixed-value.h"
+#include "statistics.h"
+#include "vec.h"
+#include "hash-table.h"
+#include "hash-set.h"
+#include "input.h"
+#include "is-a.h"
+#include "target.h"
+#include "tree-core.h"
+#include "tree-vect-unified.h"
+
+#endif
+
+#include "tree-vect-unified.h"
+
+/* Function ILV_arity_reduction.
+
+ ILV_k^N ILV_m^Nk/m
+ / | \ => / | \
+ / | \ / | \
+ T1 T2 ... Tk / | ... \
+ / | \
+ ILV_k/m^N ILV_k/m^N ILV_k/m^N
+ / | \ / | \ / | \
+ / |...\ / |...\ / |...\
+ T1 Tm+1 Tk-m+1 T2 Tm+2 Tk-m+2 Tm T2m Tk
+
+*/
+
+struct primop_tree *
+ILV_arity_reduction (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *new_root, *new_child, *tmp;
+ tree new_iter_count;
+ int i, j;
+
+#ifndef GENERATOR_FILE
+ gcc_assert (from_arity == PT_DIVISION (root));
+
+ new_iter_count = size_binop (MULT_EXPR,
+ fold_convert (ssizetype, PT_ITER_COUNT (root)),
+ ssize_int (from_arity / to_arity));
+#else
+ new_iter_count = NULL;
+#endif
+
+ new_root = create_primTree_combine (POP_ILV, NULL, to_arity,
+ new_iter_count, PT_PARENT (root));
+
+ for (i = 0; i < to_arity; i++)
+ {
+ new_child = create_primTree_combine (POP_ILV, NULL,
+ from_arity / to_arity, PT_ITER_COUNT (root), new_root);
+
+ for (j = 0; j < from_arity / to_arity; j++)
+ {
+ add_child_at_index (new_child, PT_CHILD (root, j * to_arity + i), j);
+ }
+
+ tmp = k_arity_promotion_reduction (new_child, to_arity);
+
+ if (tmp != NULL)
+ {
+ add_child_at_index (new_root, tmp, i);
+ }
+ else
+ return NULL;
+ }
+
+ return new_root;
+}
+
+/*Function EXTR_arity_reduction.
+
+ EXTR Arity Reduction
+
+ EXTR_k,i^N EXTR_m,(i/(k/m))%m^Nm/k
+ | => |
+ | |
+ T EXTR_k/m,i%(k/m)^N
+ |
+ |
+ T
+*/
+
+struct primop_tree *
+EXTR_arity_reduction (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *new_root, *new_child, *tmp;
+ tree new_iter_count;
+ int i, j;
+
+#ifndef GENERATOR_FILE
+ gcc_assert (from_arity == PT_DIVISION (root));
+
+ new_iter_count = size_binop (MULT_EXPR,
+ fold_convert (ssizetype, PT_ITER_COUNT (root)),
+ ssize_int (to_arity / from_arity));
+#else
+ new_iter_count = NULL;
+#endif
+
+ new_root = create_primTree_partition (POP_EXTR, NULL, to_arity,
+ (PT_OPERAND_SELECTOR (root) * to_arity / from_arity) % to_arity,
+ new_iter_count, PT_PARENT (root));
+
+ new_child = create_primTree_partition (POP_EXTR, NULL, from_arity/to_arity,
+ PT_OPERAND_SELECTOR (root) % (from_arity / to_arity),
+ PT_ITER_COUNT (root), new_root);
+
+ add_child_at_index (new_child, PT_CHILD (root, 0), 0);
+
+ tmp = k_arity_promotion_reduction (new_child, to_arity);
+
+ if (tmp != NULL)
+ {
+ add_child_at_index (new_root, tmp, 0);
+ }
+ else
+ return NULL;
+
+
+ return new_root;
+}
+
+/* Function k_arity_reduction.
+
+ Driver function for arity reduction of EXTR, ILV, CONCAT and SPLT. CONCAT
+ and SPLT need not be implemented due to phase ordering. */
+
+struct primop_tree *
+k_arity_reduction (struct primop_tree *root, int from_arity, int to_arity)
+{
+ if (PT_NODE_OP (root) == POP_ILV)
+ return ILV_arity_reduction (root, from_arity, to_arity);
+
+ if (PT_NODE_OP (root) == POP_EXTR)
+ return EXTR_arity_reduction (root, from_arity, to_arity);
+
+ /* For leaf nodes - like const or memory, return root. */
+ if (PT_ARITY (root) == 0)
+ return root;
+
+ return NULL;
+}
+
+/* Function ILV_arity_promotion.
+
+ ILV_k^N
+ / | \
+ / | \
+ / | \
+ T1 T2 Tk
+
+ _||_
+ \ /
+ \/
+
+ ILV_m^Nk/m
+ +-----------------+-----------+-+---------------------+
+ / / / \ \
+ / / / \ \
+ / / / \ \
+EXTR_m/k^N,0 ... EXTR_m/k^N,0 EXTR_m/k^N,1 ... EXTR_m/k^N,m/k-1 ... EXTR_m/k^N,m/k-1
+ | | | | |
+ T1 Tk T1 T1 Tk
+
+*/
+
+struct primop_tree *
+ILV_arity_promotion (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *new_root, *new_child, *tmp;
+ tree new_iter_count;
+ int i, j;
+
+#ifndef GENERATOR_FILE
+ gcc_assert (from_arity == PT_DIVISION (root));
+
+ new_iter_count = size_binop (EXACT_DIV_EXPR, size_binop (MULT_EXPR,
+ fold_convert (ssizetype,
+ PT_ITER_COUNT (root)),
+ ssize_int (from_arity)),
+ ssize_int (to_arity));
+#else
+ new_iter_count = NULL;
+#endif
+
+ new_root = create_primTree_combine (POP_ILV, NULL, to_arity,
+ new_iter_count, PT_PARENT (root));
+ for (i = 0; i < to_arity / from_arity; i++)
+ {
+ for (j = 0; j < from_arity; j++)
+ {
+ new_child = create_primTree_partition (POP_EXTR, NULL,
+ to_arity / from_arity, i, PT_ITER_COUNT (root),
+ new_root);
+ add_child_at_index (new_child, PT_CHILD (root, j), 0);
+ tmp = k_arity_promotion_reduction (new_child, to_arity);
+ if (tmp != NULL)
+ add_child_at_index (new_root, tmp, i * from_arity + j);
+ else
+ return NULL;
+ }
+ }
+
+ return new_root;
+}
+
+/* Function merge_EXTR_nodes.
+
+ Multiple EXTR nodes can be there in primop_tree, which need to be merged,
+ before promoting/reducing EXTR node for better optimization. If merged EXTR
+ node needs further promotion, the vectorization cannot be applied, as EXTR
+ promotion can introduce new ILV node.
+ TODO - Add stricter check for final promotion/reduction to target specific
+ arity. Currently, we do not block vectorization if EXTR arity cannot be
+ promoted.
+*/
+
+struct primop_tree *
+merge_EXTR_nodes (struct primop_tree *root, int to_arity)
+{
+ int from_arity, parts, selector;
+ struct primop_tree *iter_node, *tmp;
+ tree iter_count;
+
+ from_arity = PT_DIVISION (root);
+ parts = 1;
+ iter_node = root;
+ selector = 0;
+
+ while (PT_NODE_OP (iter_node) == POP_EXTR)
+ {
+ iter_count = PT_ITER_COUNT (iter_node);
+ parts = parts * PT_DIVISION (iter_node);
+ selector = selector * PT_DIVISION (iter_node)
+ + PT_OPERAND_SELECTOR (iter_node);
+ iter_node = PT_CHILD (iter_node, 0);
+ }
+
+ if (iter_node != PT_CHILD (root, 0))
+ {
+ tmp = create_primTree_partition (POP_EXTR, NULL, parts, selector,
+ iter_count, PT_PARENT (root));
+ add_child_at_index (tmp, iter_node, 0);
+ return tmp;
+ }
+
+ return root;
+}
+
+/* Function k_arity_promotion.
+
+ Driver function for arity promotion of EXTR, ILV, CONCAT and SPLT. CONCAT
+ and SPLIT need not be implemented due to phase ordering. Implementation of
+ ILV-promotion introduces new EXTR nodes whereas implementation of
+ EXTR-promotion introduce ILV node. To avoid the infinite loop, only
+ ILV-promotion or EXTR-promotion can be implemented. As the
+ k_arity_promotion_reduction is top-down algorithm, ILV-promotion is
+ implemented. For EXTR-promotion, merge newly created EXPR node with child
+ EXTR node if any, and hope that arity promotion/reduction is applicable
+ to new EXTR node. */
+
+struct primop_tree *
+k_arity_promotion (struct primop_tree *root, int from_arity, int to_arity)
+{
+ struct primop_tree *tmp_root;
+
+ if (PT_NODE_OP (root) == POP_ILV)
+ return ILV_arity_promotion (root, from_arity, to_arity);
+
+ if (PT_NODE_OP (root) == POP_EXTR)
+ {
+ tmp_root = merge_EXTR_nodes (root, to_arity);
+ if (tmp_root != root)
+ return k_arity_promotion_reduction (tmp_root, to_arity);
+ else
+ return root;
+ }
+/* For leaf nodes - like const or memory, return root. */
+ if (PT_ARITY (root) == 0)
+ return root;
+
+ return NULL;
+}
+
+/* Function k_arity_promotion_reduction.
+
+ Driver function for promoting/reducing arity of the tree rooted at ROOT from
+ FROM_ARITY to TO_ARITY. */
+
+struct primop_tree *
+k_arity_promotion_reduction (struct primop_tree *root, int to_arity)
+{
+ struct primop_tree *retval = root;
+ int from_arity, i;
+
+#ifndef GENERATOR_FILE
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "\n k_arity_promotion_reduction: ");
+ dump_primtree_node (MSG_NOTE, root);
+ }
+#endif
+
+ if (PT_NODE_OP (root) == POP_EXTR || PT_NODE_OP (root) == POP_SPLT)
+ from_arity = PT_DIVISION (root);
+ else
+ from_arity = PT_ARITY (root);
+
+ if (PT_NODE_OP (root) >= MAX_TREE_CODES && PT_NODE_OP (root) < POP_COLLAPSE)
+ {
+ if (from_arity > to_arity)
+ {
+ /* Arity reduction. */
+ if (from_arity % to_arity == 0)
+ {
+ retval = k_arity_reduction (root, from_arity, to_arity);
+ return retval;
+ }
+ else
+ return NULL;
+ }
+ else if (from_arity < to_arity)
+ {
+ /* Arity promotion. */
+ if (to_arity % from_arity == 0)
+ {
+ retval = k_arity_promotion (root, from_arity, to_arity);
+ return retval;
+ }
+ else
+ return NULL;
+ }
+ else
+ {
+ retval = duplicate_prim_node (root);
+ //return retval;
+ }
+ }
+
+ if (retval != NULL)
+ {
+ /* The tree node is compute-node. Hence, no action to be taken for arity
+ promotion/reduction. However, the subtrees below this root may need
+ arity adjustment. Hence, invoke k_arity_promotion_reduction algorithm
+ recursively on children of root. */
+ for (i = 0; i < retval->children.length (); i++)
+ {
+ struct primop_tree *tmp;
+ tmp = k_arity_promotion_reduction (PT_CHILD (retval, i), to_arity);
+ if (tmp == NULL)
+ return NULL;
+
+ PT_CHILD (retval, i) = tmp;
+ }
+
+ PT_ARITY (retval) = i;
+ }
+
+ return retval;
+
+
+}
+
+struct primtree_hash_table
+{
+ /* Assuming the operation has maximum 3 children. The key is created as
+ PRIMOP_CODE # <opd0 idx> # <opd1 idx> # <opd2 idx> where
+ PRIMOP_CODE is 10 bits, and <opdn idx> is 12 bit each. */
+ long long int key;
+
+ /* List of leaf-nodes in the subtree. */
+ vec<struct primop_tree *> leaves;
+};
+
+vec<struct primtree_hash_table *> primop_hash;
+
+bool
+compare_leaf_nodes (struct primop_tree *n1, struct primop_tree *n2)
+{
+ if (PT_NODE_OP (n1) != PT_NODE_OP (n2))
+ return false;
+
+ switch (PT_NODE_OP (n1))
+ {
+#ifndef GENERATOR_FILE
+ case POP_MEMREF:
+ if (operand_equal_p (PT_MEMVAL_BASE (n1), PT_MEMVAL_BASE (n2), 0)
+ && operand_equal_p (PT_MEMVAL_MULT_IDX (n1),
+ PT_MEMVAL_MULT_IDX (n2), 0)
+ && PT_MEMVAL_IS_READ (n1) == PT_MEMVAL_IS_READ (n2))
+ return true;
+ break;
+#endif
+ case POP_PH:
+ if (PT_PH_IDX (n1) == PT_PH_IDX (n2)
+ && PT_PH_TYPE (n1) == PT_PH_TYPE (n1))
+ return true;
+ break;
+
+ case POP_CONST:
+ gcc_assert (!"CONST is not supported for now.");
+ break;
+
+ default:
+ gcc_assert (0);
+ }
+
+ return false;
+}
+
+int
+lookup_key_in_table (long long int key, struct primop_tree **leaves, int length)
+{
+ int i, j;
+
+ for (i = 0; i < primop_hash.length (); i++)
+ {
+ if (key == primop_hash[i]->key
+ && length == primop_hash[i]->leaves.length ())
+ {
+ for (j = 0; j < length; j++)
+ {
+ if (!compare_leaf_nodes (primop_hash[i]->leaves[j], leaves[j]))
+ break;
+ }
+
+ if (j == length)
+ break;
+ }
+ }
+
+ if (i < primop_hash.length ())
+ return i;
+
+ return -1;
+}
+
+/* Function annotate_tree_nodes.
+
+ Add each subtree matching to
+
+*/
+
+int
+annotate_tree_nodes (struct primop_tree *ptree, int *end,
+ struct primop_tree **leaves)
+{
+ int key;
+ long long int idx;
+ int length = 0;
+ int i, j;
+ struct primop_tree *temp[150];
+ struct primtree_hash_table *new_hash;
+
+ if (PT_NODE_OP (ptree) == POP_MEMREF
+ || PT_NODE_OP (ptree) == POP_CONST
+ || PT_NODE_OP (ptree) == POP_PH)
+ {
+ leaves[(*end)++] = ptree;
+ return 0xfff;
+ }
+
+ key = PT_NODE_OP (ptree) << 10;
+
+ gcc_assert (ptree->children.length () < 4);
+
+ for (i = 0; i < ptree->children.length (); i++)
+ {
+ key = (key | annotate_tree_nodes (PT_CHILD (ptree, i),
+ &length, temp)) << 12;
+ for (j = 0; j < length; j++)
+ {
+ leaves[(*end)++] = temp[j];
+ }
+ }
+
+ idx = lookup_key_in_table (key, leaves, *end);
+
+ if (idx == -1)
+ {
+ // Create new entry.
+ new_hash = (struct primtree_hash_table *) xcalloc (1,
+ sizeof (struct primtree_hash_table));
+ new_hash->key = key;
+ new_hash->leaves = vNULL;
+ for (i = 0; i < *end; i++)
+ new_hash->leaves.safe_insert (new_hash->leaves.length (), leaves[i]);
+ idx = primop_hash.length ();
+ primop_hash.safe_insert (idx, new_hash);
+ }
+
+ PT_AUX (ptree) = idx;
+ return idx;
+}
+
+bool
+unity_redundancy_elimination_2 (struct primop_tree *ptree,
+ struct primop_tree **new_ptree)
+{
+ bool changed = false;
+ int to_be_matched;
+ struct primop_tree *temp_ptree;
+ int i;
+
+ *new_ptree = ptree;
+
+ if (PT_NODE_OP (ptree) == POP_EXTR
+ && PT_NODE_OP (PT_CHILD (ptree, 0)) == POP_ILV
+ && PT_DIVISION (ptree) == PT_DIVISION (PT_CHILD (ptree,
+ PT_OPERAND_SELECTOR (ptree))))
+ {
+ /* EXTR nodes have single child - and if it is ILV node, eliminate
+ EXTR and ILV nodes, and replace it with operand_selector^th
+ child of ILV node. */
+ changed = true;
+ *new_ptree = PT_CHILD (PT_CHILD (ptree, 0), PT_OPERAND_SELECTOR (ptree));
+ }
+
+ if (PT_NODE_OP (ptree) == POP_ILV)
+ {
+ for (i = 0; i < ptree->children.length (); i++)
+ {
+ if (PT_NODE_OP (PT_CHILD (ptree, i)) != POP_EXTR
+ || PT_DIVISION (ptree) != PT_DIVISION (PT_CHILD (ptree, i))
+ || PT_OPERAND_SELECTOR (PT_CHILD (ptree, i)) != i)
+ break;
+ if (i == 0)
+ to_be_matched = (int) PT_AUX (PT_CHILD (PT_CHILD (ptree, i), 0));
+ if (to_be_matched != (int) PT_AUX (PT_CHILD (PT_CHILD (ptree, i), 0)))
+ break;
+ }
+
+ if (i == ptree->children.length ())
+ {
+ changed = true;
+ *new_ptree = PT_CHILD (PT_CHILD (ptree, 0), 0);
+ }
+ }
+
+ for (i = 0; i < (*new_ptree)->children.length (); i++)
+ {
+ changed |= unity_redundancy_elimination_2 (PT_CHILD (*new_ptree, i),
+ &temp_ptree);
+ PT_CHILD (*new_ptree, i) = temp_ptree;
+ PT_PARENT (temp_ptree) = *new_ptree;
+ }
+
+ return changed;
+}
+
+/* Function unity_redundancy_elimination.
+
+ Perform unity reduction as shown in following rules:
+ - ILV_m (EXTR_0 (S), EXTR_1 (S),...EXTR_m-1 (S)) => S
+ - EXTR_m,x (ILV_M(S1, S2, ... Sm)) => Sx
+
+*/
+
+struct primop_tree *
+unity_redundancy_elimination (struct primop_tree *ptree)
+{
+ struct primop_tree *dummy[150];
+ struct primop_tree *new_ptree;
+ int end = 0;
+ bool changed;
+
+ annotate_tree_nodes (ptree, &end, dummy);
+
+ changed = false;
+
+ do {
+ changed = unity_redundancy_elimination_2 (ptree, &new_ptree);
+ if (ptree == new_ptree)
+ break;
+ ptree = new_ptree;
+ } while (changed == true);
+
+ return new_ptree;
+}
+
+#endif
diff --git a/gcc/tree-vect-unified.c b/gcc/tree-vect-unified.c
index 872d7d7..8e475f2 100644
--- a/gcc/tree-vect-unified.c
+++ b/gcc/tree-vect-unified.c
@@ -1,4 +1,10 @@
-/* lOOP Vectorization using unified representation
+/* lOOP Vectorization using unified representation for permute instructions.
+ Copyright (C) 2003-2015 Free Software Foundation, Inc.
+ Contributed by Sameera Deshpande <sameera.deshpande@imgtec.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
@@ -195,6 +201,52 @@ destroy_iter_node_info (struct ITER_node *inode)
free (inode);
}
+vec<struct stmt_attr *> stmt_attr_vec;
+
+void
+init_stmt_attr_vec (void)
+{
+ gcc_assert (!stmt_attr_vec.exists ());
+ stmt_attr_vec.create (50);
+}
+
+void
+free_stmt_attr_vec (void)
+{
+ gcc_assert (stmt_attr_vec.exists ());
+ stmt_attr_vec.release ();
+}
+
+inline void
+set_stmt_attr (gimple *stmt, struct stmt_attr *info)
+{
+ unsigned int uid = gimple_uid (stmt);
+ if (uid == 0)
+ {
+ gcc_checking_assert (info);
+ uid = stmt_attr_vec.length () + 1;
+ gimple_set_uid (stmt, uid);
+ stmt_attr_vec.safe_push (info);
+ }
+ else
+ {
+ gcc_checking_assert (info == NULL);
+ stmt_attr_vec[uid - 1] = info;
+ }
+}
+
+inline struct stmt_attr *
+get_stmt_attr (gimple *stmt)
+{
+ unsigned int uid = gimple_uid (stmt);
+ if (uid == 0)
+ return NULL;
+
+ return stmt_attr_vec[uid - 1];
+}
+
+
+
/* Function new_stmt_attr.
Create statement attribute information, and return the pointer for the
@@ -214,6 +266,7 @@ new_stmt_attr ()
return info;
}
+
/* Function vect_populate_iter_node_from_loop.
Create new ITER_node corresponding to loop LOOP, and fill all fields in
@@ -228,7 +281,8 @@ vect_populate_iter_node_from_loop (struct loop *loop)
int i;
gimple_stmt_iterator si;
- if (! vect_analyze_loop_form_1 (loop, &loop_cond, &assumptions, &number_of_iterationsm1, &number_of_iterations, &inner_loop_cond))
+ if (! vect_analyze_loop_form_1 (loop, &loop_cond, &assumptions,
+ &number_of_iterationsm1, &number_of_iterations, &inner_loop_cond))
return NULL;
struct ITER_node * t_iter_node = new_iter_node (loop);
@@ -293,7 +347,7 @@ vect_analyze_dataref_access (struct data_reference *dr, struct loop * loop)
if (integer_zerop (step))
{
- /* Stride is 0, i.e. the data_ref is loop invariant. So, writes cannot be
+ /* Stride is 0, i.e. the data_ref is loop invariant. So, writes cannot be
vectorized. */
if (nested_in_vect_loop_p (loop, stmt))
{
@@ -1457,66 +1511,6 @@ normalize_base_addr (gimple *stmt, struct loop *loop, tree *offset)
return retval;
}
-/***** Helper functions for prim-tree creation *****/
-
-/* Function init_primop_node.
-
- This function creates PRIMOP_TREE node and initializes all its fields to 0.
-*/
-
-struct primop_tree *
-init_primop_node (void)
-{
- static int pid = 0;
- struct primop_tree *ptree;
- ptree = (struct primop_tree *) xcalloc (1, sizeof (struct primop_tree));
-
- PT_PID (ptree) = pid++;
- PT_NODE_OP (ptree) = 0;
- PT_ARITY (ptree) = 0;
- ptree->children = vNULL;
- PT_PARENT (ptree) = NULL;
- PT_ITER_COUNT (ptree) = NULL;
- PT_VEC_SIZE (ptree) = 0;
- PT_VEC_TYPE (ptree) = NULL;
- PT_VEC_INST (ptree) = vNULL;
- PT_TARGET_COST (ptree) = 0;
- PT_NUM_INSTANCES (ptree) = 0;
- PT_LOOP_DEPENDENCES (ptree) = vNULL;
- PT_DEP (ptree) =vNULL;
- PT_DEPTH (ptree) = 0;
- PT_ATTR_NO (ptree) = 0;
- PT_AUX (ptree) = NULL;
- memset (&ptree->u, 0, sizeof (ptree->u));
- return ptree;
-}
-
-/* Function populate_prim_node.
-
- This function returns PRIMOP_TREE node initialized with given information.
-*/
-
-struct primop_tree *
-populate_prim_node (enum primop_code pcode, struct ITER_node *inode,
- struct primop_tree *parent, gimple *stmt)
-{
- struct primop_tree *ptree;
- ptree = init_primop_node ();
-
- PT_NODE_OP (ptree) = (int) pcode;
- PT_PARENT (ptree) = parent;
- PT_ITER_COUNT (ptree) = ITER_NODE_NITERS (inode);
-
- if (stmt)
- {
- PT_VEC_TYPE (ptree) = STMT_ATTR_VECTYPE (stmt);
- PT_ATTR_NO (ptree) = gimple_uid (stmt);
- STMT_ATTR_TREE (stmt) = ptree;
- }
-
- return ptree;
-}
-
/* Function exists_primTree_with_memref.
This function checks if PRIMOP_TREE node corresponding to MEMREF already
@@ -1541,7 +1535,7 @@ exists_primTree_with_memref (tree base, tree step, bool is_read,
continue;
if (is_read != PT_MEMVAL_IS_READ (ptree))
- continue;
+ continue;
if (dump_enabled_p ())
{
@@ -1565,11 +1559,11 @@ exists_primTree_with_memref (tree base, tree step, bool is_read,
struct primop_tree *
create_primTree_memref (tree base, tree step, bool is_read, int num,
- struct ITER_node *inode, struct primop_tree *parent)
+ tree iter_count, struct primop_tree *parent)
{
struct primop_tree * ptree;
- ptree = populate_prim_node (POP_MEMREF, inode, parent, NULL);
+ ptree = populate_prim_node (POP_MEMREF, iter_count, parent, NULL);
PT_MEMVAL_BASE (ptree) = unshare_expr (base);
PT_MEMVAL_MULT_IDX (ptree) = unshare_expr (step);
@@ -1578,88 +1572,12 @@ create_primTree_memref (tree base, tree step, bool is_read, int num,
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
- " create_primTree_memref : stride - %d\n ",
- tree_to_uhwi (step) / num);
+ " create_primTree_memref %d : stride - %d\n ",
+ PT_PID (ptree), tree_to_uhwi (step) / num);
}
return ptree;
}
-/* Function create_primTree_combine.
-
- Create primtree with PCODE as interleave or concat. STMT is statement for
- which primtree is being created. */
-struct primop_tree *
-create_primTree_combine (enum primop_code pcode, gimple *stmt, int parts,
- struct ITER_node *inode, struct primop_tree *parent)
-{
- struct primop_tree * ptree;
-
- ptree = populate_prim_node (pcode, inode, parent, stmt);
- PT_OPERAND_SELECTOR (ptree) = -1;
- PT_DIVISION (ptree) = parts;
- PT_VAR_STRIDE (ptree) = NULL;
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- " create_primTree_combine: parts - %d\n", parts);
- }
-
- return ptree;
-}
-
-/* Function create_primTree_partition.
-
- Create primtree with PCODE as split or extract. STMT is statement for which
- primtree is being created. PARTS is number of partitions to be created.
- SELECTOR is the part being selected. */
-struct primop_tree *
-create_primTree_partition (enum primop_code pcode, gimple *stmt, int parts,
- int selector, struct ITER_node *inode,
- struct primop_tree *parent)
-{
- struct primop_tree * ptree;
-
- ptree = populate_prim_node (pcode, inode, parent, stmt);
- PT_OPERAND_SELECTOR (ptree) = selector;
- PT_DIVISION (ptree) = parts;
- PT_VAR_STRIDE (ptree) = NULL;
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- " create_primTree_partition : parts - %d selector - %d\n",
- parts, selector);
- }
-
- return ptree;
-}
-
-/* Function add_child_at_index.
-
- Attach PCHILD node as idx^th child of PNODE. */
-void
-add_child_at_index (struct primop_tree *ptree,
- struct primop_tree *pchild, int idx)
-{
- (PT_ARITY (ptree))++;
- while (idx >= ptree->children.length ())
- {
- ptree->children.safe_push (NULL);
- }
- PT_CHILD (ptree, idx) = pchild;
-}
-
-/* Function get_child_at_index.
-
- Get idx^th child of PNODE. */
-struct primop_tree *
-get_child_at_index (struct primop_tree *ptree, int idx)
-{
- gcc_assert (idx < PT_ARITY (ptree));
- return PT_CHILD (ptree, idx);
-}
-
-
/* Function vectorizable_store.
This function checks if STMT is STORE instruction and is vectorizable for
@@ -1699,7 +1617,7 @@ get_child_at_index (struct primop_tree *ptree, int idx)
eg:
j = op (v1, v2)
for (i = 0; i < 2048; i++)
- a[i] = b[i*j]
+ a[i] = b[i*j]
So, we should extract multiples of j from array b : for which instruction
generation is very difficult as we don't know what permute order to use.
@@ -1807,10 +1725,14 @@ vectorizable_store (gimple *stmt, struct ITER_node *inode,
pnode = exists_primTree_with_memref (base, step, false, inode);
if (pnode == NULL)
{
- pnode = create_primTree_memref (base, step, false, num, inode, NULL);
- ITER_NODE_LOOP_BODY (inode).safe_push (pnode);
+ pnode = create_primTree_memref (base, step, false, num,
+ ITER_NODE_NITERS (inode), NULL);
+ ITER_NODE_LOOP_BODY (inode).safe_insert (
+ ITER_NODE_LOOP_BODY (inode).length (),
+ pnode);
pchild1 = create_primTree_combine (POP_ILV, stmt,
- tree_to_uhwi (step) / num, inode, pnode);
+ tree_to_uhwi (step) / num, ITER_NODE_NITERS (inode),
+ pnode);
add_child_at_index (pnode, pchild1, 0);
}
else
@@ -1921,8 +1843,9 @@ vectorizable_load (gimple *stmt, struct ITER_node *inode,
pnode = create_primTree_partition (POP_EXTR, stmt,
tree_to_uhwi (step) / num,
- tree_to_uhwi (offset) / num, inode, parent);
- pchild1 = create_primTree_memref (base, step, true, num, inode, pnode);
+ tree_to_uhwi (offset) / num, ITER_NODE_NITERS (inode), parent);
+ pchild1 = create_primTree_memref (base, step, true, num,
+ ITER_NODE_NITERS (inode), pnode);
add_child_at_index (pnode, pchild1, 0);
return pnode;
}
@@ -1983,7 +1906,7 @@ analyze_and_create_ptree (struct primop_tree *parent, gimple *stmt,
if (!flow_bb_inside_loop_p (ITER_NODE_LOOP (inode), gimple_bb (stmt)))
{
if (dump_enabled_p ())
- {
+ {
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Not vectorized: Loop invariant not handled.\n");
}
@@ -2039,7 +1962,7 @@ is_ptree_complete (struct ITER_node *inode)
chlist = ptree->children.copy ();
while (chlist.length () > 0)
- {
+ {
if (chlist.pop () == NULL)
{
if (dump_enabled_p ())
@@ -2049,7 +1972,7 @@ is_ptree_complete (struct ITER_node *inode)
}
return false;
}
- }
+ }
}
return true;
}
@@ -2104,10 +2027,10 @@ create_ptree (struct ITER_node *inode)
if (is_ptree_complete (inode))
{
if (dump_enabled_p ())
- {
- dump_printf (MSG_NOTE,
- "Vectorized: ptree complete.\n");
- }
+ {
+ dump_printf (MSG_NOTE,
+ "Vectorized: ptree complete.\n");
+ }
return true;
}
else
@@ -2229,6 +2152,54 @@ vect_analyze_loop_with_prim_tree_2 (struct ITER_node *inode)
return create_ptree (inode);
}
+void
+print_primtree (FILE *fp, struct primop_tree *t)
+{
+ switch (PT_NODE_OP (t))
+ {
+ case POP_ILV:
+ case POP_CONCAT:
+ dump_printf (MSG_NOTE, "%s:%d_%d\n", tree_code_name[PT_NODE_OP (t)],
+ PT_PID (t), PT_DIVISION (t));
+ break;
+
+ case POP_EXTR:
+ case POP_SPLT:
+ dump_printf (MSG_NOTE, "%s:%d_%d,%d\n", tree_code_name[PT_NODE_OP (t)],
+ PT_PID (t), PT_DIVISION (t), PT_OPERAND_SELECTOR (t));
+ break;
+
+ case POP_MEMREF:
+ dump_printf (MSG_NOTE, "%s:%d\n", tree_code_name[PT_NODE_OP (t)],
+ PT_PID (t));
+ print_generic_expr (fp, PT_MEMVAL_BASE (t), TDF_SLIM);
+ break;
+
+ case POP_CONST:
+ case POP_INV:
+ case POP_COLLAPSE:
+ break;
+
+ case POP_ITER:
+ break;
+
+ default:
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, PT_COMPUTE_STMT (t), 0);
+ break;
+
+ }
+}
+
+void
+dump_primtree_node (int dump_kind, struct primop_tree *t)
+{
+ if (dump_file)
+ print_primtree (dump_file, t);
+
+ else if (alt_dump_file)
+ print_primtree (alt_dump_file, t);
+}
+
/* Function pretty_print_gimple_vec.
Function to pretty print all the gimple statements in LIST. */
@@ -2249,25 +2220,6 @@ pretty_print_gimple_vec (pretty_printer *pp, vec<gimple *> list)
}
}
-#define DEFTREECODE(SYM, NAME, TYPE, LEN) NAME,
-#define END_OF_BASE_TREE_CODES "@dummy",
-
-static const char *const tree_code_name[] = {
-#include "all-tree.def"
-"ILV",
-"CONCAT",
-"EXTR",
-"SPLT",
-"COLLAPSE",
-"MEMREF",
-"CONST",
-"INVAR",
-"ITER"
-};
-
-#undef DEFTREECODE
-#undef END_OF_BASE_TREE_CODES
-
/* Function pp_primop_tree.
Function to pretty print primop_tree PTREE. */
@@ -2275,6 +2227,7 @@ static const char *const tree_code_name[] = {
void
pp_primop_tree (pretty_printer *pp, struct primop_tree * ptree)
{
+ int i;
pp_newline_and_indent (pp, 0);
pp_indent (pp);
pp_printf (pp,"node [shape=record];");
@@ -2288,29 +2241,29 @@ switch (PT_NODE_OP (ptree))
{
case POP_ILV:
case POP_CONCAT:
- pp_printf (pp, "|%d", PT_DIVISION (ptree));
- break;
+ pp_printf (pp, "|%d", PT_DIVISION (ptree));
+ break;
case POP_EXTR:
case POP_SPLT:
- pp_printf (pp, "|div:%d", PT_DIVISION (ptree));
- pp_printf (pp, "|sel:%d", PT_OPERAND_SELECTOR (ptree));
- break;
+ pp_printf (pp, "|div:%d", PT_DIVISION (ptree));
+ pp_printf (pp, "|sel:%d", PT_OPERAND_SELECTOR (ptree));
+ break;
case POP_COLLAPSE:
- break;
+ break;
case POP_MEMREF:
- pp_printf (pp, "|");
- dump_generic_node (pp, PT_MEMVAL_BASE (ptree), 0, TDF_SLIM, false);
- pp_printf (pp, "|stride:");
- dump_generic_node (pp, PT_MEMVAL_MULT_IDX (ptree), 0, TDF_SLIM, false);
- break;
+ pp_printf (pp, "|");
+ dump_generic_node (pp, PT_MEMVAL_BASE (ptree), 0, TDF_SLIM, false);
+ pp_printf (pp, "|stride:");
+ dump_generic_node (pp, PT_MEMVAL_MULT_IDX (ptree), 0, TDF_SLIM, false);
+ break;
case POP_CONST:
- break;
+ break;
case POP_INV:
- break;
+ break;
case POP_ITER:
- break;
+ break;
default:
- break;
+ break;
}
pp_printf (pp, "}|{%d}\"];", PT_ARITY (ptree));
@@ -2329,12 +2282,14 @@ switch (PT_NODE_OP (ptree))
worklist = ptree->children.copy ();
- while (worklist.length () > 0)
+ for (i = 0; i < worklist.length (); i++)
{
- struct primop_tree *child = worklist.pop ();
+ struct primop_tree *child;
+ worklist.iterate (i, &child);
pp_newline_and_indent (pp, 0);
- pp_indent (pp);
- pp_printf (pp, "%d -> %d;", PT_PID (ptree), PT_PID (child));
+ pp_indent (pp);
+ pp_printf (pp, "%d -> %d [label = %d];", PT_PID (ptree),
+ PT_PID (child), i);
}
}
@@ -2348,13 +2303,16 @@ void
pretty_print_ptree_vec (pretty_printer *pp, vec<struct primop_tree *> list)
{
vec<struct primop_tree *> worklist;
+ struct primop_tree *tmp;
+ int i;
worklist = list.copy ();
- while (worklist.length () > 0)
+ for (i = 0; i < worklist.length (); i++)
{
pp_newline_and_indent (pp, 0);
- pp_primop_tree (pp, worklist.pop ());
+ worklist.iterate (i, &tmp);
+ pp_primop_tree (pp, tmp);
}
}
@@ -2448,6 +2406,8 @@ vectorize_loops_using_uniop (void)
unsigned int num_vectorized_loops = 0;
unsigned int ret = 0;
unsigned int i;
+ unsigned int vector_sizes, max_vec_size;
+
vect_loops_num = number_of_loops (cfun);
/* Bail out if there are no loops. */
@@ -2472,7 +2432,10 @@ vectorize_loops_using_uniop (void)
{
/* Vectorization should be possible. Let us find if all statements are
vectorizable, and if yes, create p-tree. */
- struct ITER_node * tmp_iter_node;
+ struct ITER_node * tmp_iter_node;
+ struct primop_tree *tmp_tree;
+ bool failed;
+ vec<struct primop_tree *> worklist;
vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
@@ -2483,7 +2446,7 @@ vectorize_loops_using_uniop (void)
tmp_iter_node = vect_populate_iter_node_from_loop (loop);
- if (!tmp_iter_node)
+ if (!tmp_iter_node)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2491,9 +2454,9 @@ vectorize_loops_using_uniop (void)
continue;
}
- if (!vect_analyze_loop_with_prim_tree_2 (tmp_iter_node))
+ if (!vect_analyze_loop_with_prim_tree_2 (tmp_iter_node))
{
- destroy_iter_node_info (tmp_iter_node);
+ destroy_iter_node_info (tmp_iter_node);
loop->aux = NULL;
continue;
}
@@ -2515,6 +2478,55 @@ vectorize_loops_using_uniop (void)
dump_iter_node (tmp_iter_node, alt_dump_file);
}
+ /* To enable best possible instruction selection, the tree should be
+ promoted to MAX_VEC_SIZE first, and then reduced to arity supported
+ by architecture. The macro TARGET_VECTORIZATION_ARITY provides list
+ of arities supported by architecture. */
+
+ vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+ max_vec_size = 1 << floor_log2 (vector_sizes);
+
+ failed =false;
+ i = 0;
+ worklist = vNULL;
+ worklist = (ITER_NODE_LOOP_BODY (tmp_iter_node)).copy ();
+ for (i = 0; i < worklist.length (); i++)
+ {
+ gcc_assert (worklist.iterate (i, &tmp_tree));
+ tmp_tree = k_arity_promotion_reduction (tmp_tree, max_vec_size);
+ if (tmp_tree == NULL)
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "%d_promotion failed.\n", max_vec_size);
+ failed = true;
+ break;
+ }
+
+ tmp_tree = k_arity_promotion_reduction (tmp_tree, 2);
+ if (tmp_tree == NULL)
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "arity_promotion_reduction failed.\n");
+ failed = true;
+ break;
+ }
+
+ ITER_NODE_LOOP_BODY (tmp_iter_node)[i] = tmp_tree;
+ }
+
+ if (failed)
+ continue;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf (MSG_NOTE, "\nk-arity promotion/reduction applied.\n");
+ if (dump_file)
+ dump_iter_node (tmp_iter_node, dump_file);
+ if (alt_dump_file)
+ dump_iter_node (tmp_iter_node, alt_dump_file);
+ }
+
gimple *loop_vectorized_call = vect_loop_vectorized_call (loop);
/* If the loop is vectorized, set uid of stmts within scalar loop to
0. This change is needed if transform phase uses this loop info. */
diff --git a/gcc/tree-vect-unified.h b/gcc/tree-vect-unified.h
index df49334..9991ede 100644
--- a/gcc/tree-vect-unified.h
+++ b/gcc/tree-vect-unified.h
@@ -18,6 +18,9 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#ifndef GCC_TREE_VECT_UNIFIED_H
+
+#define GCC_TREE_VECT_UNIFIED_H
/* Prim-op ITER : ITER node has 3 main objectives in loop representation using
primitive-ops for reordering -
@@ -67,11 +70,13 @@ struct ITER_node {
on temporary vectors. */
vec<gimple *> finish_stmts;
+#ifndef GENERATOR_FILE
/* All data references within the loop. */
vec<data_reference_p> datarefs;
/* All data dependences within the loop. */
vec<ddr_p> ddrs;
+#endif
/* List of all statements within the loop which have side effects beyond loop
body. probable root nodes are accumulated for loop by
@@ -105,12 +110,16 @@ enum stmt_use_type {
};
struct stmt_attr {
+#ifndef GENERATOR_FILE
enum stmt_vec_info_type type;
+#endif
enum stmt_use_type use_type;
tree access_fn;
struct primop_tree *ptree;
bool probable_root;
+#ifndef GENERATOR_FILE
struct data_reference *dr;
+#endif
tree vectype;
};
@@ -121,49 +130,7 @@ struct stmt_attr {
#define STMT_ATTR_DR(s) (get_stmt_attr (s))->dr
#define STMT_ATTR_VECTYPE(s) (get_stmt_attr (s))->vectype
-vec<struct stmt_attr *> stmt_attr_vec;
-
-void
-init_stmt_attr_vec (void)
-{
- gcc_assert (!stmt_attr_vec.exists ());
- stmt_attr_vec.create (50);
-}
-
-void
-free_stmt_attr_vec (void)
-{
- gcc_assert (stmt_attr_vec.exists ());
- stmt_attr_vec.release ();
-}
-
-inline void
-set_stmt_attr (gimple *stmt, struct stmt_attr *info)
-{
- unsigned int uid = gimple_uid (stmt);
- if (uid == 0)
- {
- gcc_checking_assert (info);
- uid = stmt_attr_vec.length () + 1;
- gimple_set_uid (stmt, uid);
- stmt_attr_vec.safe_push (info);
- }
- else
- {
- gcc_checking_assert (info == NULL);
- stmt_attr_vec[uid - 1] = info;
- }
-}
-
-inline struct stmt_attr *
-get_stmt_attr (gimple *stmt)
-{
- unsigned int uid = gimple_uid (stmt);
- if (uid == 0)
- return NULL;
-
- return stmt_attr_vec[uid - 1];
-}
+extern vec<struct stmt_attr *> stmt_attr_vec;
/* PRIMOP_TREE : Memory accesses within a loop have definite repetative pattern
which can be captured using primitive permute operators which can be used to
@@ -216,8 +183,10 @@ struct primop_tree {
most to outer-most. */
vec<struct ITER_node *> loop_dependences;
+#ifndef GENERATOR_FILE
/* Dependence links if any to other statements. */
vec<ddr_p> dependences;
+#endif
/* Depth within sub-tree of same type. */
int depth;
@@ -247,8 +216,14 @@ struct primop_tree {
tree mult_idx;
bool is_read;
} memval;
+
+ /* Placeholder for terminals in instruction tiles. */
+ struct placeholder {
+ int index;
+ char type;
+ } phval;
} u;
- void *aux;
+ int aux;
};
#define PT_PID(x) (x)->pid
@@ -275,16 +250,9 @@ struct primop_tree {
#define PT_MEMVAL_MULT_IDX(x) (x)->u.memval.mult_idx
#define PT_MEMVAL_IS_READ(x) (x)->u.memval.is_read
#define PT_AUX(x) (x)->aux
-
+#define PT_PH_IDX(x) (x)->u.phval.index
+#define PT_PH_TYPE(x) (x)->u.phval.type
//struct ITER_node *iter_node;
-
-extern unsigned int vectorize_loops_using_uniop (void);
-extern struct primop_tree * analyze_and_create_ptree (struct primop_tree *,
- gimple *, struct ITER_node *);
-extern void pretty_print_ptree_vec (pretty_printer *,
- vec<struct primop_tree*>);
-extern void pretty_print_iter_node (pretty_printer *, struct ITER_node *, int);
-
enum primop_code {
POP_ILV=MAX_TREE_CODES,
POP_CONCAT,
@@ -294,4 +262,60 @@ enum primop_code {
POP_MEMREF,
POP_CONST,
POP_INV,
- POP_ITER};
+ POP_ITER,
+ POP_PH};
+
+#define DEFTREECODE(SYM, NAME, TYPE, LEN) NAME,
+#define END_OF_BASE_TREE_CODES "@dummy",
+
+static const char *const tree_code_name[] = {
+#include "all-tree.def"
+"ILV",
+"CONCAT",
+"EXTR",
+"SPLT",
+"COLLAPSE",
+"MEMREF",
+"CONST",
+"INVAR",
+"ITER",
+"PLACEHOLDER"
+};
+
+#undef DEFTREECODE
+#undef END_OF_BASE_TREE_CODES
+
+extern unsigned int vectorize_loops_using_uniop (void);
+extern struct primop_tree * analyze_and_create_ptree (struct primop_tree *,
+ gimple *, struct ITER_node *);
+extern void pretty_print_ptree_vec (pretty_printer *,
+ vec<struct primop_tree*>);
+extern void pretty_print_iter_node (pretty_printer *, struct ITER_node *, int);
+extern struct primop_tree * k_arity_promotion_reduction (struct primop_tree *,
+ int);
+extern struct primop_tree * init_primop_node (void);
+extern struct primop_tree * populate_prim_node (enum primop_code, tree,
+ struct primop_tree *, gimple *);
+extern struct primop_tree * exists_primTree_with_memref (tree, tree, bool,
+ struct ITER_node *);
+extern struct primop_tree * create_primTree_memref (tree, tree, bool, int, tree,
+ struct primop_tree *);
+extern struct primop_tree * create_primTree_combine (enum primop_code, gimple *,
+ int, tree, struct primop_tree *);
+extern struct primop_tree * create_primTree_partition (enum primop_code,
+ gimple *, int, int, tree,
+ struct primop_tree *);
+extern void add_child_at_index (struct primop_tree *, struct primop_tree *,
+ int);
+extern struct primop_tree * get_child_at_index (struct primop_tree *, int);
+extern struct primop_tree * duplicate_prim_node (struct primop_tree *);
+extern void pp_primop_tree (pretty_printer *, struct primop_tree *);
+extern void dump_primtree_node (int, struct primop_tree *);
+extern void init_stmt_attr_vec (void);
+extern void free_stmt_attr_vec (void);
+extern inline void set_stmt_attr (gimple *, struct stmt_attr *);
+extern inline struct stmt_attr *get_stmt_attr (gimple *);
+extern struct primop_tree * unity_redundancy_elimination (struct primop_tree *);
+
+#endif
+
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 97e4d5e..bef4f16 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -852,8 +852,8 @@ get_earlier_stmt (gimple *stmt1, gimple *stmt2)
if (uid1 == 0 || uid2 == 0)
return NULL;
- gcc_checking_assert (uid1 <= stmt_vec_info_vec.length ()
- && uid2 <= stmt_vec_info_vec.length ());
+/* gcc_checking_assert (uid1 <= stmt_vec_info_vec.length ()
+ && uid2 <= stmt_vec_info_vec.length ());*/
if (uid1 < uid2)
return stmt1;
diff --git a/gcc/tree.c b/gcc/tree.c
index 3e63415..fc4de45 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -26,7 +26,7 @@ along with GCC; see the file COPYING3. If not see
It is intended to be language-independent but can occasionally
calls language-dependent routines. */
-
+#ifndef GENERATOR_FILE
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -62,6 +62,18 @@ along with GCC; see the file COPYING3. If not see
#include "print-tree.h"
#include "ipa-utils.h"
#include "selftest.h"
+#else
+#include "bconfig.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "tree.h"
+#include "gimple.h"
+#include "vec.h"
+#include "hash-table.h"
+#include "hash-set.h"
+#endif
/* Tree code classes. */
diff --git a/gcc/tree.h b/gcc/tree.h
index 3b12509..7790b4e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5324,7 +5324,9 @@ namespace wi
wide_int min_value (const_tree);
wide_int max_value (const_tree);
+#ifndef GENERATOR_FILE
wide_int from_mpz (const_tree, mpz_t, bool);
+#endif
}
template <typename T>