aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2023-08-30 11:57:47 +0200
committer Richard Biener <rguenther@suse.de> 2023-08-30 13:55:02 +0200
commit caa7a99a052929d5970677c5b639e1fa5166e334 (patch)
tree 308c2ea05998b100ab60279df08ad4d8fdbd49c2 /gcc
parent f7bff24905a6959f85f866390db2fff1d6f95520 (diff)
downloadgcc-caa7a99a052929d5970677c5b639e1fa5166e334.zip
gcc-caa7a99a052929d5970677c5b639e1fa5166e334.tar.gz
gcc-caa7a99a052929d5970677c5b639e1fa5166e334.tar.bz2
tree-optimization/111228 - combine two VEC_PERM_EXPRs
The following adds simplification of two VEC_PERM_EXPRs where the later one replaces all elements from either the first or the second input of the earlier permute. This allows a three input permute to be simplified to a two input one.

I'm following the existing two input simplification case and only allow non-VLA permutes.

The now existing three cases and the single case in tree-ssa-forwprop.cc somehow ask for merging, I'm not doing this as part of this change though.

PR tree-optimization/111228
* match.pd ((vec_perm (vec_perm ..) @5 ..) -> (vec_perm @x @5 ..)): New simplifications.
* gcc.dg/tree-ssa/forwprop-42.c: New testcase.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/match.pd141
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c17
2 files changed, 155 insertions, 3 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 47d2733..6a7edde 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8993,10 +8993,10 @@ and,
/* Merge
- c = VEC_PERM_EXPR <a, b, VCST0>;
- d = VEC_PERM_EXPR <c, c, VCST1>;
+ c = VEC_PERM_EXPR <a, b, VCST0>;
+ d = VEC_PERM_EXPR <c, c, VCST1>;
to
- d = VEC_PERM_EXPR <a, b, NEW_VCST>; */
+ d = VEC_PERM_EXPR <a, b, NEW_VCST>; */
(simplify
(vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
@@ -9038,6 +9038,141 @@ and,
(if (op0)
(vec_perm @1 @2 { op0; })))))))
+/* Merge
+ c = VEC_PERM_EXPR <a, b, VCST0>;
+ d = VEC_PERM_EXPR <x, c, VCST1>;
+ to
+ d = VEC_PERM_EXPR <x, {a,b}, NEW_VCST>;
+ when the elements VCST1 takes from c originate from only one of a or b,
+ i.e. all elements of the other one are replaced by the later permutation. */
+
+(simplify
+ (vec_perm @5 (vec_perm@0 @1 @2 VECTOR_CST@3) VECTOR_CST@4)
+ (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) /* Fixed-length (non-VLA) vectors only.  */
+ (with
+ {
+ machine_mode result_mode = TYPE_MODE (type);
+ machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+ int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ vec_perm_builder builder0; /* Filled from the inner selector @3.  */
+ vec_perm_builder builder1; /* Filled from the outer selector @4.  */
+ vec_perm_builder builder2 (nelts, nelts, 2); /* Receives the combined selector.  */
+ }
+ (if (tree_to_vec_perm_builder (&builder0, @3)
+ && tree_to_vec_perm_builder (&builder1, @4))
+ (with
+ {
+ vec_perm_indices sel0 (builder0, 2, nelts); /* Inner permute selector.  */
+ vec_perm_indices sel1 (builder1, 2, nelts); /* Outer permute selector.  */
+ bool use_1 = false, use_2 = false; /* Set when the result draws on @1 resp. @2.  */
+
+ for (int i = 0; i < nelts; i++)
+ {
+ if (known_lt ((poly_uint64)sel1[i], sel1.nelts_per_input ()))
+ builder2.quick_push (sel1[i]); /* Refers to the first input @5; pushed unchanged.  */
+ else
+ {
+ poly_uint64 j = sel0[(sel1[i] - sel1.nelts_per_input ())
+ .to_constant ()]; /* Refers to @0: look the element up in the inner selector.  */
+ if (known_lt (j, sel0.nelts_per_input ()))
+ use_1 = true; /* Inner index falls into the first inner input @1.  */
+ else
+ {
+ use_2 = true; /* Inner index falls into the second inner input @2.  */
+ j -= sel0.nelts_per_input (); /* Make j relative to the start of @2.  */
+ }
+ builder2.quick_push (j + sel1.nelts_per_input ()); /* Second input of the new permute.  */
+ }
+ }
+ }
+ (if (use_1 ^ use_2) /* Exactly one of @1 and @2 is referenced.  */
+ (with
+ {
+ vec_perm_indices sel2 (builder2, 2, nelts);
+ tree op0 = NULL_TREE; /* Stays NULL_TREE when we punt.  */
+ /* If the new VEC_PERM_EXPR can't be handled but both
+ original VEC_PERM_EXPRs can, punt.
+ If one or both of the original VEC_PERM_EXPRs can't be
+ handled and the new one can't be either, don't increase
+ number of VEC_PERM_EXPRs that can't be handled. */
+ if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)
+ || (single_use (@0)
+ ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
+ || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
+ : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
+ op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); /* New selector, typed like @4.  */
+ }
+ (if (op0)
+ (switch
+ (if (use_1)
+ (vec_perm @5 @1 { op0; })) /* Only @1 contributed: permute @5 with @1.  */
+ (if (use_2)
+ (vec_perm @5 @2 { op0; }))))))))))) /* Only @2 contributed: permute @5 with @2.  */
+
+/* And the case with swapped outer permute sources, i.e. the inner
   permute is the first input of the outer one and @5 the second.  */
+
+(simplify
+ (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @5 VECTOR_CST@4)
+ (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) /* Fixed-length (non-VLA) vectors only.  */
+ (with
+ {
+ machine_mode result_mode = TYPE_MODE (type);
+ machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+ int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ vec_perm_builder builder0; /* Filled from the inner selector @3.  */
+ vec_perm_builder builder1; /* Filled from the outer selector @4.  */
+ vec_perm_builder builder2 (nelts, nelts, 2); /* Receives the combined selector.  */
+ }
+ (if (tree_to_vec_perm_builder (&builder0, @3)
+ && tree_to_vec_perm_builder (&builder1, @4))
+ (with
+ {
+ vec_perm_indices sel0 (builder0, 2, nelts); /* Inner permute selector.  */
+ vec_perm_indices sel1 (builder1, 2, nelts); /* Outer permute selector.  */
+ bool use_1 = false, use_2 = false; /* Set when the result draws on @1 resp. @2.  */
+
+ for (int i = 0; i < nelts; i++)
+ {
+ if (known_ge ((poly_uint64)sel1[i], sel1.nelts_per_input ()))
+ builder2.quick_push (sel1[i]); /* Refers to the second input @5; pushed unchanged.  */
+ else
+ {
+ poly_uint64 j = sel0[sel1[i].to_constant ()]; /* Refers to @0: look the element up in the inner selector.  */
+ if (known_lt (j, sel0.nelts_per_input ()))
+ use_1 = true; /* Inner index falls into the first inner input @1.  */
+ else
+ {
+ use_2 = true; /* Inner index falls into the second inner input @2.  */
+ j -= sel0.nelts_per_input (); /* Make j relative to the start of @2.  */
+ }
+ builder2.quick_push (j); /* First input of the new permute, so no offset.  */
+ }
+ }
+ }
+ (if (use_1 ^ use_2) /* Exactly one of @1 and @2 is referenced.  */
+ (with
+ {
+ vec_perm_indices sel2 (builder2, 2, nelts);
+ tree op0 = NULL_TREE; /* Stays NULL_TREE when we punt.  */
+ /* If the new VEC_PERM_EXPR can't be handled but both
+ original VEC_PERM_EXPRs can, punt.
+ If one or both of the original VEC_PERM_EXPRs can't be
+ handled and the new one can't be either, don't increase
+ number of VEC_PERM_EXPRs that can't be handled. */
+ if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)
+ || (single_use (@0)
+ ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
+ || !can_vec_perm_const_p (result_mode, op_mode, sel1, false))
+ : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
+ op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); /* New selector, typed like @4.  */
+ }
+ (if (op0)
+ (switch
+ (if (use_1)
+ (vec_perm @1 @5 { op0; })) /* Only @1 contributed: permute @1 with @5.  */
+ (if (use_2)
+ (vec_perm @2 @5 { op0; }))))))))))) /* Only @2 contributed: permute @2 with @5.  */
+
/* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop.
The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
new file mode 100644
index 0000000..f3dbc3e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-cddce1" } */
+
+typedef unsigned long v2di __attribute__((vector_size(16)));
+
+v2di g;
+void test (v2di *v) /* Build a v2di from lo[0] and hi[1] and store it in g.  */
+{
+ v2di lo = v[0];
+ v2di hi = v[1];
+ v2di res; /* Both lanes are assigned below.  */
+ res[1] = hi[1]; /* Lane 1 of the result comes from lane 1 of hi.  */
+ res[0] = lo[0]; /* Lane 0 of the result comes from lane 0 of lo.  */
+ g = res; /* Store the assembled vector to the global g.  */
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR <\[^>\]*, { 0, 3 }>" 1 "cddce1" } } */