Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c | 124
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c | 9
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h | 163
-rw-r--r-- | gcc/tree-ssa-forwprop.c | 141
-rw-r--r-- | gcc/vec-perm-indices.c | 59
-rw-r--r-- | gcc/vec-perm-indices.h | 1
6 files changed, 482 insertions, 15 deletions
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c
new file mode 100644
index 0000000..987d6db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c
@@ -0,0 +1,124 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#include "vec-perm-ctor.h"
+
+#include <stdlib.h>
+
+int
+main ()
+{
+  du a_du = 100ULL;
+  du b_du = 200ULL;
+
+  di a_di = -100;
+  di b_di = 200;
+
+  df a_df = 10.0;
+  df b_df = 20.0;
+
+  si a_si = 12;
+  si b_si = -25;
+  si c_si = -37;
+  si d_si = 50;
+
+  sf a_sf = 30.0f;
+  sf b_sf = 40.0f;
+  sf c_sf = 50.0f;
+  sf d_sf = 60.0f;
+
+  hu a_hu = 10;
+  hu b_hu = 20;
+  hu c_hu = 30;
+  hu d_hu = 40;
+  hu e_hu = 50;
+  hu f_hu = 60;
+  hu g_hu = 70;
+  hu h_hu = 80;
+
+  qi a_qi = 10;
+  qi b_qi = 20;
+  qi c_qi = -30;
+  qi d_qi = 40;
+  qi e_qi = -50;
+  qi f_qi = 60;
+  qi g_qi = 70;
+  qi h_qi = -80;
+
+  v2du res1 = test_ctor_ctor_same_du (a_du, b_du);
+  if (res1[0] != a_du || res1[1] != b_du)
+    abort ();
+
+  v2df res2 = test_ctor_ctor_same_df (a_df, b_df);
+  if (res2[0] != a_df || res2[1] != b_df)
+    abort ();
+
+  v4si res3 = test_ctor_ctor_same_si (a_si, b_si, c_si, d_si);
+  if (res3[0] != a_si || res3[1] != b_si || res3[2] != c_si || res3[3] != d_si)
+    abort ();
+
+  v4sf res4 = test_ctor_ctor_same_sf (a_sf, b_sf, c_sf, d_sf);
+  if (res4[0] != a_sf || res4[1] != b_sf || res4[2] != c_sf || res4[3] != d_sf)
+    abort ();
+
+  v8hu res5
+    = test_ctor_ctor_same_hu (a_hu, b_hu, c_hu, d_hu, e_hu, f_hu, g_hu, h_hu);
+
+  if (res5[0] != a_hu || res5[1] != b_hu || res5[2] != c_hu || res5[3] != d_hu
+      || res5[4] != e_hu || res5[5] != f_hu || res5[6] != g_hu
+      || res5[7] != h_hu)
+    abort ();
+
+  v16qi res6
+    = test_ctor_ctor_same_qi (a_qi, b_qi, c_qi, d_qi, e_qi, f_qi, g_qi, h_qi);
+
+  if (res6[0] != a_qi || res6[1] != b_qi || res6[2] != c_qi || res6[3] != d_qi
+      || res6[4] != a_qi || res6[5] != b_qi || res6[6] != c_qi
+      || res6[7] != d_qi || res6[8] != e_qi || res6[9] != f_qi
+      || res6[10] != g_qi || res6[11] != h_qi || res6[12] != e_qi
+      || res6[13] != f_qi || res6[14] != g_qi || res6[15] != h_qi)
+    abort ();
+
+  v2du res7 = test_ctor_cst_same_du (a_du, b_du);
+  if (res7[0] != a_du || res7[1] != 100)
+    abort ();
+
+  v4sf res8 = test_ctor_cst_same_sf (a_sf, b_sf);
+  if (res8[0] != a_sf || res8[1] != 2.0f || res8[2] != b_sf || res8[3] != 4.0f)
+    abort ();
+
+  v2df res9 = test_ctor_cst_same_df (a_df, b_df);
+  if (res9[0] != b_df || res9[1] != 200.0)
+    abort ();
+
+  v4si res10 = test_cst_ctor_same_si (a_si, b_si);
+  if (res10[0] != 1 || res10[1] != 3 || res10[2] != a_si || res10[3] != b_si)
+    abort ();
+
+  v2di res11 = test_ctor_cst_diff_di_si (a_di, b_di);
+  /* Need to take care of the endianness since the function converts vector
+     const to one different vector type (element size), the endianness
+     determines the reinterpreted layout.  Same reason for res12 below.  */
+  if (res11[0] != -100 ||
+#ifdef __LITTLE_ENDIAN__
+      res11[1] != 3
+#else
+      res11[1] != 0x300000000LL
+#endif
+  )
+    abort ();
+
+  v2du res12 = test_cst_ctor_diff_sf_du (a_du, b_du);
+  if (
+#ifdef __LITTLE_ENDIAN__
+      res12[0] != 0x400000003f800000ULL
+#else
+      res12[0] != 0x3f80000040000000ULL
+#endif
+      || res12[1] != 100)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c
new file mode 100644
index 0000000..cc59e60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -fdump-tree-optimized" } */
+
+/* To test all permutations fed by CTOR and CST can be optimized away.  */
+
+#include "vec-perm-ctor.h"
+
+/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h
new file mode 100644
index 0000000..1878270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h
@@ -0,0 +1,163 @@
+#include "altivec.h"
+
+typedef vector unsigned long long v2du;
+typedef vector signed long long v2di;
+typedef vector unsigned int v4su;
+typedef vector signed int v4si;
+typedef vector unsigned short v8hu;
+typedef vector signed short v8hi;
+typedef vector unsigned char v16qu;
+typedef vector signed char v16qi;
+typedef vector double v2df;
+typedef vector float v4sf;
+
+typedef unsigned long long du;
+typedef signed long long di;
+typedef unsigned int su;
+typedef signed int si;
+typedef unsigned short hu;
+typedef signed short hi;
+typedef unsigned char qu;
+typedef signed char qi;
+typedef double df;
+typedef float sf;
+
+/* To test whether we can optimize vector permutation away when
+   the two inputs are same type CTOR or one input is CTOR and the
+   other is CST.  */
+
+/* CTOR + CTOR part (only same type supported).  */
+
+/* Test both operands are same type CTOR (type unsigned long long).  */
+__attribute__ ((noipa)) v2du
+test_ctor_ctor_same_du (du a, du b)
+{
+  v2du v1 = {a, 0};
+  v2du v2 = {b, 0};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2du vres = (v2du) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type double).  */
+__attribute__ ((noipa)) v2df
+test_ctor_ctor_same_df (df a, df b)
+{
+  v2df v1 = {0.0, a};
+  v2df v2 = {0.0, b};
+  v16qu vc = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+  v2df vres = (v2df) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type signed int).  */
+__attribute__ ((noipa)) v4si
+test_ctor_ctor_same_si (si a, si b, si c, si d)
+{
+  v4si v1 = {0, a, 0, c};
+  v4si v2 = {0, b, 0, d};
+  v16qu vc = {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31};
+  v4si vres = (v4si) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type float).  */
+__attribute__ ((noipa)) v4sf
+test_ctor_ctor_same_sf (sf a, sf b, sf c, sf d)
+{
+  v4sf v1 = {c, 0.0f, d, 0.0f};
+  v4sf v2 = {a, 0.0f, b, 0.0f};
+  v16qu vc = {16, 17, 18, 19, 24, 25, 26, 27, 0, 1, 2, 3, 8, 9, 10, 11};
+  v4sf vres = (v4sf) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type unsigned short).  */
+__attribute__ ((noipa)) v8hu
+test_ctor_ctor_same_hu (hu a, hu b, hu c, hu d, hu e, hu f, hu g, hu h)
+{
+  v8hu v1 = {0, a, 0, b, 0, c, 0, d};
+  v8hu v2 = {0, e, 0, f, 0, g, 0, h};
+  v16qu vc = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
+  v8hu vres = (v8hu) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type signed char).  */
+__attribute__ ((noipa)) v16qi
+test_ctor_ctor_same_qi (qi a, qi b, qi c, qi d, qi e, qi f, qi g, qi h)
+{
+  v16qi v1 = {0, a, 0, b, 0, c, 0, d, 0, a, 0, b, 0, c, 0, d};
+  v16qi v2 = {0, e, 0, f, 0, g, 0, h, 0, e, 0, f, 0, g, 0, h};
+  v16qu vc = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  v16qi vres = (v16qi) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CTOR + CST part (same type).  */
+
+__attribute__ ((noipa)) v2du
+test_ctor_cst_same_du (du a, du b)
+{
+  v2du v1 = {a, b};
+  v2du v2 = {100, 200};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2du vres = (v2du) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+__attribute__ ((noipa)) v4sf
+test_ctor_cst_same_sf (sf a, sf b)
+{
+  v4sf v1 = {0.0f, a, 0.0f, b};
+  v4sf v2 = {1.0f, 2.0f, 3.0f, 4.0f};
+  v16qu vc = {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31};
+  v4sf vres = (v4sf) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CST + CTOR part (same type).  */
+
+__attribute__ ((noipa)) v2df
+test_ctor_cst_same_df (df a, df b)
+{
+  v2df v1 = {a, b};
+  v2df v2 = {100.0, 200.0};
+  v16qu vc = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+  v2df vres = (v2df) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+__attribute__ ((noipa)) v4si
+test_cst_ctor_same_si (si a, si b)
+{
+  v4si v1 = {a, 0, b, 0};
+  v4si v2 = {1, 2, 3, 4};
+  v16qu vc = {16, 17, 18, 19, 24, 25, 26, 27, 0, 1, 2, 3, 8, 9, 10, 11};
+  v4si vres = (v4si) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CTOR + CST part (different types).  */
+
+__attribute__ ((noipa)) v2di
+test_ctor_cst_diff_di_si (di a, di b)
+{
+  v2di v1 = {a, b};
+  v4si v2 = {3, 0, 4, 0};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2di vres = (v2di) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CST + CTOR part (different types).  */
+
+__attribute__ ((noipa)) v2du
+test_cst_ctor_diff_sf_du (du a, du b)
+{
+  v4sf v1 = {1.0f, 2.0f, 3.0f, 4.0f};
+  v2du v2 = {a, b};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2du vres = (v2du) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 0706fd8..beb2702 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2120,9 +2120,9 @@ static int
 simplify_permutation (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi);
-  gimple *def_stmt;
+  gimple *def_stmt = NULL;
   tree op0, op1, op2, op3, arg0, arg1;
-  enum tree_code code;
+  enum tree_code code, code2 = ERROR_MARK;
   bool single_use_op0 = false;
 
   gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
@@ -2142,10 +2142,28 @@ simplify_permutation (gimple_stmt_iterator *gsi)
   else if (TREE_CODE (op0) == SSA_NAME)
     {
       def_stmt = get_prop_source_stmt (op0, false, &single_use_op0);
-      if (!def_stmt || !can_propagate_from (def_stmt))
+      if (!def_stmt)
         return 0;
-
       code = gimple_assign_rhs_code (def_stmt);
+      if (code == VIEW_CONVERT_EXPR)
+        {
+          tree rhs = gimple_assign_rhs1 (def_stmt);
+          tree name = TREE_OPERAND (rhs, 0);
+          if (TREE_CODE (name) != SSA_NAME)
+            return 0;
+          if (!has_single_use (name))
+            single_use_op0 = false;
+          /* Here we update the def_stmt through this VIEW_CONVERT_EXPR,
+             but still keep the code to indicate it comes from
+             VIEW_CONVERT_EXPR.  */
+          def_stmt = SSA_NAME_DEF_STMT (name);
+          if (!def_stmt || !is_gimple_assign (def_stmt))
+            return 0;
+          if (gimple_assign_rhs_code (def_stmt) != CONSTRUCTOR)
+            return 0;
+        }
+      if (!can_propagate_from (def_stmt))
+        return 0;
       arg0 = gimple_assign_rhs1 (def_stmt);
     }
   else
@@ -2173,12 +2191,10 @@ simplify_permutation (gimple_stmt_iterator *gsi)
       update_stmt (stmt);
       return remove_prop_source_from_use (op0) ? 2 : 1;
     }
-
-  /* Shuffle of a constructor.  */
-  else if (code == CONSTRUCTOR || code == VECTOR_CST)
+  else if (code == CONSTRUCTOR
+           || code == VECTOR_CST
+           || code == VIEW_CONVERT_EXPR)
     {
-      tree opt;
-      bool ret = false;
       if (op0 != op1)
         {
           if (TREE_CODE (op0) == SSA_NAME && !single_use_op0)
@@ -2188,14 +2204,27 @@ simplify_permutation (gimple_stmt_iterator *gsi)
             arg1 = op1;
           else if (TREE_CODE (op1) == SSA_NAME)
             {
-              enum tree_code code2;
-
               gimple *def_stmt2 = get_prop_source_stmt (op1, true, NULL);
-              if (!def_stmt2 || !can_propagate_from (def_stmt2))
+              if (!def_stmt2)
                 return 0;
-
-              code2 = gimple_assign_rhs_code (def_stmt2);
-              if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
+              code2 = gimple_assign_rhs_code (def_stmt2);
+              if (code2 == VIEW_CONVERT_EXPR)
+                {
+                  tree rhs = gimple_assign_rhs1 (def_stmt2);
+                  tree name = TREE_OPERAND (rhs, 0);
+                  if (TREE_CODE (name) != SSA_NAME)
+                    return 0;
+                  if (!has_single_use (name))
+                    return 0;
+                  def_stmt2 = SSA_NAME_DEF_STMT (name);
+                  if (!def_stmt2 || !is_gimple_assign (def_stmt2))
+                    return 0;
+                  if (gimple_assign_rhs_code (def_stmt2) != CONSTRUCTOR)
+                    return 0;
+                }
+              else if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
+                return 0;
+              if (!can_propagate_from (def_stmt2))
                 return 0;
               arg1 = gimple_assign_rhs1 (def_stmt2);
             }
@@ -2209,10 +2238,92 @@ simplify_permutation (gimple_stmt_iterator *gsi)
             return 0;
           arg1 = arg0;
         }
-      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (op0), arg0, arg1, op2);
+
+      /* If there are any VIEW_CONVERT_EXPRs found when finding permutation
+         operands source, check whether it's valid to transform and prepare
+         the required new operands.  */
+      if (code == VIEW_CONVERT_EXPR || code2 == VIEW_CONVERT_EXPR)
+        {
+          /* Figure out the target vector type to which operands should be
+             converted.  If both are CONSTRUCTOR, the types should be the
+             same, otherwise, use the one of CONSTRUCTOR.  */
+          tree tgt_type = NULL_TREE;
+          if (code == VIEW_CONVERT_EXPR)
+            {
+              gcc_assert (gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR);
+              code = CONSTRUCTOR;
+              tgt_type = TREE_TYPE (arg0);
+            }
+          if (code2 == VIEW_CONVERT_EXPR)
+            {
+              tree arg1_type = TREE_TYPE (arg1);
+              if (tgt_type == NULL_TREE)
+                tgt_type = arg1_type;
+              else if (tgt_type != arg1_type)
+                return 0;
+            }
+
+          if (!VECTOR_TYPE_P (tgt_type))
+            return 0;
+          tree op2_type = TREE_TYPE (op2);
+          /* Should have folded this before.  */
+          gcc_assert (op2_type != tgt_type);
+
+          /* Figure out the shrunk factor.  */
+          poly_uint64 tgt_units = TYPE_VECTOR_SUBPARTS (tgt_type);
+          poly_uint64 op2_units = TYPE_VECTOR_SUBPARTS (op2_type);
+          if (maybe_gt (tgt_units, op2_units))
+            return 0;
+          unsigned int factor;
+          if (!constant_multiple_p (op2_units, tgt_units, &factor))
+            return 0;
+
+          /* Build the new permutation control vector as target vector.  */
+          vec_perm_builder builder;
+          if (!tree_to_vec_perm_builder (&builder, op2))
+            return 0;
+          vec_perm_indices indices (builder, 2, op2_units);
+          vec_perm_indices new_indices;
+          if (new_indices.new_shrunk_vector (indices, factor))
+            {
+              tree mask_type = tgt_type;
+              if (!VECTOR_INTEGER_TYPE_P (mask_type))
+                {
+                  tree elem_type = TREE_TYPE (mask_type);
+                  unsigned elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+                  tree int_type = build_nonstandard_integer_type (elem_size, 0);
+                  mask_type = build_vector_type (int_type, tgt_units);
+                }
+              op2 = vec_perm_indices_to_tree (mask_type, new_indices);
+            }
+          else
+            return 0;
+
+          /* Convert the VECTOR_CST to the appropriate vector type.  */
+          if (tgt_type != TREE_TYPE (arg0))
+            arg0 = fold_build1 (VIEW_CONVERT_EXPR, tgt_type, arg0);
+          else if (tgt_type != TREE_TYPE (arg1))
+            arg1 = fold_build1 (VIEW_CONVERT_EXPR, tgt_type, arg1);
+        }
+
+      /* VIEW_CONVERT_EXPR should be updated to CONSTRUCTOR before.  */
+      gcc_assert (code == CONSTRUCTOR || code == VECTOR_CST);
+
+      /* Shuffle of a constructor.  */
+      bool ret = false;
+      tree res_type = TREE_TYPE (arg0);
+      tree opt = fold_ternary (VEC_PERM_EXPR, res_type, arg0, arg1, op2);
       if (!opt
           || (TREE_CODE (opt) != CONSTRUCTOR && TREE_CODE (opt) != VECTOR_CST))
         return 0;
+      /* Found VIEW_CONVERT_EXPR before, need one explicit conversion.  */
+      if (res_type != TREE_TYPE (op0))
+        {
+          tree name = make_ssa_name (TREE_TYPE (opt));
+          gimple *ass_stmt = gimple_build_assign (name, opt);
+          gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT);
+          opt = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (op0), name);
+        }
       gimple_assign_set_rhs_from_tree (gsi, opt);
       update_stmt (gsi_stmt (*gsi));
       if (TREE_CODE (op0) == SSA_NAME)
diff --git a/gcc/vec-perm-indices.c b/gcc/vec-perm-indices.c
index ede590d..31b32ea 100644
--- a/gcc/vec-perm-indices.c
+++ b/gcc/vec-perm-indices.c
@@ -101,6 +101,65 @@ vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
   m_encoding.finalize ();
 }
 
+/* Check whether we can switch to a new permutation vector that
+   selects the same input elements as ORIG, but with each element
+   built up from FACTOR pieces.  Return true if yes, otherwise
+   return false.  Every FACTOR permutation indexes should be
+   continuous separately and the first one of each batch should
+   be able to exactly modulo FACTOR.  For example, if ORIG is
+   { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
+   is { 1, 2, 0, 3 }.  */
+
+bool
+vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
+                                     unsigned int factor)
+{
+  gcc_assert (factor > 0);
+
+  if (maybe_lt (orig.m_nelts_per_input, factor))
+    return false;
+
+  poly_uint64 nelts;
+  /* Invalid if vector units number isn't multiple of factor.  */
+  if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
+    return false;
+
+  /* Only handle the case that npatterns is multiple of factor.
+     FIXME: Try to see whether we can reshape it by factor npatterns.  */
+  if (orig.m_encoding.npatterns () % factor != 0)
+    return false;
+
+  unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
+  auto_vec<element_type, 32> encoding (encoded_nelts);
+  /* Separate all encoded elements into batches by size factor,
+     then ensure the first element of each batch is multiple of
+     factor and all elements in each batch is consecutive from
+     the first one.  */
+  for (unsigned int i = 0; i < encoded_nelts; i += factor)
+    {
+      element_type first = orig.m_encoding[i];
+      element_type new_index;
+      if (!multiple_p (first, factor, &new_index))
+        return false;
+      for (unsigned int j = 1; j < factor; ++j)
+        if (maybe_ne (first + j, orig.m_encoding[i + j]))
+          return false;
+      encoding.quick_push (new_index);
+    }
+
+  m_ninputs = orig.m_ninputs;
+  m_nelts_per_input = nelts;
+  poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
+  unsigned int npatterns = orig.m_encoding.npatterns () / factor;
+
+  m_encoding.new_vector (full_nelts, npatterns,
+                         orig.m_encoding.nelts_per_pattern ());
+  m_encoding.splice (encoding);
+  m_encoding.finalize ();
+
+  return true;
+}
+
 /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
    the values of the permutation vector but it doesn't change the way that
    the elements are encoded.  */
diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
index bc70ecd..98d27f0 100644
--- a/gcc/vec-perm-indices.h
+++ b/gcc/vec-perm-indices.h
@@ -57,6 +57,7 @@ public:
 
   void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
   void new_expanded_vector (const vec_perm_indices &, unsigned int);
+  bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
   void rotate_inputs (int delta);
 
   /* Return the underlying vector encoding.  */
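The index-shrinking rule described in the new_shrunk_vector comment can be illustrated outside the compiler. The standalone sketch below is not part of the patch; the helper name shrink_perm and the plain unsigned arrays are made up for illustration only. It checks that each batch of FACTOR indices starts on a multiple of FACTOR and is consecutive, then keeps one index per batch divided by FACTOR, which is what lets a byte-level permutation selector be rewritten for wider elements.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: check whether ORIG can be shrunk by FACTOR and,
   if so, build the shrunk selector.  Each batch of FACTOR indices must
   be consecutive and its first index must be a multiple of FACTOR.  */
static bool
shrink_perm (const unsigned *orig, unsigned nelts, unsigned factor,
             unsigned *shrunk)
{
  for (unsigned i = 0; i < nelts; i += factor)
    {
      if (orig[i] % factor != 0)
        return false;
      for (unsigned j = 1; j < factor; j++)
        if (orig[i + j] != orig[i] + j)
          return false;
      shrunk[i / factor] = orig[i] / factor;
    }
  return true;
}

int
main (void)
{
  /* The example from the new_shrunk_vector comment.  */
  unsigned orig[] = { 2, 3, 4, 5, 0, 1, 6, 7 };
  unsigned shrunk[4];
  if (shrink_perm (orig, 8, 2, shrunk))
    printf ("%u %u %u %u\n", shrunk[0], shrunk[1], shrunk[2], shrunk[3]);
  return 0;
}

Run on that example it prints 1 2 0 3, matching the shrunk permutation the new member function would encode.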