diff options
| -rw-r--r-- | gcc/config/riscv/autovec-opt.md | 41 | ||||
| -rw-r--r-- | gcc/gimple-iterator.cc | 14 | ||||
| -rw-r--r-- | gcc/match.pd | 44 | ||||
| -rw-r--r-- | gcc/testsuite/g++.dg/torture/pr122589.C | 22 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-bool-3.c | 13 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c | 9 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c | 9 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c | 9 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c | 9 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c | 9 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c | 65 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h | 24 | ||||
| -rw-r--r-- | gcc/tree-vect-stmts.cc | 9 |
13 files changed, 228 insertions, 49 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 063c9a0..52ab79c 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -2424,3 +2424,44 @@ } [(set_attr "type" "vfalu")] ) + +;; Combine vsext.vf + vsll.vi into vwsll.vi, which requires ZVBB. +;; vwsll.vi zero-extends its source, so the combination is only valid when +;; the shift amount is equal to or greater than the double-truncated precision. +;; Appears in the satd function of x264. +(define_insn_and_split "*vwsll_sign_extend_<mode>" + [(set (match_operand:VWEXTI 0 "register_operand") + (ashift:VWEXTI + (sign_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) + (match_operand 2 "const_int_operand")))] + "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + int imm = INTVAL (operands[2]); + int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode)); + + if (imm >= trunc_prec) + { + insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode); + emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); + } + else + { + insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode); + rtx extend = gen_reg_rtx (<MODE>mode); + rtx unary_ops[] = {extend, operands[1]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, + unary_ops); + + icode = code_for_pred_scalar (ASHIFT, <MODE>mode); + rtx binary_ops[] = {operands[0], extend, operands[2]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, + binary_ops); + } + + DONE; + } +) diff --git a/gcc/gimple-iterator.cc b/gcc/gimple-iterator.cc index 3af672b..741e5a5 100644 --- a/gcc/gimple-iterator.cc +++ b/gcc/gimple-iterator.cc @@ -389,8 +389,8 @@ gsi_set_stmt (gimple_stmt_iterator *gsi, gimple *stmt) } -/* Move all statements in the sequence before I to a new sequence. - Return this new sequence. I is set to the head of the new list. */ +/* Move all statements in the sequence starting at I to a new sequence. + Set *PNEW_SEQ to this sequence. 
I is set to the head of the new list. */ void gsi_split_seq_before (gimple_stmt_iterator *i, gimple_seq *pnew_seq) @@ -471,18 +471,16 @@ void gsi_replace_with_seq (gimple_stmt_iterator *gsi, gimple_seq seq, bool update_eh_info) { - gimple_stmt_iterator seqi; - gimple *last; if (gimple_seq_empty_p (seq)) { gsi_remove (gsi, true); return; } - seqi = gsi_last (seq); - last = gsi_stmt (seqi); - gsi_remove (&seqi, false); + gimple_seq tail; + gimple_stmt_iterator lasti = gsi_last (seq); + gsi_split_seq_before (&lasti, &tail); gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); - gsi_replace (gsi, last, update_eh_info); + gsi_replace (gsi, gsi_stmt (lasti), update_eh_info); } diff --git a/gcc/match.pd b/gcc/match.pd index 0ea86d9..3cd9ab1 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3726,53 +3726,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) bool c2_is_type_precision_p = c2 == prec; } (if (widen_prec > prec && c2_is_type_precision_p && c4_is_max_p))))) - (for mult_op (mult widen_mult) - (match (unsigned_integer_sat_mul @0 @1) - /* SAT_U_MUL (X, Y) = { - WT x = (WT)a * (WT)b; - NT hi = x >> (sizeof(NT) * 8); - NT lo = (NT)x; - return lo | -!!hi; - } while WT is uint128_t, uint64_t, uint32_t, uint16_t, - and T is uint64_t, uint32_t, uint16_t, uint8_t. */ - (convert1? - (bit_ior - (convert? - (negate - (convert (ne (convert2? 
(rshift @3 INTEGER_CST@2)) integer_zerop)))) - (convert (mult_op:c@3 (convert@4 @0) (convert@5 @1))))) - (if (types_match (type, @0, @1)) - (with - { - unsigned prec = TYPE_PRECISION (type); - unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3)); - unsigned cvt4_prec = TYPE_PRECISION (TREE_TYPE (@4)); - unsigned cvt5_prec = TYPE_PRECISION (TREE_TYPE (@5)); - - bool widen_mult_p = mult_op == WIDEN_MULT_EXPR && cvt4_prec == cvt5_prec - && widen_prec == cvt5_prec * 2; - bool mult_p = mult_op == MULT_EXPR && cvt4_prec == cvt5_prec - && cvt4_prec == widen_prec && widen_prec > prec; - bool c2_is_type_precision_p = tree_to_uhwi (@2) == prec; - } - (if (c2_is_type_precision_p && (mult_p || widen_mult_p))))))) (match (unsigned_integer_sat_mul @0 @1) + /* SAT_U_MUL (X, Y) = { + WT x = (WT)X * (WT)Y; + NT hi = x >> (sizeof(NT) * 8); + NT lo = (NT)x; + return lo | -!!hi; + } while WT is uint128_t, uint64_t, uint32_t, uint16_t, + and NT is uint64_t, uint32_t, uint16_t, uint8_t. */ (convert1? (bit_ior (convert? (negate (convert (ne (convert2? 
(rshift @3 INTEGER_CST@2)) integer_zerop)))) - (convert (widen_mult:c@3 @0 @1)))) + (convert (usmul_widen_mult@3 @0 @1)))) (if (types_match (type, @0, @1)) (with { unsigned prec = TYPE_PRECISION (type); - unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3)); - bool c2_is_type_precision_p = tree_to_uhwi (@2) == prec; - bool widen_mult_p = prec * 2 == widen_prec; } - (if (c2_is_type_precision_p && widen_mult_p))))) + (if (c2_is_type_precision_p))))) ) /* The boundary condition for case 10: IMM = 1: diff --git a/gcc/testsuite/g++.dg/torture/pr122589.C b/gcc/testsuite/g++.dg/torture/pr122589.C new file mode 100644 index 0000000..ac441a0 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr122589.C @@ -0,0 +1,22 @@ +// { dg-do compile } +// { dg-additional-options "-g" } + +struct QPointF { + QPointF(double xpos, double ypos) : xp(xpos), yp(ypos) {} + double xp; + double yp; +}; +double xp, yp, w, h; +struct QRectF { + QRectF(QPointF, int); + QPointF center() { return QPointF(xp + w / 2, yp + h / 2); } +}; +void clientArea(QPointF &); +int workspace_size; +void workspace() { + QRectF geom(QPointF(0, 0), workspace_size); + xp = 0 - w / 2; + yp = -h; + QPointF __trans_tmp_2 = geom.center(); + clientArea(__trans_tmp_2); +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-bool-3.c b/gcc/testsuite/gcc.dg/vect/vect-bool-3.c new file mode 100644 index 0000000..671f602 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bool-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_unpack } */ + +int count_true(const bool *values, int len) +{ + int count = 0; + for (int i = 0; i < len; i++) + count += values[i]; + return count; +} + +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c new file mode 100644 index 0000000..a42d7c4 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16) + +/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c new file mode 100644 index 0000000..2a3ef8d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16) + +/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vwsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c new file mode 100644 index 0000000..59a930a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17) + +/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c new file mode 100644 index 0000000..59a6d36 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17) + +/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vwsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c new file mode 100644 index 0000000..a9319a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15) + +/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vwsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c new file mode 100644 index 0000000..77fd95b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-require-effective-target "riscv_zvbb_ok" } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvbb" } */ +/* { dg-additional-options "-std=c99" } */ + +#include "pr121959.h" + +#define WT int32_t +#define NT uint8_t +#define IMM 16 +#define N 16 + +DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) + +NT g_data[][2][N] = { + { + /* a */ + { + 2, 2, 2, 2, + 255, 255, 255, 255, + 128, 128, 128, 128, + 127, 127, 127, 127, + }, + /* b */ + { + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + 7, 7, 7, 7, + }, + }, +}; + +WT g_expect[][N] = { + /* 0 */ + { + 65536, 65536, 65536, 65536, + 16711680, 16711680, 16711680, 16711680, + 8257536, 8257536, 8257536, 8257536, + 7864320, 7864320, 7864320, 7864320, + }, +}; + +int +main () +{ + unsigned i, k; + WT out[N]; + + for (i = 0; i < sizeof (g_data) / sizeof (g_data[0]); i++) + { + NT *a = g_data[i][0]; + NT *b = g_data[i][1]; + WT *expect = g_expect[i]; + + RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, out, a, b, N); + + for (k = 0; k < N; k++) + if (out[k] != expect[k]) + __builtin_abort (); + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h new file mode 100644 index 0000000..10b1b62 --- /dev/null 
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h @@ -0,0 +1,24 @@ +#ifndef HAVE_DEFINED_PR121959_H +#define HAVE_DEFINED_PR121959_H + +#include <stdint.h> + +#define DEF_VWSLL_FUNC_0(WT, NT, IMM) \ +void \ +test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res, \ + NT * restrict a, \ + NT * restrict b, \ + int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + res[i] = (a[i] - b[i]) << IMM; \ + } \ +} +#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM) +#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) \ + test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n) +#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, a, b, n) \ + RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) + +#endif diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 83acbb3..8692d44 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -5282,15 +5282,12 @@ vectorizable_conversion (vec_info *vinfo, return false; if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) - && ((INTEGRAL_TYPE_P (lhs_type) - && !type_has_mode_precision_p (lhs_type)) - || (INTEGRAL_TYPE_P (rhs_type) - && !type_has_mode_precision_p (rhs_type)))) + && INTEGRAL_TYPE_P (lhs_type) + && !type_has_mode_precision_p (lhs_type)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "type conversion to/from bit-precision unsupported." - "\n"); + "type conversion to bit-precision unsupported\n"); return false; } |
