aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Robin Dapp <rdapp@ventanamicro.com> 2024-12-13 11:23:03 +0100
committer: Robin Dapp <rdapp@ventanamicro.com> 2024-12-16 15:25:27 +0100
commit: ce199a952bfef3e27354a4586a17bc55274c1d3c (patch)
tree: f53d1450495c916e0e2679be63e6da6d4bfc8aac
parent: f1309dbc7c281dc9fd70db8ec6e8cb804b2089fc (diff)
download: gcc-ce199a952bfef3e27354a4586a17bc55274c1d3c.zip
          gcc-ce199a952bfef3e27354a4586a17bc55274c1d3c.tar.gz
          gcc-ce199a952bfef3e27354a4586a17bc55274c1d3c.tar.bz2
RISC-V: Increase cost for vec_construct [PR118019].
For a generic vec_construct from scalar elements we need to load each
scalar element and move it over to a vector register.  Right now we only
use a cost of 1 per element.

This patch instead uses the sum of the register-move cost and the
scalar_to_vec cost, multiplied by the number of elements in the vector.

	PR target/118019

gcc/ChangeLog:

	* config/riscv/riscv.cc (riscv_builtin_vectorization_cost):
	Increase vec_construct cost.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/pr118019.c: New test.
-rw-r--r--  gcc/config/riscv/riscv.cc | 8
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c | 52
2 files changed, 59 insertions, 1 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index be2ebf9..aa8a456 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12263,7 +12263,13 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
case vec_construct:
- return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+ {
+ /* TODO: This is too pessimistic in case we can splat. */
+ int regmove_cost = fp ? costs->regmove->FR2VR
+ : costs->regmove->GR2VR;
+ return (regmove_cost + common_costs->scalar_to_vec_cost)
+ * estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+ }
default:
gcc_unreachable ();
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c
new file mode 100644
index 0000000..02b3ab4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl512b -mabi=lp64d -mstrict-align -mvector-strict-align" } */
+
+/* Make sure we do not construct the vector element-wise despite
+ slow misaligned scalar and vector accesses. */
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) \
+ { \
+ int t0 = s0 + s1; \
+ int t1 = s0 - s1; \
+ int t2 = s2 + s3; \
+ int t3 = s2 - s3; \
+ d0 = t0 + t2; \
+ d2 = t0 - t2; \
+ d1 = t1 + t3; \
+ d3 = t1 - t3; \
+ }
+
+uint32_t
+abs2 (uint32_t a)
+{
+ uint32_t s = ((a >> 15) & 0x10001) * 0xffff;
+ return (a + s) ^ s;
+}
+
+int
+x264_pixel_satd_8x4 (uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2)
+{
+ uint32_t tmp[4][4];
+ uint32_t a0, a1, a2, a3;
+ int sum = 0;
+ for (int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2)
+ {
+ a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
+ a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
+ a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
+ a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
+ HADAMARD4 (tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3);
+ }
+ for (int i = 0; i < 4; i++)
+ {
+ HADAMARD4 (a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ return (((uint16_t) sum) + ((uint32_t) sum >> 16)) >> 1;
+}
+
+/* { dg-final { scan-assembler-not "lbu" } } */