diff options
author | Paul-Antoine Arras <parras@baylibre.com> | 2025-07-14 06:10:44 -0600 |
---|---|---|
committer | Jeff Law <jlaw@ventanamicro.com> | 2025-07-14 06:10:44 -0600 |
commit | 99a3c71db6edb8dc8c426cb2530f9cefbb5bfc9e (patch) | |
tree | e6372e92fad31e5c24959511cf942d70fafdd45d /gcc/testsuite | |
parent | 9c75032b40003bf0d3776aabdfc618ecb88c3a8a (diff) | |
download | gcc-99a3c71db6edb8dc8c426cb2530f9cefbb5bfc9e.zip gcc-99a3c71db6edb8dc8c426cb2530f9cefbb5bfc9e.tar.gz gcc-99a3c71db6edb8dc8c426cb2530f9cefbb5bfc9e.tar.bz2 |
[PATCH v2] RISC-V: Vector-scalar widening multiply-(subtract-)accumulate [PR119100]
This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a plus-mult or minus-mult RTL
instruction.
Before this patch, we have three instructions, e.g.:
fcvt.s.h fa5,fa5
vfmv.v.f v24,fa5
vfmadd.vv v8,v24,v16
After, we get only one:
vfwmacc.vf v8,fa5,v16
PR target/119100
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*vfwmacc_vf_<mode>): New pattern to
handle both vfwmacc and vfwmsac.
(*extend_vf_<mode>): New pattern that serves as an intermediate combine
step.
* config/riscv/vector-iterators.md (vsubel): New mode attribute. This is
just the lower-case version of VSUBEL.
* config/riscv/vector.md (@pred_widen_mul_<optab><mode>_scalar): Reorder
and swap operands to match the RTL emitted by expand, i.e. first
float_extend then vec_duplicate.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwmacc and
vfwmsac.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise. Also check
for fcvt and vfmv.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Add vfwmacc and
vfwmsac.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise. Also check
for fcvt and vfmv.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h: Add support for
widening variants.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_widen_run.h: New test
helper.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f32.c: New test.
Diffstat (limited to 'gcc/testsuite')
14 files changed, 160 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c index 05cf57c..b17fd8e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c @@ -11,6 +11,8 @@ DEF_VF_MULOP_ACC_CASE_0 (_Float16, +, +, acc) DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, +, sac) DEF_VF_MULOP_ACC_CASE_0 (_Float16, +, -, nacc) DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, -, nsac) +DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, +, +, acc) +DEF_VF_MULOP_WIDEN_CASE_0 (_Float16, float, -, +, sac) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -20,3 +22,5 @@ DEF_VF_MULOP_ACC_CASE_0 (_Float16, -, -, nsac) /* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfnmacc.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c index 873e315..efd887d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c @@ -11,6 +11,8 @@ DEF_VF_MULOP_ACC_CASE_0 (float, +, +, acc) DEF_VF_MULOP_ACC_CASE_0 (float, -, +, sac) DEF_VF_MULOP_ACC_CASE_0 (float, +, -, nacc) DEF_VF_MULOP_ACC_CASE_0 (float, -, -, nsac) +DEF_VF_MULOP_WIDEN_CASE_0 (float, double, +, +, acc) +DEF_VF_MULOP_WIDEN_CASE_0 (float, double, -, +, sac) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -20,3 +22,5 @@ DEF_VF_MULOP_ACC_CASE_0 (float, -, -, nsac) /* { dg-final { scan-assembler-times {vfmsac.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfnmacc.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfnmsac.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwmacc.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwmsac.vf} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c index 78127b6..84987a9 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c @@ -11,3 +11,7 @@ /* { dg-final { scan-assembler-not {vfmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfnmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler-times {fcvt.s.h} 2 } } */ +/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c index 30d57e0..dbd3d02 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c @@ -11,3 +11,7 @@ /* { dg-final { scan-assembler-not {vfmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfnmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler-times {fcvt.d.s} 2 } } */ +/* { dg-final { scan-assembler-times {vfmv.v.f} 10 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c index 8295ffb..5f0d758 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c @@ -11,6 +11,8 @@ DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, +, acc, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, +, sac, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (_Float16, +, -, nacc, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, -, nsac, VF_MULOP_ACC_BODY_X128) +DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, +, +, acc) +DEF_VF_MULOP_WIDEN_CASE_1 (_Float16, float, -, +, sac) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -20,3 +22,5 @@ DEF_VF_MULOP_ACC_CASE_1 (_Float16, -, -, nsac, VF_MULOP_ACC_BODY_X128) /* { dg-final { scan-assembler {vfmsac.vf} } } */ /* { dg-final { scan-assembler {vfnmacc.vf} } } */ /* { dg-final { scan-assembler {vfnmsac.vf} } } */ +/* { dg-final { scan-assembler {vfwmacc.vf} } } */ +/* { dg-final { scan-assembler {vfwmsac.vf} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c index f237f84..951b0ef 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c @@ -11,6 +11,8 @@ DEF_VF_MULOP_ACC_CASE_1 (float, +, +, acc, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (float, -, +, sac, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (float, +, -, nacc, VF_MULOP_ACC_BODY_X128) DEF_VF_MULOP_ACC_CASE_1 (float, -, -, nsac, VF_MULOP_ACC_BODY_X128) +DEF_VF_MULOP_WIDEN_CASE_1 (float, double, +, +, acc) +DEF_VF_MULOP_WIDEN_CASE_1 (float, double, -, +, sac) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -20,3 +22,5 @@ DEF_VF_MULOP_ACC_CASE_1 (float, -, -, nsac, VF_MULOP_ACC_BODY_X128) /* { dg-final { scan-assembler {vfmsac.vf} } } */ /* { dg-final { scan-assembler {vfnmacc.vf} } } */ /* { dg-final { scan-assembler {vfnmsac.vf} } } */ +/* { dg-final { scan-assembler {vfwmacc.vf} } } */ +/* { dg-final { scan-assembler {vfwmsac.vf} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c index 7a50f67..a4edd92 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c @@ -11,3 +11,6 @@ /* { dg-final { scan-assembler-not {vfmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfnmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler {fcvt.s.h} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c index fb0493e..4eb28e5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c @@ -11,3 +11,6 @@ /* { dg-final { scan-assembler-not {vfmsac.vf} } } */ /* { dg-final { scan-assembler-not {vfnmacc.vf} } } */ /* { dg-final { scan-assembler-not {vfnmsac.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmacc.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmsac.vf} } } */ +/* { dg-final { scan-assembler {fcvt.d.s} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h index 1659f78..b1a324f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h @@ -34,6 +34,21 @@ #define RUN_VF_MULOP_ACC_CASE_0_WRAP(T, NAME, out, in, x, n) \ RUN_VF_MULOP_ACC_CASE_0 (T, NAME, out, in, x, n) +#define DEF_VF_MULOP_WIDEN_CASE_0(T1, T2, OP, NEG, NAME) \ + void test_vf_mulop_widen_##NAME##_##T1##_case_0 (T2 *restrict out, \ + T1 *restrict in, \ + T1 *restrict f, unsigned n) \ + { \ + for (unsigned i = 0; i < n; i++) \ + out[i] = NEG ((T2) * f * (T2) in[i] OP out[i]); \ + } +#define DEF_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NEG, NAME) \ + DEF_VF_MULOP_WIDEN_CASE_0 (T1, T2, OP, NEG, NAME) +#define RUN_VF_MULOP_WIDEN_CASE_0(T1, T2, NAME, out, in, x, n) \ + test_vf_mulop_widen_##NAME##_##T1##_case_0 (out, in, x, n) +#define RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, x, n) \ + RUN_VF_MULOP_WIDEN_CASE_0 (T1, T2, NAME, out, in, x, n) + #define VF_MULOP_BODY(op, neg) \ out[k + 0] = neg (tmp * out[k + 0] op in[k + 0]); \ out[k + 1] = neg (tmp * out[k + 1] op in[k + 1]); \ @@ -129,4 +144,19 @@ #define DEF_VF_MULOP_ACC_CASE_1_WRAP(T, OP, NEG, NAME, BODY) \ DEF_VF_MULOP_ACC_CASE_1 (T, OP, NEG, NAME, BODY) +#define DEF_VF_MULOP_WIDEN_CASE_1(TYPE1, TYPE2, OP, NEG, NAME) \ + void test_vf_mulop_widen_##NAME##_##TYPE1##_##TYPE2##_case_1 ( \ + TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3, \ + TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE1 *__restrict b, \ + TYPE1 *__restrict a2, TYPE1 *__restrict b2, int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + dst[i] = NEG ((TYPE2) * a * (TYPE2) b[i] OP dst[i]); \ + dst2[i] = NEG ((TYPE2) * a2 * (TYPE2) b[i] OP dst2[i]); \ + dst3[i] = NEG ((TYPE2) * a2 * (TYPE2) a[i] OP dst3[i]); \ + dst4[i] = NEG ((TYPE2) * a * (TYPE2) b2[i] OP dst4[i]); \ + } \ + } + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_widen_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_widen_run.h new file mode 100644 index 0000000..9f95fbb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_widen_run.h @@ -0,0 +1,32 @@ +#ifndef HAVE_DEFINED_VF_MULOP_WIDEN_RUN_H +#define HAVE_DEFINED_VF_MULOP_WIDEN_RUN_H + +#include <assert.h> + +#define N 512 + +int main () +{ + T1 f[N]; + T1 in[N]; + T2 out[N]; + T2 out2[N]; + + for (int i = 0; i < N; i++) + { + f[i] = LIMIT + i % 8723; + in[i] = LIMIT + i & 1964; + out[i] = LIMIT + i & 628; + out2[i] = LIMIT + i & 628; + asm volatile ("" ::: "memory"); + } + + TEST_RUN (T1, T2, NAME, out, in, f, N); + + for (int i = 0; i < N; i++) + assert (out[i] == NEG(((T2) *f * (T2) in[i]) OP out2[i])); + + return 0; +} + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c new file mode 100644 index 0000000..d78cf73 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 _Float16 +#define T2 float +#define NAME acc +#define OP + +#define NEG + + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -32768 + +#include "vf_mulop_widen_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f32.c new file mode 100644 index 0000000..1af5240 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 float +#define T2 double +#define NAME acc +#define OP + +#define NEG + + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -2147483648 + +#include "vf_mulop_widen_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c new file mode 100644 index 0000000..6422bba --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 _Float16 +#define T2 float +#define NAME sac +#define OP - +#define NEG + + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -32768 + +#include "vf_mulop_widen_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f32.c new file mode 100644 index 0000000..13617a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_mulop.h" + +#define T1 float +#define T2 double +#define NAME sac +#define OP - +#define NEG + + +DEF_VF_MULOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NEG, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_MULOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -2147483648 + +#include "vf_mulop_widen_run.h" |