RISC-V: vector absolute difference expander [PR117722]

This improves codegen for the x264 sum of absolute differences routines. The insn count is the same, but we avoid double widening ops and the ensuing whole register moves. Also, for more general applicability, we chose to implement the abs diff expander rather than the sum of abs diff variant. Suggested-by: Robin Dapp <rdapp@ventanamicro.com> Co-authored-by: Pan Li <pan2.li@intel.com> Signed-off-by: Vineet Gupta <vineetg@rivosinc.com> PR target/117722 gcc/ChangeLog: * config/riscv/autovec.md: Add uabd expander. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr117722.c: New test.
author: Vineet Gupta <vineetg@rivosinc.com> 2025-01-07 14:28:25 -0800
committer: Vineet Gupta <vineetg@rivosinc.com> 2025-01-07 14:28:25 -0800
commit: b755c151fde4ad736405bb2e13a7de0420161179 (patch)
tree: 5b7985fba6944b882e8c67f19dab0c2951318fdc /gcc
parent: 0115ef57efa9966fa7f448185dd5c741f58d4fac (diff)
download: gcc-b755c151fde4ad736405bb2e13a7de0420161179.zip
gcc-b755c151fde4ad736405bb2e13a7de0420161179.tar.gz
gcc-b755c151fde4ad736405bb2e13a7de0420161179.tar.bz2
2 files changed, 49 insertions, 0 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 8d22b5f..8426f12 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2928,3 +2928,29 @@
     riscv_vector::expand_strided_store (<MODE>mode, operands);
     DONE;
   })
+
+; ========
+; == Absolute difference (not including sum)
+; ========
+(define_expand "uabd<mode>3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  { /* Expand as operands[0] = UMAX (op1, op2) - UMIN (op1, op2).  */
+    rtx max = gen_reg_rtx (<MODE>mode); /* Fresh temporary for the UMAX result.  */
+    insn_code icode = code_for_pred (UMAX, <MODE>mode);
+    rtx ops1[] = {max, operands[1], operands[2]}; /* Destination first, then the two sources.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
+
+    rtx min = gen_reg_rtx (<MODE>mode); /* Fresh temporary for the UMIN result.  */
+    icode = code_for_pred (UMIN, <MODE>mode);
+    rtx ops2[] = {min, operands[1], operands[2]}; /* Same sources as the UMAX step.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
+
+    icode = code_for_pred (MINUS, <MODE>mode); /* max >= min as unsigned, so max - min is the absolute difference.  */
+    rtx ops3[] = {operands[0], max, min}; /* Final result goes straight into operand 0.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
+
+    DONE;
+  });
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c
new file mode 100644
index 0000000..b675930
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117722.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O2" } */
+
+/* Generate the absolute difference in a SAD loop as sub (max, min).
+   This helps with x264 sad routines.  */
+
+inline int abs(int i)
+{ /* Local absolute-value helper: yields -i when i is negative, otherwise i.  */
+  return (i < 0 ? -i : i);
+}
+
+int pixel_sad_n(unsigned char *pix1, unsigned char *pix2, int n)
+{ /* Returns the sum of abs(pix1[i] - pix2[i]) for i in [0, n).  */
+  int sum = 0;
+  for( int i = 0; i < n; i++ )
+       sum += abs(pix1[i] - pix2[i]); /* Loop body the dg-final scans below expect to become vmin/vmax/vsub.  */
+
+  return sum;
+}
+
+/* { dg-final { scan-assembler {vmin\.v} } } */
+/* { dg-final { scan-assembler {vmax\.v} } } */
+/* { dg-final { scan-assembler {vsub\.v} } } */
author	Vineet Gupta <vineetg@rivosinc.com>	2025-01-07 14:28:25 -0800
committer	Vineet Gupta <vineetg@rivosinc.com>	2025-01-07 14:28:25 -0800
commit	b755c151fde4ad736405bb2e13a7de0420161179 (patch)
tree	5b7985fba6944b882e8c67f19dab0c2951318fdc /gcc
parent	0115ef57efa9966fa7f448185dd5c741f58d4fac (diff)
download	gcc-b755c151fde4ad736405bb2e13a7de0420161179.zip gcc-b755c151fde4ad736405bb2e13a7de0420161179.tar.gz gcc-b755c151fde4ad736405bb2e13a7de0420161179.tar.bz2