RISC-V: Support FP round auto-vectorization

This patch would like to support auto-vectorization for the round API in math.h. It depends on the -ffast-math option. When we would like to call round/roundf like v2 = round (v1), we will convert it into below insns (reference the implementation of llvm). * vfcvt.x.f v3, v1, RMM * vfcvt.f.x v2, v3 However, the floating point value may not need the cvt as above if it has no fractional part. Take single precision floating point as example: +------------+---------------+-----------------+ | raw float | binary layout | after round | +------------+---------------+-----------------+ | -8388607.5 | 0xcaffffff | -8388608.0 | | 8388607.5 | 0x4affffff | 8388608.0 | | 8388608.0 | 0x4b000000 | 8388608.0 | | 8388609.0 | 0x4b000001 | 8388609.0 | +------------+---------------+-----------------+ All single precision floating point values with magnitude >= 8388608.0 (2^23) have no fractional part. We leverage vmflt and mask to filter them out in vector and only do the cvt on mask. Before this patch: math-round-1.c:21:1: missed: couldn't vectorize loop ... .L3: flw fa0,0(s0) addi s0,s0,4 addi s1,s1,4 call round fsw fa0,-4(s1) bne s0,s2,.L3 After this patch: ... fsrmi 4 // RMM, rounding to nearest, ties to max magnitude .L4: vfabs.v v2,v1 vmflt.vf v0,v2,fa5 vfcvt.x.f.v v4,v1,v0.t vfcvt.f.x.v v2,v4,v0.t vfsgnj.vv v2,v2,v1 bne .L4 .L14: fsrm a6 ret Please note VLS mode is also involved in this patch and covered by the test cases. gcc/ChangeLog: * config/riscv/autovec.md (round<mode>2): New pattern. * config/riscv/riscv-protos.h (enum insn_flags): New enum type. (enum insn_type): Ditto. (expand_vec_round): New function decl. * config/riscv/riscv-v.cc (expand_vec_round): New function impl. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/math-round-0.c: New test. * gcc.target/riscv/rvv/autovec/unop/math-round-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/math-round-2.c: New test. * gcc.target/riscv/rvv/autovec/unop/math-round-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/math-round-run-1.c: New test. 
* gcc.target/riscv/rvv/autovec/unop/math-round-run-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/math-round-1.c: New test. Signed-off-by: Pan Li <pan2.li@intel.com>
author: Pan Li <pan2.li@intel.com> 2023-09-26 16:28:54 +0800
committer: Pan Li <pan2.li@intel.com> 2023-09-26 19:37:51 +0800
commit: d324984f9fa50e0055e3818f29d8c019df08cecf (patch)
tree: d90f53949fd5f49717da02f16f9c5ee2ebb60845 /gcc/config/riscv/riscv-protos.h
parent: 31ef3fe9e49b5446d465628830a9b6591c2168ce (diff)
download: gcc-d324984f9fa50e0055e3818f29d8c019df08cecf.zip
gcc-d324984f9fa50e0055e3818f29d8c019df08cecf.tar.gz
gcc-d324984f9fa50e0055e3818f29d8c019df08cecf.tar.bz2
1 files changed, 5 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 629adee..70ca244 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -256,6 +256,9 @@ enum insn_flags : unsigned int
 
   /* Means INSN has FRM operand and the value is FRM_RDN.  */
   FRM_RDN_P = 1 << 17,
+
+  /* Means INSN has FRM operand and the value is FRM_RMM.  */
+  FRM_RMM_P = 1 << 18,
 };
 
 enum insn_type : unsigned int
@@ -299,6 +302,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
+  UNARY_OP_TAMU_FRM_RMM = UNARY_OP_TAMU | FRM_RMM_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -463,6 +467,7 @@ void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_rint (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_round (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
author	Pan Li <pan2.li@intel.com>	2023-09-26 16:28:54 +0800
committer	Pan Li <pan2.li@intel.com>	2023-09-26 19:37:51 +0800
commit	d324984f9fa50e0055e3818f29d8c019df08cecf (patch)
tree	d90f53949fd5f49717da02f16f9c5ee2ebb60845 /gcc/config/riscv/riscv-protos.h
parent	31ef3fe9e49b5446d465628830a9b6591c2168ce (diff)
download	gcc-d324984f9fa50e0055e3818f29d8c019df08cecf.zip gcc-d324984f9fa50e0055e3818f29d8c019df08cecf.tar.gz gcc-d324984f9fa50e0055e3818f29d8c019df08cecf.tar.bz2