aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPan Li <pan2.li@intel.com>2025-07-02 10:35:10 +0800
committerPan Li <pan2.li@intel.com>2025-07-07 21:15:25 +0800
commit62b99e84b886fbdd70118cc260ae0f2516c2f3f5 (patch)
treeff906bad4ce376660d30ce6a498b96384f43fc27
parentdc30f404170f538af6bf2457ccff252b08302dec (diff)
downloadgcc-62b99e84b886fbdd70118cc260ae0f2516c2f3f5.zip
gcc-62b99e84b886fbdd70118cc260ae0f2516c2f3f5.tar.gz
gcc-62b99e84b886fbdd70118cc260ae0f2516c2f3f5.tar.bz2
RISC-V: Implement unsigned scalar SAT_MUL from uint128_t
This patch would like to implement the SAT_MUL scalar unsigned from uint128_t, aka: NT __attribute__((noinline)) sat_u_mul_##NT##_fmt_1 (NT a, NT b) { uint128_t x = (uint128_t)a * (uint128_t)b; NT max = -1; if (x > (uint128_t)(max)) return max; else return (NT)x; } Take uint64_t and uint8_t as examples: Before this patch for uint8_t: 10 │ sat_u_mul_uint8_t_from_uint128_t_fmt_1: 11 │ mulhu a5,a0,a1 12 │ mul a0,a0,a1 13 │ bne a5,zero,.L3 14 │ li a5,255 15 │ bleu a0,a5,.L4 16 │ .L3: 17 │ li a0,255 18 │ .L4: 19 │ andi a0,a0,0xff 20 │ ret After this patch for uint8_t: 10 │ sat_u_mul_uint8_t_from_uint128_t_fmt_1: 11 │ mul a0,a0,a1 12 │ li a5,255 13 │ sltu a5,a5,a0 14 │ neg a5,a5 15 │ or a0,a0,a5 16 │ andi a0,a0,0xff 17 │ ret Before this patch for uint64_t: 10 │ sat_u_mul_uint64_t_from_uint128_t_fmt_1: 11 │ mulhu a5,a0,a1 12 │ mul a0,a0,a1 13 │ beq a5,zero,.L4 14 │ li a0,-1 15 │ .L4: 16 │ ret After this patch for uint64_t: 10 │ sat_u_mul_uint64_t_from_uint128_t_fmt_1: 11 │ mulhsu a5,a1,a0 12 │ mul a0,a0,a1 13 │ snez a5,a5 14 │ neg a5,a5 15 │ or a0,a0,a5 16 │ ret gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_expand_usmul): Add new func decl. * config/riscv/riscv.cc (riscv_expand_xmode_usmul): Add new func to expand Xmode SAT_MUL. (riscv_expand_non_xmode_usmul): Ditto but for non-Xmode. (riscv_expand_usmul): Add new func to implement SAT_MUL. * config/riscv/riscv.md (usmul<mode>3): Add new pattern to match standard name usmul. Signed-off-by: Pan Li <pan2.li@intel.com>
-rw-r--r--gcc/config/riscv/riscv-protos.h1
-rw-r--r--gcc/config/riscv/riscv.cc82
-rw-r--r--gcc/config/riscv/riscv.md11
3 files changed, 94 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a033120..38f63ea 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -137,6 +137,7 @@ extern void riscv_expand_usadd (rtx, rtx, rtx);
extern void riscv_expand_ssadd (rtx, rtx, rtx);
extern void riscv_expand_ussub (rtx, rtx, rtx);
extern void riscv_expand_sssub (rtx, rtx, rtx);
+extern void riscv_expand_usmul (rtx, rtx, rtx);
extern void riscv_expand_ustrunc (rtx, rtx);
extern void riscv_expand_sstrunc (rtx, rtx);
extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ecdb61e..e09c189 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13347,6 +13347,88 @@ riscv_expand_sssub (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
+/* Implement the Xmode usmul.  The multiply overflows iff the high half of
+   the full unsigned product (mulhu) is non-zero.
+   b = SAT_MUL (a, b);
+   =>
+   _1 = a * b;
+   _2 = mulhu (a, b);
+   _overflow_p = _2 != 0;
+   _mask = - _overflow_p;
+   b = _1 | _mask;
+ */
+
+static void
+riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  gcc_assert (GET_MODE (dest) == Xmode);
+
+  rtx mul = gen_reg_rtx (Xmode);
+  rtx mulhu = gen_reg_rtx (Xmode);
+  rtx overflow_p = gen_reg_rtx (Xmode);
+
+  riscv_emit_binary (MULT, mul, x, y);
+
+  /* Both operands are unsigned, so use the umul highpart (mulhu) and not
+     the usmul highpart, which is the mixed-sign mulhsu.  */
+  if (TARGET_64BIT)
+    emit_insn (gen_umuldi3_highpart (mulhu, x, y));
+  else
+    emit_insn (gen_umulsi3_highpart (mulhu, x, y));
+
+  riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, overflow_p, overflow_p);
+  riscv_emit_binary (IOR, dest, mul, overflow_p);
+}
+
+/* Implement the usmul for a DEST whose mode is not Xmode.  The operands
+   are zero extended to Xmode and the low part of the result is taken.
+   b = SAT_MUL (a, b);
+   =>
+   _1 = a * b;
+   _max = (T)-1
+   _overflow_p = _1 > _max;
+   _mask = - _overflow_p;
+   b = _1 | _mask;
+ */
+
+static void
+riscv_expand_non_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  machine_mode narrow_mode = GET_MODE (dest);
+  unsigned nbits = GET_MODE_BITSIZE (narrow_mode).to_constant ();
+
+  gcc_assert (narrow_mode != Xmode);
+
+  /* All ones in the low NBITS bits, aka the _max of the mode of DEST.  */
+  uint64_t umax = ((uint64_t)1 << nbits) - 1;
+  rtx wide_x = riscv_extend_to_xmode_reg (x, narrow_mode, ZERO_EXTEND);
+  rtx wide_y = riscv_extend_to_xmode_reg (y, narrow_mode, ZERO_EXTEND);
+  rtx product = gen_reg_rtx (Xmode);
+  rtx limit = gen_reg_rtx (Xmode);
+  rtx sat_mask = gen_reg_rtx (Xmode);
+
+  emit_move_insn (limit, GEN_INT (umax));
+  riscv_emit_binary (MULT, product, wide_x, wide_y);
+  /* SAT_MASK = -(LIMIT < PRODUCT), then OR it into the product.  */
+  riscv_emit_binary (LTU, sat_mask, limit, product);
+  riscv_emit_unary (NEG, sat_mask, sat_mask);
+  riscv_emit_binary (IOR, product, product, sat_mask);
+
+  emit_move_insn (dest, gen_lowpart (narrow_mode, product));
+}
+
+/* Expand the standard name usmul, picking the helper by the mode of DEST.  */
+
+void
+riscv_expand_usmul (rtx dest, rtx x, rtx y)
+{
+  if (GET_MODE (dest) != Xmode)
+    riscv_expand_non_xmode_usmul (dest, x, y);
+  else
+    riscv_expand_xmode_usmul (dest, x, y);
+}
+
/* Implement the unsigned saturation truncation for int mode.
b = SAT_TRUNC (a);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f5ec0c5..c6661f5 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4634,6 +4634,17 @@
}
)
+(define_expand "usmul<mode>3"
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  "TARGET_ZMMUL || TARGET_MUL" ;; riscv_expand_usmul emits multiply insns.
+  {
+    riscv_expand_usmul (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
(define_expand "ustrunc<mode><anyi_double_truncated>2"
[(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]