diff options
author | Pan Li <pan2.li@intel.com> | 2025-07-02 10:35:10 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2025-07-07 21:15:25 +0800 |
commit | 62b99e84b886fbdd70118cc260ae0f2516c2f3f5 (patch) | |
tree | ff906bad4ce376660d30ce6a498b96384f43fc27 | |
parent | dc30f404170f538af6bf2457ccff252b08302dec (diff) | |
download | gcc-62b99e84b886fbdd70118cc260ae0f2516c2f3f5.zip gcc-62b99e84b886fbdd70118cc260ae0f2516c2f3f5.tar.gz gcc-62b99e84b886fbdd70118cc260ae0f2516c2f3f5.tar.bz2 |
RISC-V: Implement unsigned scalar SAT_MUL from uint128_t
This patch implements the unsigned scalar SAT_MUL that is computed
through a uint128_t intermediate product, i.e. the following form:
NT __attribute__((noinline))
sat_u_mul_##NT##_fmt_1 (NT a, NT b)
{
uint128_t x = (uint128_t)a * (uint128_t)b;
NT max = -1;
if (x > (uint128_t)(max))
return max;
else
return (NT)x;
}
Take uint64_t and uint8_t as examples:
Before this patch for uint8_t:
10 │ sat_u_mul_uint8_t_from_uint128_t_fmt_1:
11 │ mulhu a5,a0,a1
12 │ mul a0,a0,a1
13 │ bne a5,zero,.L3
14 │ li a5,255
15 │ bleu a0,a5,.L4
16 │ .L3:
17 │ li a0,255
18 │ .L4:
19 │ andi a0,a0,0xff
20 │ ret
After this patch for uint8_t:
10 │ sat_u_mul_uint8_t_from_uint128_t_fmt_1:
11 │ mul a0,a0,a1
12 │ li a5,255
13 │ sltu a5,a5,a0
14 │ neg a5,a5
15 │ or a0,a0,a5
16 │ andi a0,a0,0xff
17 │ ret
Before this patch for uint64_t:
10 │ sat_u_mul_uint64_t_from_uint128_t_fmt_1:
11 │ mulhu a5,a0,a1
12 │ mul a0,a0,a1
13 │ beq a5,zero,.L4
14 │ li a0,-1
15 │ .L4:
16 │ ret
After this patch for uint64_t:
10 │ sat_u_mul_uint64_t_from_uint128_t_fmt_1:
11 │ mulhsu a5,a1,a0
12 │ mul a0,a0,a1
13 │ snez a5,a5
14 │ neg a5,a5
15 │ or a0,a0,a5
16 │ ret
gcc/ChangeLog:
* config/riscv/riscv-protos.h (riscv_expand_usmul): Add new func
decl.
* config/riscv/riscv.cc (riscv_expand_xmode_usmul): Add new func
to expand Xmode SAT_MUL.
(riscv_expand_non_xmode_usmul): Ditto but for non-Xmode.
(riscv_expand_usmul): Add new func to implement SAT_MUL.
* config/riscv/riscv.md (usmul<mode>3): Add new pattern to match
standard name usmul.
Signed-off-by: Pan Li <pan2.li@intel.com>
-rw-r--r-- | gcc/config/riscv/riscv-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.cc | 82 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.md | 11 |
3 files changed, 94 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a033120..38f63ea 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -137,6 +137,7 @@ extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_sssub (rtx, rtx, rtx);
+extern void riscv_expand_usmul (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 extern void riscv_expand_sstrunc (rtx, rtx);
 extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ecdb61e..e09c189 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13347,6 +13347,88 @@ riscv_expand_sssub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implement the Xmode usmul.  DEST, X and Y must all be Xmode registers.
+
+   b = SAT_MUL (a, b);
+   =>
+   _1 = a * b;
+   _2 = mulhu (a, b);   <- unsigned x unsigned high part, not mulhsu.
+   _overflow_p = _2 != 0;
+   _mask = - _overflow_p;
+   b = _1 | _mask;
+ */
+
+static void
+riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+
+  gcc_assert (mode == Xmode);
+
+  rtx mul = gen_reg_rtx (Xmode);
+  rtx mulhu = gen_reg_rtx (Xmode);
+  rtx overflow_p = gen_reg_rtx (Xmode);
+
+  riscv_emit_binary (MULT, mul, x, y);
+
+  if (TARGET_64BIT)
+    emit_insn (gen_umuldi3_highpart (mulhu, x, y));
+  else
+    emit_insn (gen_umulsi3_highpart (mulhu, x, y));
+
+  riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, overflow_p, overflow_p);
+  riscv_emit_binary (IOR, dest, mul, overflow_p);
+}
+
+/* Implement the non-Xmode usmul.  X and Y are zero extended to Xmode first.
+
+   b = SAT_MUL (a, b);
+   =>
+   _1 = a * b;
+   _max = (T)-1;
+   _overflow_p = _1 > _max;
+   _mask = - _overflow_p;
+   b = _1 | _mask;
+ */
+
+static void
+riscv_expand_non_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+
+  gcc_assert (mode != Xmode);
+
+  rtx xmode_x = riscv_extend_to_xmode_reg (x, mode, ZERO_EXTEND);
+  rtx xmode_y = riscv_extend_to_xmode_reg (y, mode, ZERO_EXTEND);
+  rtx xmode_mul = gen_reg_rtx (Xmode);
+  rtx mul_max = gen_reg_rtx (Xmode);
+  rtx overflow_p = gen_reg_rtx (Xmode);
+
+  uint64_t max = ((uint64_t)1 << bitsize) - 1;
+
+  emit_move_insn (mul_max, GEN_INT (max));
+  riscv_emit_binary (MULT, xmode_mul, xmode_x, xmode_y);
+
+  riscv_emit_binary (LTU, overflow_p, mul_max, xmode_mul);
+  riscv_emit_unary (NEG, overflow_p, overflow_p);
+  riscv_emit_binary (IOR, xmode_mul, xmode_mul, overflow_p);
+
+  emit_move_insn (dest, gen_lowpart (mode, xmode_mul));
+}
+
+/* Implement the unsigned saturation mult standard name usmul for int mode.  */
+
+void
+riscv_expand_usmul (rtx dest, rtx x, rtx y)
+{
+  if (GET_MODE (dest) == Xmode)
+    riscv_expand_xmode_usmul (dest, x, y);
+  else
+    riscv_expand_non_xmode_usmul (dest, x, y);
+}
+
 /* Implement the unsigned saturation truncation for int mode.
 
    b = SAT_TRUNC (a);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f5ec0c5..c6661f5 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4634,6 +4634,17 @@
   }
 )
 
+(define_expand "usmul<mode>3"
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  ""
+  {
+    riscv_expand_usmul (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
 (define_expand "ustrunc<mode><anyi_double_truncated>2"
   [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
    (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]