author    Pan Li <pan2.li@intel.com>  2024-10-08 11:22:21 +0800
committer Pan Li <pan2.li@intel.com>  2024-10-08 22:25:39 +0800
commit    110ccfa5c88544c5ec85d31b1ed2c2f9dac163fd (patch)
tree      150821e65797dd0ded45c08e982eee1b2cf484c4 /gcc/config/riscv
parent    2291739ec432abc01c7afc5a07443c575539316a (diff)
RISC-V: Implement scalar SAT_TRUNC for signed integer
This patch implements sstrunc, the saturating truncation, for scalar
signed integers.

Form 1:

  #define DEF_SAT_S_TRUNC_FMT_1(WT, NT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))                          \
  sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x)             \
  {                                                     \
    NT trunc = (NT)x;                                   \
    return (WT)NT_MIN <= x && x <= (WT)NT_MAX           \
      ? trunc                                           \
      : x < 0 ? NT_MIN : NT_MAX;                        \
  }

  DEF_SAT_S_TRUNC_FMT_1(int64_t, int32_t, INT32_MIN, INT32_MAX)

Before this patch:

  sat_s_trunc_int64_t_to_int32_t_fmt_1:
        li      a5,1
        slli    a5,a5,31
        li      a4,-1
        add     a5,a0,a5
        srli    a4,a4,32
        bgtu    a5,a4,.L2
        sext.w  a0,a0
        ret
  .L2:
        srai    a5,a0,63
        li      a0,-2147483648
        xor     a0,a0,a5
        not     a0,a0
        ret

After this patch:

  sat_s_trunc_int64_t_to_int32_t_fmt_1:
        li      a5,-2147483648
        xori    a3,a5,-1
        slt     a4,a0,a3
        slt     a5,a5,a0
        and     a5,a4,a5
        srai    a4,a0,63
        xor     a4,a4,a3
        addi    a3,a5,-1
        neg     a5,a5
        and     a4,a4,a3
        and     a0,a0,a5
        or      a0,a0,a4
        sext.w  a0,a0
        ret

The following test suite passes with this patch:
* The rv64gcv full regression test.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (riscv_expand_sstrunc): Add new func
        decl to expand SAT_TRUNC.
        * config/riscv/riscv.cc (riscv_expand_sstrunc): Add new func impl
        to expand SAT_TRUNC.
        * config/riscv/riscv.md (sstrunc<mode><anyi_double_truncated>2): Add
        new pattern for double truncation.
        (sstrunc<mode><anyi_quad_truncated>2): Ditto but for quad.
        (sstrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

Signed-off-by: Pan Li <pan2.li@intel.com>
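As a quick sanity check of the saturation semantics above, here is a minimal
standalone harness built around the Form 1 macro from the commit message. The
main driver and the chosen probe values are illustrative additions only; they
are not part of the patch or its test suite.

  #include <stdint.h>
  #include <inttypes.h>
  #include <stdio.h>

  #define DEF_SAT_S_TRUNC_FMT_1(WT, NT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))                          \
  sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x)             \
  {                                                     \
    NT trunc = (NT)x;                                   \
    return (WT)NT_MIN <= x && x <= (WT)NT_MAX           \
      ? trunc                                           \
      : x < 0 ? NT_MIN : NT_MAX;                        \
  }

  DEF_SAT_S_TRUNC_FMT_1(int64_t, int32_t, INT32_MIN, INT32_MAX)

  int
  main (void)
  {
    /* In range: plain truncation, prints 42.  */
    printf ("%" PRId32 "\n", sat_s_trunc_int64_t_to_int32_t_fmt_1 (42));
    /* Above INT32_MAX: saturates, prints 2147483647.  */
    printf ("%" PRId32 "\n", sat_s_trunc_int64_t_to_int32_t_fmt_1 (1LL << 32));
    /* Below INT32_MIN: saturates, prints -2147483648.  */
    printf ("%" PRId32 "\n", sat_s_trunc_int64_t_to_int32_t_fmt_1 (-(1LL << 32)));
    return 0;
  }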
Diffstat (limited to 'gcc/config/riscv')
-rw-r--r--  gcc/config/riscv/riscv-protos.h |  1
-rw-r--r--  gcc/config/riscv/riscv.cc       | 61
-rw-r--r--  gcc/config/riscv/riscv.md       | 30
3 files changed, 92 insertions, 0 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3d8775e..1e6d10a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -138,6 +138,7 @@ extern void riscv_expand_ssadd (rtx, rtx, rtx);
extern void riscv_expand_ussub (rtx, rtx, rtx);
extern void riscv_expand_sssub (rtx, rtx, rtx);
extern void riscv_expand_ustrunc (rtx, rtx);
+extern void riscv_expand_sstrunc (rtx, rtx);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8708a7b..57f2554 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12438,6 +12438,67 @@ riscv_expand_ustrunc (rtx dest, rtx src)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
+/* Implement the signed saturation truncation for int mode.
+
+ b = SAT_TRUNC (a);
+ =>
+ 1. lt = a < max
+ 2. gt = min < a
+ 3. mask = lt & gt
+ 4. trunc_mask = -mask
+ 5. sat_mask = mask - 1
+ 6. lt = a < 0
+ 7. neg = -lt
+ 8. sat = neg ^ max
+ 9. trunc = src & trunc_mask
+ 10. sat = sat & sat_mask
+ 11. dest = trunc | sat */
+
+void
+riscv_expand_sstrunc (rtx dest, rtx src)
+{
+ machine_mode mode = GET_MODE (dest);
+ unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant ();
+ HOST_WIDE_INT narrow_max = ((int64_t)1 << (narrow_prec - 1)) - 1; // e.g. 127 for QImode
+ HOST_WIDE_INT narrow_min = -narrow_max - 1;                       // e.g. -128 for QImode
+
+ rtx xmode_narrow_max = gen_reg_rtx (Xmode);
+ rtx xmode_narrow_min = gen_reg_rtx (Xmode);
+ rtx xmode_lt = gen_reg_rtx (Xmode);
+ rtx xmode_gt = gen_reg_rtx (Xmode);
+ rtx xmode_src = gen_lowpart (Xmode, src);
+ rtx xmode_dest = gen_reg_rtx (Xmode);
+ rtx xmode_mask = gen_reg_rtx (Xmode);
+ rtx xmode_sat = gen_reg_rtx (Xmode);
+ rtx xmode_trunc = gen_reg_rtx (Xmode);
+ rtx xmode_sat_mask = gen_reg_rtx (Xmode);
+ rtx xmode_trunc_mask = gen_reg_rtx (Xmode);
+
+ /* Step-1: lt = src < max, gt = min < src, mask = lt & gt */
+ emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode));
+ emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode));
+ riscv_emit_binary (LT, xmode_lt, xmode_src, xmode_narrow_max);
+ riscv_emit_binary (LT, xmode_gt, xmode_narrow_min, xmode_src);
+ riscv_emit_binary (AND, xmode_mask, xmode_lt, xmode_gt);
+
+ /* Step-2: sat_mask = mask - 1, trunc_mask = -mask */
+ riscv_emit_binary (PLUS, xmode_sat_mask, xmode_mask, CONSTM1_RTX (Xmode));
+ riscv_emit_unary (NEG, xmode_trunc_mask, xmode_mask);
+
+ /* Step-3: lt = src < 0, lt = -lt, sat = lt ^ narrow_max */
+ riscv_emit_binary (LT, xmode_lt, xmode_src, CONST0_RTX (Xmode));
+ riscv_emit_unary (NEG, xmode_lt, xmode_lt);
+ riscv_emit_binary (XOR, xmode_sat, xmode_lt, xmode_narrow_max);
+
+ /* Step-4: xmode_dest = (src & trunc_mask) | (sat & sat_mask) */
+ riscv_emit_binary (AND, xmode_trunc, xmode_src, xmode_trunc_mask);
+ riscv_emit_binary (AND, xmode_sat, xmode_sat, xmode_sat_mask);
+ riscv_emit_binary (IOR, xmode_dest, xmode_trunc, xmode_sat);
+
+ /* Step-5: dest = xmode_dest */
+ emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for
TI_LONG_DOUBLE_TYPE which is for long double type, go with the
default one for the others. */
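For readers who prefer C over RTL, the following is a minimal sketch of the
branchless sequence that riscv_expand_sstrunc emits for a DImode source and an
SImode destination, following steps 1-11 of the comment above. The function
name and its standalone form are hypothetical, for illustration only; they do
not appear in the patch.

  #include <stdint.h>

  /* Illustrative C mirror of the branchless expansion for int64_t -> int32_t.  */
  static int32_t
  sat_s_trunc_di_to_si_sketch (int64_t src)
  {
    int64_t max = INT32_MAX;              /* narrow_max */
    int64_t min = INT32_MIN;              /* narrow_min */

    int64_t lt = src < max;               /* step 1 */
    int64_t gt = min < src;               /* step 2 */
    int64_t mask = lt & gt;               /* step 3: 1 if src fits, else 0 */
    int64_t trunc_mask = -mask;           /* step 4: all-ones when src fits */
    int64_t sat_mask = mask - 1;          /* step 5: all-ones when src saturates */

    int64_t neg = -(int64_t)(src < 0);    /* steps 6-7: all-ones when src < 0 */
    int64_t sat = neg ^ max;              /* step 8: INT32_MIN or INT32_MAX */

    int64_t res = (src & trunc_mask)      /* step 9: keep src when it fits */
                  | (sat & sat_mask);     /* steps 10-11: else keep the bound */
    return (int32_t) res;                 /* final lowpart move into dest */
  }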
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 067c241..688c07d 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4413,6 +4413,16 @@
}
)
+(define_expand "sstrunc<mode><anyi_double_truncated>2"
+ [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_sstrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><anyi_quad_truncated>2"
[(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
@@ -4423,6 +4433,16 @@
}
)
+(define_expand "sstrunc<mode><anyi_quad_truncated>2"
+ [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_sstrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><anyi_oct_truncated>2"
[(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
@@ -4433,6 +4453,16 @@
}
)
+(define_expand "sstrunc<mode><anyi_oct_truncated>2"
+ [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_sstrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
;; These are forms of (x << C1) + C2, potentially canonicalized from
;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may
;; want to go ahead and recognize this form as C2 may be cheaper to