diff options
author | Pan Li <pan2.li@intel.com> | 2024-06-11 11:04:22 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2024-06-11 16:24:19 +0800 |
commit | 8087204a4260a552c7cee37d1fb46cec7edfe9ee (patch) | |
tree | 974e1febc2448f9bf64fbcc37059f5b086f2f510 | |
parent | 66d6b1861ec57ba29540a5fa7854df3978ba5409 (diff) | |
download | gcc-8087204a4260a552c7cee37d1fb46cec7edfe9ee.zip gcc-8087204a4260a552c7cee37d1fb46cec7edfe9ee.tar.gz gcc-8087204a4260a552c7cee37d1fb46cec7edfe9ee.tar.bz2 |
RISC-V: Implement .SAT_SUB for unsigned vector int
As the middle support of .SAT_SUB committed, implement the unsigned
vector int of .SAT_SUB for the riscv backend. Consider below example
code:
void __attribute__((noinline)) \
vec_sat_u_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
{ \
unsigned i; \
for (i = 0; i < limit; i++) \
{ \
T x = op_1[i]; \
T y = op_2[i]; \
out[i] = (x - y) & (-(T)(x >= y)); \
} \
}
Before this patch:
...
vsetvli a5,a3,e64,m1,ta,mu
slli a4,a5,3
vle64.v v2,0(a1)
vle64.v v1,0(a2)
vmsgeu.vv v0,v2,v1
vmv1r.v v3,v4
vsub.vv v3,v2,v1,v0.t
vse64.v v3,0(a0)
...
After this patch:
...
vsetvli a5,a3,e64,m1,ta,ma
slli a4,a5,3
vle64.v v1,0(a1)
vle64.v v2,0(a2)
vssubu.vv v1,v1,v2
vse64.v v1,0(a0)
...
The below test suites are passed for this patch.
* The rv64gcv fully regression test.
gcc/ChangeLog:
* config/riscv/autovec.md (ussub<mode>3): Add new pattern impl
for the unsigned vector modes.
* config/riscv/riscv-protos.h (expand_vec_ussub): Add new func
decl to expand .SAT_SUB for vector mode.
* config/riscv/riscv-v.cc (emit_vec_saddu): Add new func impl
to expand .SAT_SUB for vector mode.
(emit_vec_binary_alu): Add new helper func to emit binary alu.
(expand_vec_ussub): Leverage above helper func.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/sat_arith.h: Add helper macros for test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-4.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
12 files changed, 437 insertions, 5 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 15db26d..0b1e50d 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2644,6 +2644,7 @@ ;; ========================================================================= ;; Includes: ;; - add +;; - sub ;; ========================================================================= (define_expand "usadd<mode>3" [(match_operand:V_VLSI 0 "register_operand") @@ -2656,6 +2657,17 @@ } ) +(define_expand "ussub<mode>3" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand") + (match_operand:V_VLSI 2 "register_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2], <MODE>mode); + DONE; + } +) + ;; ========================================================================= ;; == Early break auto-vectorization patterns ;; ========================================================================= diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 09eb3a5..d6473d0 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -638,6 +638,7 @@ void expand_vec_lround (rtx, rtx, machine_mode, machine_mode, machine_mode); void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode); void expand_vec_usadd (rtx, rtx, rtx, machine_mode); +void expand_vec_ussub (rtx, rtx, rtx, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 948aaf7..8911f57 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4634,13 +4634,13 @@ emit_vec_cvt_x_f_rtz (rtx op_dest, rtx op_src, rtx mask, } static void -emit_vec_saddu (rtx op_dest, rtx op_1, rtx op_2, insn_type type, - machine_mode vec_mode) +emit_vec_binary_alu (rtx op_dest, rtx op_1, rtx op_2, enum rtx_code rcode, + machine_mode vec_mode) { rtx ops[] = {op_dest, op_1, op_2}; - insn_code icode = code_for_pred (US_PLUS, vec_mode); + insn_code icode = code_for_pred (rcode, vec_mode); - emit_vlmax_insn (icode, type, ops); + emit_vlmax_insn (icode, BINARY_OP, ops); } void @@ -4876,7 +4876,16 @@ expand_vec_lfloor (rtx op_0, rtx op_1, machine_mode vec_fp_mode, void expand_vec_usadd (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) { - emit_vec_saddu (op_0, op_1, op_2, BINARY_OP, vec_mode); + emit_vec_binary_alu (op_0, op_1, op_2, US_PLUS, vec_mode); +} + +/* Expand the standard name usadd<mode>3 for vector mode, we can leverage + the vector fixed point vector single-width saturating add directly. */ + +void +expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode) +{ + emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode); } /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c new file mode 100644 index 0000000..1e6e323 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../../../sat_arith.h" + +/* +** vec_sat_u_sub_uint8_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma +** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/ +DEF_VEC_SAT_U_SUB_FMT_1(uint8_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c new file mode 100644 index 0000000..9c57056 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../../../sat_arith.h" + +/* +** vec_sat_u_sub_uint16_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma +** ... +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/ +DEF_VEC_SAT_U_SUB_FMT_1(uint16_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c new file mode 100644 index 0000000..795d5ff --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../../../sat_arith.h" + +/* +** vec_sat_u_sub_uint32_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma +** ... +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/ +DEF_VEC_SAT_U_SUB_FMT_1(uint32_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c new file mode 100644 index 0000000..00527c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "../../../sat_arith.h" + +/* +** vec_sat_u_sub_uint64_t_fmt_1: +** ... +** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma +** ... +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) +** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** ... +*/ +DEF_VEC_SAT_U_SUB_FMT_1(uint64_t) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 4 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-1.c new file mode 100644 index 0000000..4f6b7927 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-1.c @@ -0,0 +1,75 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../../../sat_arith.h" + +#define T uint8_t +#define N 16 +#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_1 + +DEF_VEC_SAT_U_SUB_FMT_1(T) + +T test_data[][3][N] = { + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, /* arg_0 */ + { + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + }, /* arg_1 */ + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, /* expect */ + }, + { + { + 0, 255, 255, 255, + 0, 255, 255, 255, + 0, 255, 255, 255, + 0, 255, 255, 255, + }, + { + 1, 255, 254, 251, + 1, 255, 254, 251, + 1, 255, 254, 251, + 1, 255, 254, 251, + }, + { + 0, 0, 1, 4, + 0, 0, 1, 4, + 0, 0, 1, 4, + 0, 0, 1, 4, + }, + }, + { + { + 0, 0, 1, 0, + 1, 2, 3, 0, + 1, 2, 3, 255, + 5, 254, 255, 9, + }, + { + 0, 1, 0, 254, + 254, 254, 254, 255, + 255, 255, 0, 252, + 255, 255, 255, 1, + }, + { + 0, 0, 1, 0, + 0, 0, 0, 0, + 0, 0, 3, 3, + 0, 0, 0, 8, + }, + }, +}; + +#include "vec_sat_binary.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-2.c new file mode 100644 index 0000000..8b115ea --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-2.c @@ -0,0 +1,75 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../../../sat_arith.h" + +#define T uint16_t +#define N 16 +#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_1 + +DEF_VEC_SAT_U_SUB_FMT_1(T) + +T test_data[][3][N] = { + { + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, /* arg_0 */ + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, /* arg_1 */ + { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }, /* expect */ + }, + { + { + 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, + }, + { + 55535, 45535, 35535, 25535, + 55535, 45535, 35535, 25535, + 55535, 45535, 35535, 25535, + 55535, 45535, 35535, 25535, + }, + { + 10000, 20000, 30000, 40000, + 10000, 20000, 30000, 40000, + 10000, 20000, 30000, 40000, + 10000, 20000, 30000, 40000, + }, + }, + { + { + 0, 0, 1, 0, + 1, 2, 3, 0, + 1, 65535, 3, 65535, + 5, 65534, 65535, 9, + }, + { + 0, 1, 1, 65534, + 65534, 65534, 1, 65535, + 0, 65535, 65535, 0, + 65535, 65535, 1, 2, + }, + { + 0, 0, 0, 0, + 0, 0, 2, 0, + 1, 0, 0, 65535, + 0, 0, 65534, 7, + }, + }, +}; + +#include "vec_sat_binary.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-3.c new file mode 100644 index 0000000..aa47ef7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-3.c @@ -0,0 +1,75 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../../../sat_arith.h" + +#define T uint32_t +#define N 16 +#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_1 + +DEF_VEC_SAT_U_SUB_FMT_1(T) + +T test_data[][3][N] = { + { + { + 0, 0, 4, 0, + 0, 0, 4, 0, + 0, 0, 4, 0, + 0, 0, 4, 0, + }, /* arg_0 */ + { + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + 0, 1, 2, 3, + }, /* arg_1 */ + { + 0, 0, 2, 0, + 0, 0, 2, 0, + 0, 0, 2, 0, + 0, 0, 2, 0, + }, /* expect */ + }, + { + { + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 4294967295, + }, + { + 1294967295, 2294967295, 3294967295, 4294967295, + 1294967295, 2294967295, 3294967295, 4294967295, + 1294967295, 2294967295, 3294967295, 4294967295, + 1294967295, 2294967295, 3294967295, 4294967295, + }, + { + 3000000000, 2000000000, 1000000000, 0, + 3000000000, 2000000000, 1000000000, 0, + 3000000000, 2000000000, 1000000000, 0, + 3000000000, 2000000000, 1000000000, 0, + }, + }, + { + { + 0, 0, 9, 0, + 1, 4294967295, 3, 0, + 1, 2, 3, 4, + 5, 4294967294, 4294967295, 4294967295, + }, + { + 0, 1, 1, 4294967294, + 1, 2, 4294967294, 4294967295, + 1, 4294967295, 4294967295, 1, + 1, 4294967295, 4294967290, 9, + }, + { + 0, 0, 8, 0, + 0, 4294967293, 0, 0, + 0, 0, 0, 3, + 4, 0, 5, 4294967286, + }, + }, +}; + +#include "vec_sat_binary.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-4.c new file mode 100644 index 0000000..91daf3a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-run-4.c @@ -0,0 +1,75 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "../../../sat_arith.h" + +#define T uint64_t +#define N 16 +#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_SUB_FMT_1 + +DEF_VEC_SAT_U_SUB_FMT_1(T) + +T test_data[][3][N] = { + { + { + 0, 9, 0, 0, + 0, 9, 0, 0, + 0, 9, 0, 0, + 0, 9, 0, 0, + }, /* arg_0 */ + { + 0, 2, 3, 1, + 0, 2, 3, 1, + 0, 2, 3, 1, + 0, 2, 3, 1, + }, /* arg_1 */ + { + 0, 7, 0, 0, + 0, 7, 0, 0, + 0, 7, 0, 0, + 0, 7, 0, 0, + }, /* expect */ + }, + { + { + 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, + 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, + 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, + 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, + }, + { + 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u, + 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u, + 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u, + 10446744073709551615u, 11446744073709551615u, 12446744073709551615u, 18446744073709551615u, + }, + { + 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u, + 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u, + 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u, + 8000000000000000000u, 7000000000000000000u, 6000000000000000000u, 0u, + }, + }, + { + { + 0, 18446744073709551615u, 1, 0, + 1, 18446744073709551615u, 3, 0, + 1, 18446744073709551614u, 3, 4, + 5, 18446744073709551614u, 18446744073709551615u, 9, + }, + { + 0, 1, 1, 18446744073709551614u, + 18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u, + 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, + 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 1, + }, + { + 0, 18446744073709551614u, 0, 0, + 0, 1, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 8, + }, + }, +}; + +#include "vec_sat_binary.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 9c60ac0..bc9a372 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -95,4 +95,35 @@ sat_u_sub_##T##_fmt_2 (T x, T y) \ #define RUN_SAT_U_SUB_FMT_1(T, x, y) sat_u_sub_##T##_fmt_1(x, y) #define RUN_SAT_U_SUB_FMT_2(T, x, y) sat_u_sub_##T##_fmt_2(x, y) +#define DEF_VEC_SAT_U_SUB_FMT_1(T) \ +void __attribute__((noinline)) \ +vec_sat_u_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + T x = op_1[i]; \ + T y = op_2[i]; \ + out[i] = (x - y) & (-(T)(x >= y)); \ + } \ +} + +#define DEF_VEC_SAT_U_SUB_FMT_2(T) \ +void __attribute__((noinline)) \ +vec_sat_u_sub_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \ +{ \ + unsigned i; \ + for (i = 0; i < limit; i++) \ + { \ + T x = op_1[i]; \ + T y = op_2[i]; \ + out[i] = (x - y) & (-(T)(x > y)); \ + } \ +} + +#define RUN_VEC_SAT_U_SUB_FMT_1(T, out, op_1, op_2, N) \ + vec_sat_u_sub_##T##_fmt_1(out, op_1, op_2, N) +#define RUN_VEC_SAT_U_SUB_FMT_2(T, out, op_1, op_2, N) \ + vec_sat_u_sub_##T##_fmt_2(out, op_1, op_2, N) + #endif |