diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2024-06-08 12:17:11 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2024-06-08 12:19:37 +0200 |
commit | de05e44b2ad9638d04173393b1eae3c38b2c3864 (patch) | |
tree | 9616bc696794ce3e177762d9161d3d449341cf68 /gcc | |
parent | ab50ac8180beae9001c97cc036ce0df055e25b41 (diff) | |
download | gcc-de05e44b2ad9638d04173393b1eae3c38b2c3864.zip gcc-de05e44b2ad9638d04173393b1eae3c38b2c3864.tar.gz gcc-de05e44b2ad9638d04173393b1eae3c38b2c3864.tar.bz2 |
i386: Implement .SAT_ADD for unsigned scalar integers [PR112600]
The following testcase:
unsigned
add_sat(unsigned x, unsigned y)
{
unsigned z;
return __builtin_add_overflow(x, y, &z) ? -1u : z;
}
currently compiles (-O2) to:
add_sat:
addl %esi, %edi
jc .L3
movl %edi, %eax
ret
.L3:
orl $-1, %eax
ret
We can expand through the usadd{m}3 optab to use the carry flag from the addition
and generate branchless code using the SBB instruction, implementing:
unsigned res = x + y;
res |= -(res < x);
which results in:
add_sat:
addl %esi, %edi
sbbl %eax, %eax
orl %edi, %eax
ret
PR target/112600
gcc/ChangeLog:
* config/i386/i386.md (usadd<mode>3): New expander.
(x86_mov<mode>cc_0_m1_neg): Use SWI mode iterator.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr112600-a.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr112600-a.c | 32 |
2 files changed, 54 insertions, 2 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ffcf63e..bc2ef81 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9870,6 +9870,26 @@ operands[1] = force_reg (<MODE>mode, operands[1]); }) +(define_expand "usadd<mode>3" + [(set (match_operand:SWI 0 "register_operand") + (us_plus:SWI (match_operand:SWI 1 "register_operand") + (match_operand:SWI 2 "<general_operand>")))] + "" +{ + rtx res = gen_reg_rtx (<MODE>mode); + rtx msk = gen_reg_rtx (<MODE>mode); + rtx dst; + + emit_insn (gen_add<mode>3_cc_overflow_1 (res, operands[1], operands[2])); + emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk)); + dst = expand_simple_binop (<MODE>mode, IOR, res, msk, + operands[0], 1, OPTAB_DIRECT); + + if (!rtx_equal_p (dst, operands[0])) + emit_move_insn (operands[0], dst); + DONE; +}) + ;; The patterns that match these are at the end of this file. (define_expand "<insn>xf3" @@ -24945,8 +24965,8 @@ (define_expand "x86_mov<mode>cc_0_m1_neg" [(parallel - [(set (match_operand:SWI48 0 "register_operand") - (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0)))) + [(set (match_operand:SWI 0 "register_operand") + (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0)))) (clobber (reg:CC FLAGS_REG))])]) (define_split diff --git a/gcc/testsuite/gcc.target/i386/pr112600-a.c b/gcc/testsuite/gcc.target/i386/pr112600-a.c new file mode 100644 index 0000000..fa122bc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112600-a.c @@ -0,0 +1,32 @@ +/* PR target/112600 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "sbb" 4 } } */ + +unsigned char +add_sat_char (unsigned char x, unsigned char y) +{ + unsigned char z; + return __builtin_add_overflow(x, y, &z) ? -1u : z; +} + +unsigned short +add_sat_short (unsigned short x, unsigned short y) +{ + unsigned short z; + return __builtin_add_overflow(x, y, &z) ? 
-1u : z; +} + +unsigned int +add_sat_int (unsigned int x, unsigned int y) +{ + unsigned int z; + return __builtin_add_overflow(x, y, &z) ? -1u : z; +} + +unsigned long +add_sat_long (unsigned long x, unsigned long y) +{ + unsigned long z; + return __builtin_add_overflow(x, y, &z) ? -1ul : z; +} |