aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2020-11-24 10:45:40 +0100
committer Jakub Jelinek <jakub@redhat.com> 2020-11-24 10:45:40 +0100
commit a1dd66b108cba086f58448ccbe9bf57b0a342f9a (patch)
tree 99ce7ab0567e177ed2422f49194689e11ee76b15
parent 4adfcea0a1b0c6dcaefddca3d5f45dd5403b1a80 (diff)
download gcc-a1dd66b108cba086f58448ccbe9bf57b0a342f9a.zip
gcc-a1dd66b108cba086f58448ccbe9bf57b0a342f9a.tar.gz
gcc-a1dd66b108cba086f58448ccbe9bf57b0a342f9a.tar.bz2
i386: Add *setcc_hi_1* define_insn_and_split [PR97950]
As the following testcase shows, unlike char, int or long long sized __builtin_*_overflow{,_p}, for short sized ones in most cases the ce1 pass doesn't optimize the jo/jno or jc/jnc jumps with setting of a pseudo to 0/1 into seto/setc. The reason is a missing *setcc_hi_1* pattern.

The following patch implements it using mode iterators, so that on i486 and pentium? one can get the zero extension through an `and' instruction instead of `movzbw'.

2020-11-24 Jakub Jelinek <jakub@redhat.com>

PR target/97950
* config/i386/i386.md (*setcc_si_1_and): Macroize into...
(*setcc_<mode>_1_and): New define_insn_and_split with SWI24 iterator.
(*setcc_si_1_movzbl): Macroize into...
(*setcc_<mode>_1_movzbl): New define_insn_and_split with SWI24 iterator.

* gcc.target/i386/pr97950.c: New test.
-rw-r--r-- gcc/config/i386/i386.md 16
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr97950.c 153
2 files changed, 161 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2beaee5..943a1c9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12714,9 +12714,9 @@
operands[2] = gen_lowpart (QImode, operands[0]);
})
-(define_insn_and_split "*setcc_si_1_and"
- [(set (match_operand:SI 0 "register_operand" "=q")
- (match_operator:SI 1 "ix86_comparison_operator"
+(define_insn_and_split "*setcc_<mode>_1_and"
+ [(set (match_operand:SWI24 0 "register_operand" "=q")
+ (match_operator:SWI24 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)]))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL
@@ -12724,7 +12724,7 @@
"#"
"&& reload_completed"
[(set (match_dup 2) (match_dup 1))
- (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
+ (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
operands[1] = shallow_copy_rtx (operands[1]);
@@ -12732,16 +12732,16 @@
operands[2] = gen_lowpart (QImode, operands[0]);
})
-(define_insn_and_split "*setcc_si_1_movzbl"
- [(set (match_operand:SI 0 "register_operand" "=q")
- (match_operator:SI 1 "ix86_comparison_operator"
+(define_insn_and_split "*setcc_<mode>_1_movzbl"
+ [(set (match_operand:SWI24 0 "register_operand" "=q")
+ (match_operator:SWI24 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)]))]
"!TARGET_PARTIAL_REG_STALL
&& (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
"#"
"&& reload_completed"
[(set (match_dup 2) (match_dup 1))
- (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+ (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
{
operands[1] = shallow_copy_rtx (operands[1]);
PUT_MODE (operands[1], QImode);
diff --git a/gcc/testsuite/gcc.target/i386/pr97950.c b/gcc/testsuite/gcc.target/i386/pr97950.c
new file mode 100644
index 0000000..277311d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr97950.c
@@ -0,0 +1,153 @@
+/* PR target/97950 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic" } */
+/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */
+/* { dg-final { scan-assembler-times "\tseto\t" 16 } } */
+/* { dg-final { scan-assembler-times "\tsetc\t" 4 } } */
+/* { dg-final { scan-assembler-not "\tjn?a\t" } } */
+/* { dg-final { scan-assembler-not "\tjn?o\t" } } */
+/* { dg-final { scan-assembler-not "\tjn?c\t" } } */
+
+char
+f1 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+char
+f2 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+char
+f3 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+char
+f4 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f5 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f6 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f7 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+char
+f8 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+char
+f9 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+char
+f10 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f11 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+char
+f12 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f13 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f14 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f15 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f16 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f17 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f18 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f19 (short a, short b)
+{
+ return __builtin_mul_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f20 (short a, short b)
+{
+ return __builtin_add_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f21 (short a, short b)
+{
+ return __builtin_sub_overflow_p (a, b, (short) 0);
+}
+
+unsigned short
+f22 (unsigned short a, unsigned short b)
+{
+ return __builtin_mul_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f23 (unsigned short a, unsigned short b)
+{
+ return __builtin_add_overflow_p (a, b, (unsigned short) 0);
+}
+
+unsigned short
+f24 (unsigned short a, unsigned short b)
+{
+ return __builtin_sub_overflow_p (a, b, (unsigned short) 0);
+}