diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-10-30 14:05:25 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2023-10-31 11:24:45 +0800 |
commit | f5d33d0c790a00cb5240a966dd99df9961bf57a0 (patch) | |
tree | 8f9ae63cac28b3e344b677d0fca65c9b82e40927 /gcc | |
parent | 541b754c77ab806a9dae9bbaae69722e2c36f0f0 (diff) | |
download | gcc-f5d33d0c790a00cb5240a966dd99df9961bf57a0.zip gcc-f5d33d0c790a00cb5240a966dd99df9961bf57a0.tar.gz gcc-f5d33d0c790a00cb5240a966dd99df9961bf57a0.tar.bz2 |
Fix wrong code due to incorrect define_split
-(define_split
- [(set (match_operand:V2HI 0 "register_operand")
- (eq:V2HI
- (eq:V2HI
- (us_minus:V2HI
- (match_operand:V2HI 1 "register_operand")
- (match_operand:V2HI 2 "register_operand"))
- (match_operand:V2HI 3 "const0_operand"))
- (match_operand:V2HI 4 "const0_operand")))]
- "TARGET_SSE4_1"
- [(set (match_dup 0)
- (umin:V2HI (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (eq:V2HI (match_dup 0) (match_dup 2)))])
The splitter is wrong when op1 == op2: the original pattern returns 0, but after the split it returns 1.
So remove the splitter.
Also extend another define_split to define_insn_and_split to handle
the pattern below:
494(set (reg:V4QI 112)
495 (unspec:V4QI [
496 (subreg:V4QI (reg:V2HF 111 [ bf ]) 0)
497 (subreg:V4QI (reg:V2HF 110 [ af ]) 0)
498 (subreg:V4QI (eq:V2HI (eq:V2HI (reg:V2HI 105)
499 (const_vector:V2HI [
500 (const_int 0 [0]) repeated x2
501 ]))
502 (const_vector:V2HI [
503 (const_int 0 [0]) repeated x2
504 ])) 0)
505 ] UNSPEC_BLENDV))
A plain define_split doesn't work since pass_combine assumes a split produces at
most 2 insns, but here it produces 3 since we need to move
const0_rtx (V2HImode) into a register. The move insn can be eliminated later.
gcc/ChangeLog:
PR target/112276
* config/i386/mmx.md (*mmx_pblendvb_v8qi_1): Change
define_split to define_insn_and_split to handle
immediate_operand for comparison.
(*mmx_pblendvb_v8qi_2): Ditto.
(*mmx_pblendvb_<mode>_1): Ditto.
(*mmx_pblendvb_v4qi_2): Ditto.
(<code><mode>3): Remove define_split after it.
(<code>v8qi3): Ditto.
(<code><mode>3): Ditto.
(<code>v2hi3): Ditto.
gcc/testsuite/ChangeLog:
* g++.target/i386/part-vect-vcondhf.C: Adjust testcase.
* gcc.target/i386/pr112276.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/mmx.md | 112 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/i386/part-vect-vcondhf.C | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr112276.c | 36 |
3 files changed, 70 insertions, 79 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index e3d0fb5..2b97bb8 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3360,21 +3360,6 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) -(define_split - [(set (match_operand:V4HI 0 "register_operand") - (eq:V4HI - (eq:V4HI - (us_minus:V4HI - (match_operand:V4HI 1 "register_operand") - (match_operand:V4HI 2 "register_operand")) - (match_operand:V4HI 3 "const0_operand")) - (match_operand:V4HI 4 "const0_operand")))] - "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" - [(set (match_dup 0) - (umin:V4HI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V4HI (match_dup 0) (match_dup 2)))]) - (define_expand "mmx_<code>v8qi3" [(set (match_operand:V8QI 0 "register_operand") (umaxmin:V8QI @@ -3408,21 +3393,6 @@ (match_operand:V8QI 2 "register_operand")))] "TARGET_MMX_WITH_SSE") -(define_split - [(set (match_operand:V8QI 0 "register_operand") - (eq:V8QI - (eq:V8QI - (us_minus:V8QI - (match_operand:V8QI 1 "register_operand") - (match_operand:V8QI 2 "register_operand")) - (match_operand:V8QI 3 "const0_operand")) - (match_operand:V8QI 4 "const0_operand")))] - "TARGET_MMX_WITH_SSE" - [(set (match_dup 0) - (umin:V8QI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V8QI (match_dup 0) (match_dup 2)))]) - (define_insn "<code><mode>3" [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw") (umaxmin:VI1_16_32 @@ -3436,21 +3406,6 @@ (set_attr "type" "sseiadd") (set_attr "mode" "TI")]) -(define_split - [(set (match_operand:V4QI 0 "register_operand") - (eq:V4QI - (eq:V4QI - (us_minus:V4QI - (match_operand:V4QI 1 "register_operand") - (match_operand:V4QI 2 "register_operand")) - (match_operand:V4QI 3 "const0_operand")) - (match_operand:V4QI 4 "const0_operand")))] - "TARGET_SSE2" - [(set (match_dup 0) - (umin:V4QI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V4QI (match_dup 0) (match_dup 2)))]) - (define_insn "<code>v2hi3" [(set (match_operand:V2HI 0 "register_operand" 
"=Yr,*x,Yv") (umaxmin:V2HI @@ -3467,21 +3422,6 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) -(define_split - [(set (match_operand:V2HI 0 "register_operand") - (eq:V2HI - (eq:V2HI - (us_minus:V2HI - (match_operand:V2HI 1 "register_operand") - (match_operand:V2HI 2 "register_operand")) - (match_operand:V2HI 3 "const0_operand")) - (match_operand:V2HI 4 "const0_operand")))] - "TARGET_SSE4_1" - [(set (match_dup 0) - (umin:V2HI (match_dup 1) (match_dup 2))) - (set (match_dup 0) - (eq:V2HI (match_dup 0) (match_dup 2)))]) - (define_insn "ssse3_abs<mode>2" [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv") (abs:MMXMODEI @@ -3954,7 +3894,7 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "TI")]) -(define_split +(define_insn_and_split "*mmx_pblendvb_v8qi_1" [(set (match_operand:V8QI 0 "register_operand") (unspec:V8QI [(match_operand:V8QI 1 "register_operand") @@ -3962,21 +3902,26 @@ (eq:V8QI (eq:V8QI (match_operand:V8QI 3 "register_operand") - (match_operand:V8QI 4 "register_operand")) + (match_operand:V8QI 4 "nonmemory_operand")) (match_operand:V8QI 5 "const0_operand"))] UNSPEC_BLENDV))] - "TARGET_MMX_WITH_SSE" + "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:V8QI (match_dup 3) (match_dup 4))) + (eq:V8QI (match_dup 3) (match_dup 7))) (set (match_dup 0) (unspec:V8QI [(match_dup 2) (match_dup 1) (match_dup 6)] UNSPEC_BLENDV))] - "operands[6] = gen_reg_rtx (V8QImode);") +{ + operands[6] = gen_reg_rtx (V8QImode); + operands[7] = force_reg (V8QImode, operands[4]); +}) -(define_split +(define_insn_and_split "*mmx_pblendvb_v8qi_2" [(set (match_operand:V8QI 0 "register_operand") (unspec:V8QI [(match_operand:V8QI 1 "register_operand") @@ -3985,12 +3930,14 @@ (eq:MMXMODE24 (eq:MMXMODE24 (match_operand:MMXMODE24 3 "register_operand") - (match_operand:MMXMODE24 4 "register_operand")) + (match_operand:MMXMODE24 4 "nonmemory_operand")) (match_operand:MMXMODE24 5 "const0_operand")) 0)] UNSPEC_BLENDV))] - 
"TARGET_MMX_WITH_SSE" + "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:MMXMODE24 (match_dup 3) (match_dup 4))) + (eq:MMXMODE24 (match_dup 3) (match_dup 8))) (set (match_dup 0) (unspec:V8QI [(match_dup 2) @@ -4000,6 +3947,7 @@ { operands[6] = gen_reg_rtx (<MODE>mode); operands[7] = lowpart_subreg (V8QImode, operands[6], <MODE>mode); + operands[8] = force_reg (<MODE>mode, operands[4]); }) (define_insn "mmx_pblendvb_<mode>" @@ -4022,7 +3970,7 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "TI")]) -(define_split +(define_insn_and_split "*mmx_pblendvb_<mode>_1" [(set (match_operand:VI_16_32 0 "register_operand") (unspec:VI_16_32 [(match_operand:VI_16_32 1 "register_operand") @@ -4030,21 +3978,26 @@ (eq:VI_16_32 (eq:VI_16_32 (match_operand:VI_16_32 3 "register_operand") - (match_operand:VI_16_32 4 "register_operand")) + (match_operand:VI_16_32 4 "nonmemory_operand")) (match_operand:VI_16_32 5 "const0_operand"))] UNSPEC_BLENDV))] - "TARGET_SSE2" + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:VI_16_32 (match_dup 3) (match_dup 4))) + (eq:VI_16_32 (match_dup 3) (match_dup 7))) (set (match_dup 0) (unspec:VI_16_32 [(match_dup 2) (match_dup 1) (match_dup 6)] UNSPEC_BLENDV))] - "operands[6] = gen_reg_rtx (<MODE>mode);") +{ + operands[6] = gen_reg_rtx (<MODE>mode); + operands[7] = force_reg (<MODE>mode, operands[4]); +}) -(define_split +(define_insn_and_split "*mmx_pblendvb_v4qi_2" [(set (match_operand:V4QI 0 "register_operand") (unspec:V4QI [(match_operand:V4QI 1 "register_operand") @@ -4053,12 +4006,14 @@ (eq:V2HI (eq:V2HI (match_operand:V2HI 3 "register_operand") - (match_operand:V2HI 4 "register_operand")) + (match_operand:V2HI 4 "nonmemory_operand")) (match_operand:V2HI 5 "const0_operand")) 0)] UNSPEC_BLENDV))] - "TARGET_SSE2" + "TARGET_SSE2 && ix86_pre_reload_split ()" + "#" + "&& 1" [(set (match_dup 6) - (eq:V2HI (match_dup 3) (match_dup 4))) + (eq:V2HI (match_dup 3) (match_dup 8))) 
(set (match_dup 0) (unspec:V4QI [(match_dup 2) @@ -4068,6 +4023,7 @@ { operands[6] = gen_reg_rtx (V2HImode); operands[7] = lowpart_subreg (V4QImode, operands[6], V2HImode); + operands[8] = force_reg (V2HImode, operands[4]); }) ;; XOP parallel XMM conditional moves diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C index f1972781..e623e6c 100644 --- a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C @@ -3,7 +3,6 @@ /* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ /* { dg-final { scan-assembler-times "vpcmpeqw" 6 } } */ /* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */ -/* { dg-final { scan-assembler-times "vpminuw" 2 } } */ /* { dg-final { scan-assembler-times "vcmpph" 8 } } */ /* { dg-final { scan-assembler-times "vpblendvb" 8 } } */ typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu; diff --git a/gcc/testsuite/gcc.target/i386/pr112276.c b/gcc/testsuite/gcc.target/i386/pr112276.c new file mode 100644 index 0000000..5365313 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112276.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ + +#include "sse4_1-check.h" + +typedef unsigned short __attribute__((__vector_size__ (8))) U4; +typedef unsigned short __attribute__((__vector_size__ (4))) U2; + +U4 +__attribute__((noipa)) +foo4 (U4 a, U4 b) +{ + return a > b; +} + +U2 +__attribute__((noipa)) +foo2 (U2 a, U2 b) +{ + return a > b; +} + +static void +sse4_1_test () +{ + U4 a = __extension__(U4) {1, 1, 1, 1}; + U4 b = foo4 (a, a); + if (b[0] || b[1] || b[2] || b[3]) __builtin_abort(); + + U2 c = __extension__(U2) {1, 1}; + U2 d = foo2 (c, c); + if (d[0] || d[1]) __builtin_abort(); + + return; +} |