aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2021-12-20 21:15:50 +0100
committer: Uros Bizjak <ubizjak@gmail.com> 2021-12-20 21:16:39 +0100
commit: 72c68d7ad90221075102f08f22256d0b4a7631b3 (patch)
tree: 201ad2daa9fc9a829011d04b414ac16e28ad7ac9 /gcc/config
parent: ab85331c58223e483c55ff0316a92265d7910e9b (diff)
downloadgcc-72c68d7ad90221075102f08f22256d0b4a7631b3.zip
gcc-72c68d7ad90221075102f08f22256d0b4a7631b3.tar.gz
gcc-72c68d7ad90221075102f08f22256d0b4a7631b3.tar.bz2
i386: Fix <sse2p4_1>_pinsr<ssemodesuffix> and its splitters [PR103772]
The clever trick of duplicating the value of the input operand into itself proved not so clever after all. The splitter should not clobber the input operand in any case, since the register can hold a value outside the HImode lowpart when accessed as a subreg. Use the standard earlyclobber approach instead.

The testcase fails with the AVX2 ISA, but I was not able to create a testcase that does not require the -mavx512fp16 compile flag.

2021-12-20  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/103772
    * config/i386/sse.md (<sse2p4_1>_pinsr<ssemodesuffix>): Add
    earlyclobber to (x,x,x,i) alternative.
    (<sse2p4_1>_pinsr<ssemodesuffix> peephole2): Remove.
    (<sse2p4_1>_pinsr<ssemodesuffix> splitter): Use output operand
    as a temporary register.  Split after reload_completed.
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/sse.md 31
1 file changed, 4 insertions, 27 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149..cb1c0b1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17430,7 +17430,7 @@
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
- [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,x")
+ [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,&x")
(vec_merge:PINSR_MODE
(vec_duplicate:PINSR_MODE
(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m,x"))
@@ -17499,25 +17499,6 @@
(const_string "*")))])
;; For TARGET_AVX2, implement insert from XMM reg with PBROADCASTW + PBLENDW.
-;; First try to get a scratch register and go through it. In case this fails,
-;; overwrite source reg with broadcasted value and blend from there.
-(define_peephole2
- [(match_scratch:V8_128 4 "x")
- (set (match_operand:V8_128 0 "sse_reg_operand")
- (vec_merge:V8_128
- (vec_duplicate:V8_128
- (match_operand:<ssescalarmode> 2 "sse_reg_operand"))
- (match_operand:V8_128 1 "sse_reg_operand")
- (match_operand:SI 3 "const_int_operand")))]
- "TARGET_AVX2
- && INTVAL (operands[3]) > 1
- && ((unsigned) exact_log2 (INTVAL (operands[3]))
- < GET_MODE_NUNITS (<MODE>mode))"
- [(set (match_dup 4)
- (vec_duplicate:V8_128 (match_dup 2)))
- (set (match_dup 0)
- (vec_merge:V8_128 (match_dup 4) (match_dup 1) (match_dup 3)))])
-
(define_split
[(set (match_operand:V8_128 0 "sse_reg_operand")
(vec_merge:V8_128
@@ -17525,18 +17506,14 @@
(match_operand:<ssescalarmode> 2 "sse_reg_operand"))
(match_operand:V8_128 1 "sse_reg_operand")
(match_operand:SI 3 "const_int_operand")))]
- "TARGET_AVX2 && epilogue_completed
+ "TARGET_AVX2 && reload_completed
&& INTVAL (operands[3]) > 1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (<MODE>mode))"
- [(set (match_dup 4)
+ [(set (match_dup 0)
(vec_duplicate:V8_128 (match_dup 2)))
(set (match_dup 0)
- (vec_merge:V8_128 (match_dup 4) (match_dup 1) (match_dup 3)))]
-{
- operands[4] = lowpart_subreg (<MODE>mode, operands[2],
- <ssescalarmode>mode);
-})
+ (vec_merge:V8_128 (match_dup 0) (match_dup 1) (match_dup 3)))])
(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
[(match_operand:AVX512_VEC 0 "register_operand")