aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2022-01-04 19:41:47 +0100
committer Uros Bizjak <ubizjak@gmail.com> 2022-01-04 19:43:54 +0100
commit 708b87dcb6e48cb48d170a4b3625088995377a5c (patch)
tree 36d096d9dc06a0844840e21628ed42aad606f651 /gcc/config
parent dd8020032401f6dcd76c41017fcf75816490836d (diff)
download gcc-708b87dcb6e48cb48d170a4b3625088995377a5c.zip
gcc-708b87dcb6e48cb48d170a4b3625088995377a5c.tar.gz
gcc-708b87dcb6e48cb48d170a4b3625088995377a5c.tar.bz2
i386: Introduce V2QImode vectorized logic [PR103861]
Add V2QImode logic operations with SSE and GP registers and split them to V4QImode SSE instructions or SImode GP instructions. The patch also fixes PR target/103900. 2022-01-04 Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog: PR target/103861 * config/i386/mmx.md (one_cmplv2qi2): New insn pattern. (one_cmplv2qi2 splitters): New post-reload splitters. (*andnotv2qi3): New insn pattern. (andnotv2qi3 splitters): New post-reload splitters. (<any_logic:code>v2qi3): New insn pattern. (<any_logic:insn>v2qi3 splitters): New post-reload splitters. gcc/testsuite/ChangeLog: PR target/103861 * gcc.target/i386/warn-vect-op-2.c: Adjust warnings. * gcc.target/i386/pr103900.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/mmx.md 146
1 files changed, 146 insertions, 0 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5b33d3c..fc8ec5e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2745,6 +2745,45 @@
"TARGET_SSE2"
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
+(define_insn "one_cmplv2qi2" ; Bitwise NOT (one's complement) of a V2QI register.
+ [(set (match_operand:V2QI 0 "register_operand" "=r,&x,&v") ; 3 alternatives; "&" (earlyclobber) on x/v because the SSE split loads all-ones into operand 0 before reading operand 1
+ (not:V2QI
+ (match_operand:V2QI 1 "register_operand" "0,x,v")))] ; alternative 0 ties the input to the output register
+ "" ; empty insn condition
+ "#" ; no assembler text: the insn must be split (see the define_splits that follow)
+ [(set_attr "isa" "*,sse2,avx512vl") ; attributes are listed per alternative, in constraint order
+ (set_attr "type" "negnot,sselog,sselog")
+ (set_attr "mode" "SI,TI,TI")])
+
+(define_split ; Post-reload split: V2QI NOT when both operands match general_reg_operand.
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (not:V2QI
+ (match_operand:V2QI 1 "general_reg_operand")))]
+ "reload_completed" ; split condition
+ [(set (match_dup 0) ; replacement: a single SImode NOT ...
+ (not:SI (match_dup 1)))] ; ... on the SImode lowparts that the C block below substitutes for both operands
+{
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split ; Post-reload split: V2QI NOT when both operands match sse_reg_operand.
+ [(set (match_operand:V2QI 0 "sse_reg_operand")
+ (not:V2QI
+ (match_operand:V2QI 1 "sse_reg_operand")))]
+ "TARGET_SSE2 && reload_completed" ; split condition
+ [(set (match_dup 0) ; replacement: operand 0 = operand 0 XOR operand 1, in V4QImode
+ (xor:V4QI
+ (match_dup 0) (match_dup 1)))] ; operand 0 was preloaded with CONSTM1_RTX (on the V16QImode view of its register) by the emit_insn below, so the XOR yields NOT operand 1
+{
+ emit_insn
+ (gen_rtx_SET (gen_rtx_REG (V16QImode, REGNO (operands[0])),
+ CONSTM1_RTX (V16QImode)));
+
+ operands[1] = gen_lowpart (V4QImode, operands[1]);
+ operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
(define_insn "mmx_andnot<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
@@ -2775,6 +2814,69 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
+(define_insn "*andnotv2qi3" ; (NOT operand 1) AND operand 2 on V2QI registers.
+ [(set (match_operand:V2QI 0 "register_operand" "=&r,r,x,x,v") ; 5 alternatives; alternative 0 is earlyclobber because the !TARGET_BMI split writes operand 0 before it reads operand 2
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "register_operand" "0,r,0,x,v")) ; tied to operand 0 in alternatives 0 and 2
+ (match_operand:V2QI 2 "register_operand" "r,r,x,x,v")))
+ (clobber (reg:CC FLAGS_REG))] ; the pattern also clobbers FLAGS_REG
+ "" ; empty insn condition
+ "#" ; no assembler text: the insn must be split (see the define_splits that follow)
+ [(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl") ; attributes are listed per alternative, in constraint order
+ (set_attr "type" "alu,bitmanip,sselog,sselog,sselog")
+ (set_attr "mode" "SI,SI,TI,TI,TI")])
+
+(define_split ; Post-reload split: V2QI and-not on general registers when TARGET_BMI is set.
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
+ (match_operand:V2QI 2 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI && reload_completed" ; split condition
+ [(parallel ; replacement: one SImode and-not insn that keeps the FLAGS_REG clobber
+ [(set (match_dup 0)
+ (and:SI (not:SI (match_dup 1)) (match_dup 2))) ; all three operands are the SImode lowparts set up in the C block below
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split ; Post-reload split: V2QI and-not on general registers when TARGET_BMI is not set.
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "general_reg_operand"))
+ (match_operand:V2QI 2 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_BMI && reload_completed" ; split condition
+ [(set (match_dup 0) ; insn 1: operand 0 = NOT operand 1 (SImode, no FLAGS_REG clobber)
+ (not:SI (match_dup 1)))
+ (parallel ; insn 2: operand 0 = operand 0 AND operand 2 (SImode), with the FLAGS_REG clobber
+ [(set (match_dup 0)
+ (and:SI (match_dup 0) (match_dup 2))) ; operand 2 is read after operand 0 was overwritten by insn 1
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split ; Post-reload split: V2QI and-not when all operands match sse_reg_operand.
+ [(set (match_operand:V2QI 0 "sse_reg_operand")
+ (and:V2QI
+ (not:V2QI (match_operand:V2QI 1 "sse_reg_operand"))
+ (match_operand:V2QI 2 "sse_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE2 && reload_completed" ; split condition
+ [(set (match_dup 0) ; replacement: one V4QImode and-not; the FLAGS_REG clobber is not carried over
+ (and:V4QI (not:V4QI (match_dup 1)) (match_dup 2)))] ; operands are the V4QImode lowparts set up in the C block below
+{
+ operands[2] = gen_lowpart (V4QImode, operands[2]);
+ operands[1] = gen_lowpart (V4QImode, operands[1]);
+ operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
@@ -2821,6 +2923,50 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
+(define_insn "<code>v2qi3" ; V2QI logic insn, one per code of the any_logic iterator (iterator defined elsewhere; presumably and/ior/xor -- confirm).
+ [(set (match_operand:V2QI 0 "register_operand" "=r,x,x,v") ; 4 alternatives
+ (any_logic:V2QI
+ (match_operand:V2QI 1 "register_operand" "%0,0,x,v") ; "%" declares operands 1 and 2 commutative; tied to operand 0 in alternatives 0 and 1
+ (match_operand:V2QI 2 "register_operand" "r,x,x,v")))
+ (clobber (reg:CC FLAGS_REG))] ; the pattern also clobbers FLAGS_REG
+ "" ; empty insn condition
+ "#" ; no assembler text: the insn must be split (see the define_splits that follow)
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl") ; attributes are listed per alternative, in constraint order
+ (set_attr "type" "alu,sselog,sselog,sselog")
+ (set_attr "mode" "SI,TI,TI,TI")])
+
+(define_split ; Post-reload split: V2QI any_logic operation when all operands match general_reg_operand.
+ [(set (match_operand:V2QI 0 "general_reg_operand")
+ (any_logic:V2QI
+ (match_operand:V2QI 1 "general_reg_operand")
+ (match_operand:V2QI 2 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed" ; split condition
+ [(parallel ; replacement: the same logic code in SImode, keeping the FLAGS_REG clobber
+ [(set (match_dup 0)
+ (any_logic:SI (match_dup 1) (match_dup 2))) ; operands are the SImode lowparts set up in the C block below
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split ; Post-reload split: V2QI any_logic operation when all operands match sse_reg_operand.
+ [(set (match_operand:V2QI 0 "sse_reg_operand")
+ (any_logic:V2QI
+ (match_operand:V2QI 1 "sse_reg_operand")
+ (match_operand:V2QI 2 "sse_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE2 && reload_completed" ; split condition
+ [(set (match_dup 0) ; replacement: the same logic code in V4QImode; the FLAGS_REG clobber is not carried over
+ (any_logic:V4QI (match_dup 1) (match_dup 2)))] ; operands are the V4QImode lowparts set up in the C block below
+{
+ operands[2] = gen_lowpart (V4QImode, operands[2]);
+ operands[1] = gen_lowpart (V4QImode, operands[1]);
+ operands[0] = gen_lowpart (V4QImode, operands[0]);
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling