diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2022-01-04 19:41:47 +0100 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2022-01-04 19:43:54 +0100 |
commit | 708b87dcb6e48cb48d170a4b3625088995377a5c (patch) | |
tree | 36d096d9dc06a0844840e21628ed42aad606f651 /gcc/config | |
parent | dd8020032401f6dcd76c41017fcf75816490836d (diff) | |
download | gcc-708b87dcb6e48cb48d170a4b3625088995377a5c.zip gcc-708b87dcb6e48cb48d170a4b3625088995377a5c.tar.gz gcc-708b87dcb6e48cb48d170a4b3625088995377a5c.tar.bz2 |
i386: Introduce V2QImode vectorized logic [PR103861]
Add V2QImode logic operations with SSE and GP registers and split
them to V4QImode SSE instructions or SImode GP instructions.
The patch also fixes PR target/103900.
2022-01-04 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/103861
* config/i386/mmx.md (one_cmplv2qi2): New insn pattern.
(one_cmplv2qi2 splitters): New post-reload splitters.
(*andnotv2qi3): New insn pattern.
(andnotv2qi3 splitters): New post-reload splitters.
(<any_logic:code>v2qi3): New insn pattern.
(<any_logic:code>v2qi3 splitters): New post-reload splitters.
gcc/testsuite/ChangeLog:
PR target/103861
* gcc.target/i386/warn-vect-op-2.c: Adjust warnings.
* gcc.target/i386/pr103900.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/mmx.md | 146 |
1 file changed, 146 insertions, 0 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 5b33d3c..fc8ec5e 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2745,6 +2745,45 @@ "TARGET_SSE2" "operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));") +(define_insn "one_cmplv2qi2" + [(set (match_operand:V2QI 0 "register_operand" "=r,&x,&v") + (not:V2QI + (match_operand:V2QI 1 "register_operand" "0,x,v")))] + "" + "#" + [(set_attr "isa" "*,sse2,avx512vl") + (set_attr "type" "negnot,sselog,sselog") + (set_attr "mode" "SI,TI,TI")]) + +(define_split + [(set (match_operand:V2QI 0 "general_reg_operand") + (not:V2QI + (match_operand:V2QI 1 "general_reg_operand")))] + "reload_completed" + [(set (match_dup 0) + (not:SI (match_dup 1)))] +{ + operands[1] = gen_lowpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +(define_split + [(set (match_operand:V2QI 0 "sse_reg_operand") + (not:V2QI + (match_operand:V2QI 1 "sse_reg_operand")))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) + (xor:V4QI + (match_dup 0) (match_dup 1)))] +{ + emit_insn + (gen_rtx_SET (gen_rtx_REG (V16QImode, REGNO (operands[0])), + CONSTM1_RTX (V16QImode))); + + operands[1] = gen_lowpart (V4QImode, operands[1]); + operands[0] = gen_lowpart (V4QImode, operands[0]); +}) + (define_insn "mmx_andnot<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v") (and:MMXMODEI @@ -2775,6 +2814,69 @@ (set_attr "type" "sselog") (set_attr "mode" "TI")]) +(define_insn "*andnotv2qi3" + [(set (match_operand:V2QI 0 "register_operand" "=&r,r,x,x,v") + (and:V2QI + (not:V2QI (match_operand:V2QI 1 "register_operand" "0,r,0,x,v")) + (match_operand:V2QI 2 "register_operand" "r,r,x,x,v"))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + [(set_attr "isa" "*,bmi,sse2_noavx,avx,avx512vl") + (set_attr "type" "alu,bitmanip,sselog,sselog,sselog") + (set_attr "mode" "SI,SI,TI,TI,TI")]) + +(define_split + [(set (match_operand:V2QI 0 "general_reg_operand") + (and:V2QI + (not:V2QI 
(match_operand:V2QI 1 "general_reg_operand")) + (match_operand:V2QI 2 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI && reload_completed" + [(parallel + [(set (match_dup 0) + (and:SI (not:SI (match_dup 1)) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[2] = gen_lowpart (SImode, operands[2]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +(define_split + [(set (match_operand:V2QI 0 "general_reg_operand") + (and:V2QI + (not:V2QI (match_operand:V2QI 1 "general_reg_operand")) + (match_operand:V2QI 2 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_BMI && reload_completed" + [(set (match_dup 0) + (not:SI (match_dup 1))) + (parallel + [(set (match_dup 0) + (and:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[2] = gen_lowpart (SImode, operands[2]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +(define_split + [(set (match_operand:V2QI 0 "sse_reg_operand") + (and:V2QI + (not:V2QI (match_operand:V2QI 1 "sse_reg_operand")) + (match_operand:V2QI 2 "sse_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) + (and:V4QI (not:V4QI (match_dup 1)) (match_dup 2)))] +{ + operands[2] = gen_lowpart (V4QImode, operands[2]); + operands[1] = gen_lowpart (V4QImode, operands[1]); + operands[0] = gen_lowpart (V4QImode, operands[0]); +}) + (define_expand "mmx_<code><mode>3" [(set (match_operand:MMXMODEI 0 "register_operand") (any_logic:MMXMODEI @@ -2821,6 +2923,50 @@ (set_attr "type" "sselog") (set_attr "mode" "TI")]) +(define_insn "<code>v2qi3" + [(set (match_operand:V2QI 0 "register_operand" "=r,x,x,v") + (any_logic:V2QI + (match_operand:V2QI 1 "register_operand" "%0,0,x,v") + (match_operand:V2QI 2 "register_operand" "r,x,x,v"))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + [(set_attr "isa" "*,sse2_noavx,avx,avx512vl") + 
(set_attr "type" "alu,sselog,sselog,sselog") + (set_attr "mode" "SI,TI,TI,TI")]) + +(define_split + [(set (match_operand:V2QI 0 "general_reg_operand") + (any_logic:V2QI + (match_operand:V2QI 1 "general_reg_operand") + (match_operand:V2QI 2 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel + [(set (match_dup 0) + (any_logic:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[2] = gen_lowpart (SImode, operands[2]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); +}) + +(define_split + [(set (match_operand:V2QI 0 "sse_reg_operand") + (any_logic:V2QI + (match_operand:V2QI 1 "sse_reg_operand") + (match_operand:V2QI 2 "sse_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) + (any_logic:V4QI (match_dup 1) (match_dup 2)))] +{ + operands[2] = gen_lowpart (V4QImode, operands[2]); + operands[1] = gen_lowpart (V4QImode, operands[1]); + operands[0] = gen_lowpart (V4QImode, operands[0]); +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral element swizzling |