diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-12-14 18:27:22 +0100 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-12-14 18:28:21 +0100 |
commit | 7a54d3deecf967029f18aa5ed1fcbdb752e213b9 (patch) | |
tree | 849f669059e68fe9a6a5315740601242f58e6664 /gcc/config | |
parent | 2cf62ef5aa80e3659a8150a48d93a1d333f1d292 (diff) | |
download | gcc-7a54d3deecf967029f18aa5ed1fcbdb752e213b9.zip gcc-7a54d3deecf967029f18aa5ed1fcbdb752e213b9.tar.gz gcc-7a54d3deecf967029f18aa5ed1fcbdb752e213b9.tar.bz2 |
i386: Implement VxHF vector set/insert/extract with lower ABI levels
This is a preparation patch that moves the VxHF vector set/insert/extract
expansions from the AVX512FP16 ABI level to lower ABI levels. There are no
functional changes for -mavx512fp16, and a follow-up patch is needed to
actually enable VxHF vector modes for lower ABI levels.
2021-12-14 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/103571
* config/i386/i386-expand.c (ix86_expand_vector_init_duplicate)
<case E_V8HFmode>: Implement for TARGET_SSE2.
<case E_V16HFmode>: Implement for TARGET_AVX.
<case E_V32HFmode>: Implement for TARGET_AVX512F.
(ix86_expand_vector_set_var): Handle V32HFmode
without TARGET_AVX512BW.
(ix86_expand_vector_extract)
<case E_V8HFmode>: Implement for TARGET_SSE2.
<case E_V16HFmode>: Implement for TARGET_AVX.
<case E_V32HFmode>: Implement for TARGET_AVX512BW.
(expand_vec_perm_broadcast_1) <case E_V8HFmode>: New.
* config/i386/sse.md (VI12HF_AVX512VL): Remove
TARGET_AVX512FP16 condition.
(V): Ditto.
(V_256_512): Ditto.
(avx_vbroadcastf128_<mode>): Use V_256H mode iterator.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386-expand.c | 118 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 19 |
2 files changed, 91 insertions, 46 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 2bbb28e..7013c20 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -14855,6 +14855,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, goto widen; case E_V8HImode: + case E_V8HFmode: if (TARGET_AVX2) return ix86_vector_duplicate_value (mode, target, val); @@ -14871,15 +14872,22 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, dperm.op0 = dperm.op1 = gen_reg_rtx (mode); dperm.one_operand_p = true; - /* Extend to SImode using a paradoxical SUBREG. */ - tmp1 = gen_reg_rtx (SImode); - emit_move_insn (tmp1, gen_lowpart (SImode, val)); - - /* Insert the SImode value as low element of a V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); - emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); + if (mode == V8HFmode) + tmp1 = lowpart_subreg (V8HFmode, force_reg (HFmode, val), HFmode); + else + { + /* Extend to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + + /* Insert the SImode value as + low element of a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); + tmp1 = gen_lowpart (mode, tmp2); + } + emit_move_insn (dperm.op0, tmp1); ok = (expand_vec_perm_1 (&dperm) || expand_vec_perm_broadcast_1 (&dperm)); gcc_assert (ok); @@ -14926,12 +14934,15 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } case E_V16HImode: + case E_V16HFmode: case E_V32QImode: if (TARGET_AVX2) return ix86_vector_duplicate_value (mode, target, val); else { - machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); + machine_mode hvmode = (mode == V16HImode ? V8HImode + : mode == V16HFmode ? 
V8HFmode + : V16QImode); rtx x = gen_reg_rtx (hvmode); ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); @@ -14942,13 +14953,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } return true; - case E_V64QImode: case E_V32HImode: + case E_V32HFmode: + case E_V64QImode: if (TARGET_AVX512BW) return ix86_vector_duplicate_value (mode, target, val); else { - machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); + machine_mode hvmode = (mode == V32HImode ? V16HImode + : mode == V32HFmode ? V16HFmode + : V32QImode); rtx x = gen_reg_rtx (hvmode); ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); @@ -14959,11 +14973,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } return true; - case E_V8HFmode: - case E_V16HFmode: - case E_V32HFmode: - return ix86_vector_duplicate_value (mode, target, val); - default: return false; } @@ -15912,7 +15921,8 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) /* 512-bits vector byte/word broadcast and comparison only available under TARGET_AVX512BW, break 512-bits vector into two 256-bits vector when without TARGET_AVX512BW. */ - if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW) + if ((mode == V32HImode || mode == V32HFmode || mode == V64QImode) + && !TARGET_AVX512BW) { gcc_assert (TARGET_AVX512F); rtx vhi, vlo, idx_hi; @@ -15926,6 +15936,12 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) extract_hi = gen_vec_extract_hi_v32hi; extract_lo = gen_vec_extract_lo_v32hi; } + else if (mode == V32HFmode) + { + half_mode = V16HFmode; + extract_hi = gen_vec_extract_hi_v32hf; + extract_lo = gen_vec_extract_lo_v32hf; + } else { half_mode = V32QImode; @@ -15973,7 +15989,6 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx) case E_V16SFmode: cmp_mode = V16SImode; break; - /* TARGET_AVX512FP16 implies TARGET_AVX512BW. 
*/ case E_V8HFmode: cmp_mode = V8HImode; break; @@ -16538,6 +16553,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) break; case E_V8HImode: + case E_V8HFmode: case E_V2HImode: use_vec_extr = TARGET_SSE2; break; @@ -16704,25 +16720,29 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) return; case E_V32HFmode: - tmp = gen_reg_rtx (V16HFmode); - if (elt < 16) - emit_insn (gen_vec_extract_lo_v32hf (tmp, vec)); - else - emit_insn (gen_vec_extract_hi_v32hf (tmp, vec)); - ix86_expand_vector_extract (false, target, tmp, elt & 15); - return; + if (TARGET_AVX512BW) + { + tmp = gen_reg_rtx (V16HFmode); + if (elt < 16) + emit_insn (gen_vec_extract_lo_v32hf (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v32hf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 15); + return; + } + break; case E_V16HFmode: - tmp = gen_reg_rtx (V8HFmode); - if (elt < 8) - emit_insn (gen_vec_extract_lo_v16hf (tmp, vec)); - else - emit_insn (gen_vec_extract_hi_v16hf (tmp, vec)); - ix86_expand_vector_extract (false, target, tmp, elt & 7); - return; - - case E_V8HFmode: - use_vec_extr = true; + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V8HFmode); + if (elt < 8) + emit_insn (gen_vec_extract_lo_v16hf (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v16hf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 7); + return; + } break; case E_V8QImode: @@ -21443,6 +21463,34 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); return true; + case E_V8HFmode: + /* This can be implemented via interleave and pshufd. 
*/ + if (d->testing_p) + return true; + + if (elt >= nelt2) + { + gen = gen_vec_interleave_highv8hf; + elt -= nelt2; + } + else + gen = gen_vec_interleave_lowv8hf; + nelt2 /= 2; + + dest = gen_reg_rtx (vmode); + emit_insn (gen (dest, op0, op0)); + + vmode = V4SImode; + op0 = gen_lowpart (vmode, dest); + + memset (perm2, elt, 4); + dest = gen_reg_rtx (vmode); + ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); + gcc_assert (ok); + + emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); + return true; + case E_V32QImode: case E_V16HImode: case E_V8SImode: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5421fb5..929eef5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -266,9 +266,7 @@ (define_mode_iterator VI12HF_AVX512VL [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") - (V32HF "TARGET_AVX512FP16") - (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")]) + V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) ;; Same iterator, but without supposed TARGET_AVX512BW (define_mode_iterator VI12_AVX512VLBW @@ -285,8 +283,7 @@ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) @@ -311,10 +308,10 @@ ;; All 256bit and 512bit vector modes (define_mode_iterator V_256_512 - [V32QI V16HI V8SI V4DI V8SF V4DF - (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F") - (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") - (V16HF "TARGET_AVX512FP16") (V32HF "TARGET_AVX512FP16")]) + [V32QI V16HI V16HF V8SI V4DI V8SF V4DF + (V64QI 
"TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V32HF "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) ;; All vector float modes (define_mode_iterator VF @@ -24892,8 +24889,8 @@ "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);") (define_insn "avx_vbroadcastf128_<mode>" - [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v") - (vec_concat:V_256 + [(set (match_operand:V_256H 0 "register_operand" "=x,x,x,v,v,v,v") + (vec_concat:V_256H (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0") (match_dup 1)))] "TARGET_AVX" |