aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorkonglin1 <lingling.kong@intel.com>2022-08-18 15:28:10 +0800
committerkonglin1 <lingling.kong@intel.com>2022-08-18 19:38:05 +0800
commit6910cad55ffc330dc9767d2c8e0b66ccfa4134af (patch)
tree2c3e28b14367ddf7d7af0964105a1835a4d84b9f /gcc/config
parent2e8038898376b841205a146ef34dcd72d9eec07b (diff)
downloadgcc-6910cad55ffc330dc9767d2c8e0b66ccfa4134af.zip
gcc-6910cad55ffc330dc9767d2c8e0b66ccfa4134af.tar.gz
gcc-6910cad55ffc330dc9767d2c8e0b66ccfa4134af.tar.bz2
x86: Support vector __bf16 type
gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle vector BFmode. (ix86_expand_vector_init_duplicate): Support vector BFmode. (ix86_expand_vector_init_one_nonzero): Ditto. (ix86_expand_vector_init_one_var): Ditto. (ix86_expand_vector_init_concat): Ditto. (ix86_expand_vector_init_interleave): Ditto. (ix86_expand_vector_init_general): Ditto. (ix86_expand_vector_init): Ditto. (ix86_expand_vector_set_var): Ditto. (ix86_expand_vector_set): Ditto. (ix86_expand_vector_extract): Ditto. * config/i386/i386.cc (classify_argument): Add BF vector modes. (function_arg_64): Ditto. (ix86_gimplify_va_arg): Ditto. (ix86_get_ssemov): Ditto. * config/i386/i386.h (VALID_AVX256_REG_MODE): Add BF vector modes. (VALID_AVX512F_REG_MODE): Ditto. (host_detect_local_cpu): Ditto. (VALID_SSE2_REG_MODE): Ditto. * config/i386/i386.md: Add BF vector modes. (MODE_SIZE): Ditto. (ssemodesuffix): Add bf suffix for BF vector modes. (ssevecmode): Ditto. * config/i386/sse.md (VMOVE): Adjust for BF vector modes. (VI12HFBF_AVX512VL): Ditto. (V_256_512): Ditto. (VF_AVX512HFBF16): Ditto. (VF_AVX512BWHFBF16): Ditto. (VIHFBF): Ditto. (avx512): Ditto. (VIHFBF_256): Ditto. (VIHFBF_AVX512BW): Ditto. (VI2F_256_512):Ditto. (V8_128):Ditto. (V16_256): Ditto. (V32_512): Ditto. (sseinsnmode): Ditto. (sseconstm1): Ditto. (sseintmodesuffix): New mode_attr. (avx512fmaskmode): Ditto. (avx512fmaskmodelower): Ditto. (ssedoublevecmode): Ditto. (ssehalfvecmode): Ditto. (ssehalfvecmodelower): Ditto. (ssescalarmode): Add vector BFmode mapping. (ssescalarmodelower): Ditto. (ssexmmmode): Ditto. (ternlogsuffix): Ditto. (ssescalarsize): Ditto. (sseintprefix): Ditto. (i128): Ditto. (xtg_mode): Ditto. (bcstscalarsuff): Ditto. (<avx512>_blendm<mode>): New define_insn for BFmode. (<avx512>_store<mode>_mask): Ditto. (vcond_mask_<mode><avx512fmaskmodelower>): Ditto. (vec_set<mode>_0): New define_insn for BF vector set. (V8BFH_128): New mode_iterator for BFmode. (avx512fp16_mov<mode>): Ditto. (vec_set<mode>): New define_insn for BF vector set. (@vec_extract_hi_<mode>): Ditto. (@vec_extract_lo_<mode>): Ditto. (vec_set_hi_<mode>): Ditto. (vec_set_lo_<mode>): Ditto. (*vec_extract<mode>_0): New define_insn_and_split for BF vector extract. (*vec_extract<mode>): New define_insn. (VEC_EXTRACT_MODE): Add BF vector modes. (PINSR_MODE): Add V8BF. (sse2p4_1): Ditto. (pinsr_evex_isa): Ditto. (<sse2p4_1>_pinsr<ssemodesuffix>): Adjust to support insert for V8BFmode. (pbroadcast_evex_isa): Add BF vector modes. (AVX2_VEC_DUP_MODE): Ditto. (VEC_INIT_MODE): Ditto. (VEC_INIT_HALF_MODE): Ditto. (avx2_pbroadcast<mode>): Adjust to support BF vector mode broadcast. (avx2_pbroadcast<mode>_1): Ditto. (<avx512>_vec_dup<mode>_1): Ditto. (<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): Ditto. gcc/testsuite/ChangeLog: * g++.target/i386/vect-bfloat16-1.C: New test. * gcc.target/i386/vect-bfloat16-1.c: New test. * gcc.target/i386/vect-bfloat16-2a.c: New test. * gcc.target/i386/vect-bfloat16-2b.c: New test. * gcc.target/i386/vect-bfloat16-typecheck_1.c: New test. * gcc.target/i386/vect-bfloat16-typecheck_2.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/i386-expand.cc129
-rw-r--r--gcc/config/i386/i386.cc16
-rw-r--r--gcc/config/i386/i386.h12
-rw-r--r--gcc/config/i386/i386.md9
-rw-r--r--gcc/config/i386/sse.md211
5 files changed, 258 insertions, 119 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 66d8f28..4b21630 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4064,6 +4064,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V16QImode:
case E_V8HImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V4SImode:
case E_V2DImode:
case E_V1TImode:
@@ -4084,6 +4085,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V32QImode:
case E_V16HImode:
case E_V16HFmode:
+ case E_V16BFmode:
case E_V8SImode:
case E_V4DImode:
if (TARGET_AVX2)
@@ -4102,6 +4104,9 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V32HFmode:
gen = gen_avx512bw_blendmv32hf;
break;
+ case E_V32BFmode:
+ gen = gen_avx512bw_blendmv32bf;
+ break;
case E_V16SImode:
gen = gen_avx512f_blendmv16si;
break;
@@ -15008,6 +15013,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
case E_V8HImode:
case E_V8HFmode:
+ case E_V8BFmode:
if (TARGET_AVX2)
return ix86_vector_duplicate_value (mode, target, val);
@@ -15092,6 +15098,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
case E_V16HImode:
case E_V16HFmode:
+ case E_V16BFmode:
case E_V32QImode:
if (TARGET_AVX2)
return ix86_vector_duplicate_value (mode, target, val);
@@ -15112,6 +15119,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
case E_V32HImode:
case E_V32HFmode:
+ case E_V32BFmode:
case E_V64QImode:
if (TARGET_AVX512BW)
return ix86_vector_duplicate_value (mode, target, val);
@@ -15119,6 +15127,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
{
machine_mode hvmode = (mode == V32HImode ? V16HImode
: mode == V32HFmode ? V16HFmode
+ : mode == V32BFmode ? V16BFmode
: V32QImode);
rtx x = gen_reg_rtx (hvmode);
@@ -15232,6 +15241,18 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
use_vector_set = TARGET_AVX512FP16 && one_var == 0;
gen_vec_set_0 = gen_vec_setv32hf_0;
break;
+ case E_V8BFmode:
+ use_vector_set = TARGET_AVX512FP16 && one_var == 0;
+ gen_vec_set_0 = gen_vec_setv8bf_0;
+ break;
+ case E_V16BFmode:
+ use_vector_set = TARGET_AVX512FP16 && one_var == 0;
+ gen_vec_set_0 = gen_vec_setv16bf_0;
+ break;
+ case E_V32BFmode:
+ use_vector_set = TARGET_AVX512FP16 && one_var == 0;
+ gen_vec_set_0 = gen_vec_setv32bf_0;
+ break;
case E_V32HImode:
use_vector_set = TARGET_AVX512FP16 && one_var == 0;
gen_vec_set_0 = gen_vec_setv32hi_0;
@@ -15386,6 +15407,8 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
/* FALLTHRU */
case E_V8HFmode:
case E_V16HFmode:
+ case E_V8BFmode:
+ case E_V16BFmode:
case E_V4DFmode:
case E_V8SFmode:
case E_V8SImode:
@@ -15469,6 +15492,9 @@ ix86_expand_vector_init_concat (machine_mode mode,
case E_V32HFmode:
half_mode = V16HFmode;
break;
+ case E_V32BFmode:
+ half_mode = V16BFmode;
+ break;
case E_V16SImode:
half_mode = V8SImode;
break;
@@ -15484,6 +15510,9 @@ ix86_expand_vector_init_concat (machine_mode mode,
case E_V16HFmode:
half_mode = V8HFmode;
break;
+ case E_V16BFmode:
+ half_mode = V8BFmode;
+ break;
case E_V8SImode:
half_mode = V4SImode;
break;
@@ -15642,6 +15671,15 @@ ix86_expand_vector_init_interleave (machine_mode mode,
second_imode = V2DImode;
third_imode = VOIDmode;
break;
+ case E_V8BFmode:
+ gen_load_even = gen_vec_interleave_lowv8bf;
+ gen_interleave_first_low = gen_vec_interleave_lowv4si;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ inner_mode = BFmode;
+ first_imode = V4SImode;
+ second_imode = V2DImode;
+ third_imode = VOIDmode;
+ break;
case E_V8HImode:
gen_load_even = gen_vec_setv8hi;
gen_interleave_first_low = gen_vec_interleave_lowv4si;
@@ -15667,15 +15705,18 @@ ix86_expand_vector_init_interleave (machine_mode mode,
for (i = 0; i < n; i++)
{
op = ops [i + i];
- if (inner_mode == HFmode)
+ if (inner_mode == HFmode || inner_mode == BFmode)
{
rtx even, odd;
- /* Use vpuncklwd to pack 2 HFmode. */
- op0 = gen_reg_rtx (V8HFmode);
- even = lowpart_subreg (V8HFmode, force_reg (HFmode, op), HFmode);
- odd = lowpart_subreg (V8HFmode,
- force_reg (HFmode, ops[i + i + 1]),
- HFmode);
+ /* Use vpuncklwd to pack 2 HFmode or BFmode. */
+ machine_mode vec_mode =
+ (inner_mode == HFmode) ? V8HFmode : V8BFmode;
+ op0 = gen_reg_rtx (vec_mode);
+ even = lowpart_subreg (vec_mode,
+ force_reg (inner_mode, op), inner_mode);
+ odd = lowpart_subreg (vec_mode,
+ force_reg (inner_mode, ops[i + i + 1]),
+ inner_mode);
emit_insn (gen_load_even (op0, even, odd));
}
else
@@ -15824,6 +15865,10 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
half_mode = V8HFmode;
goto half;
+ case E_V16BFmode:
+ half_mode = V8BFmode;
+ goto half;
+
half:
n = GET_MODE_NUNITS (mode);
for (i = 0; i < n; i++)
@@ -15852,6 +15897,11 @@ half:
half_mode = V16HFmode;
goto quarter;
+ case E_V32BFmode:
+ quarter_mode = V8BFmode;
+ half_mode = V16BFmode;
+ goto quarter;
+
quarter:
n = GET_MODE_NUNITS (mode);
for (i = 0; i < n; i++)
@@ -15891,6 +15941,7 @@ quarter:
/* FALLTHRU */
case E_V8HFmode:
+ case E_V8BFmode:
n = GET_MODE_NUNITS (mode);
for (i = 0; i < n; i++)
@@ -15994,7 +16045,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
if (inner_mode == QImode
|| inner_mode == HImode
|| inner_mode == TImode
- || inner_mode == HFmode)
+ || inner_mode == HFmode
+ || inner_mode == BFmode)
{
unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
scalar_mode elt_mode = inner_mode == TImode ? DImode : SImode;
@@ -16078,7 +16130,8 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
/* 512-bits vector byte/word broadcast and comparison only available
under TARGET_AVX512BW, break 512-bits vector into two 256-bits vector
when without TARGET_AVX512BW. */
- if ((mode == V32HImode || mode == V32HFmode || mode == V64QImode)
+ if ((mode == V32HImode || mode == V32HFmode || mode == V32BFmode
+ || mode == V64QImode)
&& !TARGET_AVX512BW)
{
gcc_assert (TARGET_AVX512F);
@@ -16099,6 +16152,12 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
extract_hi = gen_vec_extract_hi_v32hf;
extract_lo = gen_vec_extract_lo_v32hf;
}
+ else if (mode == V32BFmode)
+ {
+ half_mode = V16BFmode;
+ extract_hi = gen_vec_extract_hi_v32bf;
+ extract_lo = gen_vec_extract_lo_v32bf;
+ }
else
{
half_mode = V32QImode;
@@ -16155,6 +16214,15 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
case E_V32HFmode:
cmp_mode = V32HImode;
break;
+ case E_V8BFmode:
+ cmp_mode = V8HImode;
+ break;
+ case E_V16BFmode:
+ cmp_mode = V16HImode;
+ break;
+ case E_V32BFmode:
+ cmp_mode = V32HImode;
+ break;
default:
gcc_unreachable ();
}
@@ -16192,7 +16260,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
bool use_vec_merge = false;
bool blendm_const = false;
rtx tmp;
- static rtx (*gen_extract[7][2]) (rtx, rtx)
+ static rtx (*gen_extract[8][2]) (rtx, rtx)
= {
{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
@@ -16200,9 +16268,10 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df },
- { gen_vec_extract_lo_v16hf, gen_vec_extract_hi_v16hf }
+ { gen_vec_extract_lo_v16hf, gen_vec_extract_hi_v16hf },
+ { gen_vec_extract_lo_v16bf, gen_vec_extract_hi_v16bf }
};
- static rtx (*gen_insert[7][2]) (rtx, rtx, rtx)
+ static rtx (*gen_insert[8][2]) (rtx, rtx, rtx)
= {
{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
@@ -16211,6 +16280,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df },
{ gen_vec_set_lo_v16hf, gen_vec_set_hi_v16hf },
+ { gen_vec_set_lo_v16bf, gen_vec_set_hi_v16bf },
};
int i, j, n;
machine_mode mmode = VOIDmode;
@@ -16379,6 +16449,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
case E_V8HImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V2HImode:
use_vec_merge = TARGET_SSE2;
break;
@@ -16402,18 +16473,20 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
goto half;
case E_V16HFmode:
+ case E_V16BFmode:
/* For ELT == 0, vec_setv8hf_0 can save 1 vpbroadcastw. */
if (TARGET_AVX2 && elt != 0)
{
mmode = SImode;
- gen_blendm = gen_avx2_pblendph_1;
+ gen_blendm = ((mode == E_V16HFmode) ? gen_avx2_pblendph_1
+ : gen_avx2_pblendbf_1);
blendm_const = true;
break;
}
else
{
- half_mode = V8HFmode;
- j = 6;
+ half_mode = ((mode == E_V16HFmode) ? V8HFmode : V8BFmode);
+ j = ((mode == E_V16HFmode) ? 6 : 7);
n = 8;
goto half;
}
@@ -16505,6 +16578,13 @@ half:
gen_blendm = gen_avx512bw_blendmv32hf;
}
break;
+ case E_V32BFmode:
+ if (TARGET_AVX512BW)
+ {
+ mmode = SImode;
+ gen_blendm = gen_avx512bw_blendmv32bf;
+ }
+ break;
case E_V32HImode:
if (TARGET_AVX512BW)
{
@@ -16712,6 +16792,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case E_V8HImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V2HImode:
use_vec_extr = TARGET_SSE2;
break;
@@ -16878,26 +16959,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
return;
case E_V32HFmode:
+ case E_V32BFmode:
if (TARGET_AVX512BW)
{
- tmp = gen_reg_rtx (V16HFmode);
+ tmp = (mode == E_V32HFmode
+ ? gen_reg_rtx (V16HFmode)
+ : gen_reg_rtx (V16BFmode));
if (elt < 16)
- emit_insn (gen_vec_extract_lo_v32hf (tmp, vec));
+ emit_insn (maybe_gen_vec_extract_lo (mode, tmp, vec));
else
- emit_insn (gen_vec_extract_hi_v32hf (tmp, vec));
+ emit_insn (maybe_gen_vec_extract_hi (mode, tmp, vec));
ix86_expand_vector_extract (false, target, tmp, elt & 15);
return;
}
break;
case E_V16HFmode:
+ case E_V16BFmode:
if (TARGET_AVX)
{
- tmp = gen_reg_rtx (V8HFmode);
+ tmp = (mode == E_V16HFmode
+ ? gen_reg_rtx (V8HFmode)
+ : gen_reg_rtx (V8BFmode));
if (elt < 8)
- emit_insn (gen_vec_extract_lo_v16hf (tmp, vec));
+ emit_insn (maybe_gen_vec_extract_lo (mode, tmp, vec));
else
- emit_insn (gen_vec_extract_hi_v16hf (tmp, vec));
+ emit_insn (maybe_gen_vec_extract_hi (mode, tmp, vec));
ix86_expand_vector_extract (false, target, tmp, elt & 7);
return;
}
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fa3722a..e27c87f 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2463,6 +2463,7 @@ classify_argument (machine_mode mode, const_tree type,
case E_V8SImode:
case E_V32QImode:
case E_V16HFmode:
+ case E_V16BFmode:
case E_V16HImode:
case E_V4DFmode:
case E_V4DImode:
@@ -2474,6 +2475,7 @@ classify_argument (machine_mode mode, const_tree type,
case E_V8DFmode:
case E_V16SFmode:
case E_V32HFmode:
+ case E_V32BFmode:
case E_V8DImode:
case E_V16SImode:
case E_V32HImode:
@@ -2492,6 +2494,7 @@ classify_argument (machine_mode mode, const_tree type,
case E_V16QImode:
case E_V8HImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V2DFmode:
case E_V2DImode:
classes[0] = X86_64_SSE_CLASS;
@@ -2947,6 +2950,7 @@ pass_in_reg:
/* FALLTHRU */
case E_V16HFmode:
+ case E_V16BFmode:
case E_V8SFmode:
case E_V8SImode:
case E_V64QImode:
@@ -2954,6 +2958,7 @@ pass_in_reg:
case E_V16SImode:
case E_V8DImode:
case E_V32HFmode:
+ case E_V32BFmode:
case E_V16SFmode:
case E_V8DFmode:
case E_V32QImode:
@@ -2966,6 +2971,7 @@ pass_in_reg:
case E_V4SImode:
case E_V2DImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V4SFmode:
case E_V2DFmode:
if (!type || !AGGREGATE_TYPE_P (type))
@@ -3190,6 +3196,7 @@ pass_in_reg:
case E_V4SImode:
case E_V2DImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V4SFmode:
case E_V2DFmode:
if (!type || !AGGREGATE_TYPE_P (type))
@@ -3210,9 +3217,11 @@ pass_in_reg:
case E_V16SImode:
case E_V8DImode:
case E_V32HFmode:
+ case E_V32BFmode:
case E_V16SFmode:
case E_V8DFmode:
case E_V16HFmode:
+ case E_V16BFmode:
case E_V8SFmode:
case E_V8SImode:
case E_V32QImode:
@@ -3273,6 +3282,7 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
break;
case E_V16HFmode:
+ case E_V16BFmode:
case E_V8SFmode:
case E_V8SImode:
case E_V32QImode:
@@ -3280,6 +3290,7 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
case E_V4DFmode:
case E_V4DImode:
case E_V32HFmode:
+ case E_V32BFmode:
case E_V16SFmode:
case E_V16SImode:
case E_V64QImode:
@@ -4748,6 +4759,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
switch (nat_mode)
{
case E_V16HFmode:
+ case E_V16BFmode:
case E_V8SFmode:
case E_V8SImode:
case E_V32QImode:
@@ -4755,6 +4767,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
case E_V4DFmode:
case E_V4DImode:
case E_V32HFmode:
+ case E_V32BFmode:
case E_V16SFmode:
case E_V16SImode:
case E_V64QImode:
@@ -5430,7 +5443,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
switch (type)
{
case opcode_int:
- if (scalar_mode == E_HFmode)
+ if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
opcode = (misaligned_p
? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
: "vmovdqa64");
@@ -5450,6 +5463,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
switch (scalar_mode)
{
case E_HFmode:
+ case E_BFmode:
if (evex_reg_p)
opcode = (misaligned_p
? (TARGET_AVX512BW
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 0da3dce..0de5c77 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1011,7 +1011,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_AVX256_REG_MODE(MODE) \
((MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
|| (MODE) == V4DImode || (MODE) == V2TImode || (MODE) == V8SFmode \
- || (MODE) == V4DFmode || (MODE) == V16HFmode)
+ || (MODE) == V4DFmode || (MODE) == V16HFmode || (MODE) == V16BFmode)
#define VALID_AVX256_REG_OR_OI_MODE(MODE) \
(VALID_AVX256_REG_MODE (MODE) || (MODE) == OImode)
@@ -1026,7 +1026,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_AVX512F_REG_MODE(MODE) \
((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \
|| (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \
- || (MODE) == V4TImode || (MODE) == V32HFmode)
+ || (MODE) == V4TImode || (MODE) == V32HFmode || (MODE) == V32BFmode)
#define VALID_AVX512F_REG_OR_XI_MODE(MODE) \
(VALID_AVX512F_REG_MODE (MODE) || (MODE) == XImode)
@@ -1035,7 +1035,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \
|| (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
|| (MODE) == TFmode || (MODE) == V1TImode || (MODE) == V8HFmode \
- || (MODE) == TImode)
+ || (MODE) == V8BFmode || (MODE) == TImode)
#define VALID_AVX512FP16_REG_MODE(MODE) \
((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode \
@@ -1044,6 +1044,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \
+ || (MODE) == V8BFmode \
|| (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
|| (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \
|| (MODE) == HFmode || (MODE) == BFmode)
@@ -1095,8 +1096,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
|| (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \
|| (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
- || (MODE) == V16SFmode || (MODE) == V32HFmode || (MODE) == V16HFmode \
- || (MODE) == V8HFmode)
+ || (MODE) == V16SFmode \
+ || (MODE) == V32HFmode || (MODE) == V16HFmode || (MODE) == V8HFmode \
+ || (MODE) == V32BFmode || (MODE) == V16BFmode || (MODE) == V8BFmode)
#define X87_FLOAT_MODE_P(MODE) \
(TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5f7e245..58fcc38 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1114,7 +1114,8 @@
(V2DF "16") (V4DF "32") (V8DF "64")
(V4SF "16") (V8SF "32") (V16SF "64")
(V8HF "16") (V16HF "32") (V32HF "64")
- (V4HF "8") (V2HF "4")])
+ (V4HF "8") (V2HF "4")
+ (V8BF "16") (V16BF "32") (V32BF "64")])
;; Double word integer modes as mode attribute.
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
@@ -1258,8 +1259,8 @@
(define_mode_attr ssemodesuffix
[(HF "sh") (SF "ss") (DF "sd")
(V32HF "ph") (V16SF "ps") (V8DF "pd")
- (V16HF "ph") (V8SF "ps") (V4DF "pd")
- (V8HF "ph") (V4SF "ps") (V2DF "pd")
+ (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
+ (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
(V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
(V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
@@ -1269,7 +1270,7 @@
;; SSE vector mode corresponding to a scalar mode
(define_mode_attr ssevecmode
- [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (SF "V4SF") (DF "V2DF")])
+ [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
(define_mode_attr ssevecmodelower
[(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b23f07e..9351548 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -232,6 +232,7 @@
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
(V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
@@ -263,10 +264,11 @@
[V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
-(define_mode_iterator VI12HF_AVX512VL
+(define_mode_iterator VI12HFBF_AVX512VL
[V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
- V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
+ V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
;; Same iterator, but without supposed TARGET_AVX512BW
(define_mode_iterator VI12_AVX512VLBW
@@ -309,10 +311,10 @@
;; All 256bit and 512bit vector modes
(define_mode_iterator V_256_512
- [V32QI V16HI V16HF V8SI V4DI V8SF V4DF
+ [V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF
(V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V32HF "TARGET_AVX512F")
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
- (V8DF "TARGET_AVX512F")])
+ (V32BF "TARGET_AVX512F") (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; All vector float modes
(define_mode_iterator VF
@@ -435,6 +437,13 @@
(define_mode_iterator VF_AVX512FP16
[V32HF V16HF V8HF])
+(define_mode_iterator VF_AVX512HFBF16
+ [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
+ (V8HF "TARGET_AVX512FP16") V32BF V16BF V8BF])
+
+(define_mode_iterator VF_AVX512BWHFBF16
+ [V32HF V16HF V8HF V32BF V16BF V8BF])
+
(define_mode_iterator VF_AVX512FP16VL
[V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
@@ -447,13 +456,14 @@
(V4DI "TARGET_AVX") V2DI])
;; All vector integer and HF modes
-(define_mode_iterator VIHF
+(define_mode_iterator VIHFBF
[(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF])
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF])
(define_mode_iterator VI_AVX2
[(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
@@ -676,6 +686,7 @@
(V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
(V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
(V8HF "avx512fp16") (V16HF "avx512vl") (V32HF "avx512bw")
+ (V8BF "avx512vl") (V16BF "avx512vl") (V32BF "avx512bw")
(V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
(V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
@@ -786,7 +797,7 @@
;; All 128 and 256bit vector integer modes
(define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
;; All 256bit vector integer and HF modes
-(define_mode_iterator VIHF_256 [V32QI V16HI V8SI V4DI V16HF])
+(define_mode_iterator VIHFBF_256 [V32QI V16HI V8SI V4DI V16HF V16BF])
;; Various 128bit vector integer mode combinations
(define_mode_iterator VI12_128 [V16QI V8HI])
@@ -813,12 +824,12 @@
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
(define_mode_iterator VI_AVX512BW
[V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
-(define_mode_iterator VIHF_AVX512BW
+(define_mode_iterator VIHFBF_AVX512BW
[V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
- (V32HF "TARGET_AVX512BW")])
+ (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")])
;; Int-float size matches
-(define_mode_iterator VI2F_256_512 [V16HI V32HI V16HF V32HF])
+(define_mode_iterator VI2F_256_512 [V16HI V32HI V16HF V32HF V16BF V32BF])
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
@@ -863,9 +874,9 @@
(V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
V16SF V8DF])
-(define_mode_iterator V8_128 [V8HI V8HF])
-(define_mode_iterator V16_256 [V16HI V16HF])
-(define_mode_iterator V32_512 [V32HI V32HF])
+(define_mode_iterator V8_128 [V8HI V8HF V8BF])
+(define_mode_iterator V16_256 [V16HI V16HF V16BF])
+(define_mode_iterator V32_512 [V32HI V32HF V32BF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
@@ -910,6 +921,7 @@
(V8SF "V8SF") (V4DF "V4DF")
(V4SF "V4SF") (V2DF "V2DF")
(V8HF "TI") (V16HF "OI") (V32HF "XI")
+ (V8BF "TI") (V16BF "OI") (V32BF "XI")
(TI "TI")])
(define_mode_attr sseintvecinsnmode
@@ -926,16 +938,17 @@
[(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC")
(V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC")
(V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC")
- (V32HF "BF") (V16SF "BF") (V8DF "BF")
- (V16HF "BF") (V8SF "BF") (V4DF "BF")
- (V8HF "BF") (V4SF "BF") (V2DF "BF")])
+ (V32HF "BF") (V32BF "BF") (V16SF "BF") (V8DF "BF")
+ (V16HF "BF") (V16BF "BF") (V8SF "BF") (V4DF "BF")
+ (V8HF "BF") (V8BF "BF") (V4SF "BF") (V2DF "BF")])
;; SSE integer instruction suffix for various modes
(define_mode_attr sseintmodesuffix
[(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
(V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
(V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")
- (V8HF "w") (V16HF "w") (V32HF "w")])
+ (V8HF "w") (V16HF "w") (V32HF "w")
+ (V8BF "w") (V16BF "w") (V32BF "w")])
;; Mapping of vector modes to corresponding mask size
(define_mode_attr avx512fmaskmode
@@ -944,6 +957,7 @@
(V16SI "HI") (V8SI "QI") (V4SI "QI")
(V8DI "QI") (V4DI "QI") (V2DI "QI")
(V32HF "SI") (V16HF "HI") (V8HF "QI")
+ (V32BF "SI") (V16BF "HI") (V8BF "QI")
(V16SF "HI") (V8SF "QI") (V4SF "QI")
(V8DF "QI") (V4DF "QI") (V2DF "QI")])
@@ -958,6 +972,7 @@
(V16SI "hi") (V8SI "qi") (V4SI "qi")
(V8DI "qi") (V4DI "qi") (V2DI "qi")
(V32HF "si") (V16HF "hi") (V8HF "qi")
+ (V32BF "si") (V16BF "hi") (V8BF "qi")
(V16SF "hi") (V8SF "qi") (V4SF "qi")
(V8DF "qi") (V4DF "qi") (V2DF "qi")])
@@ -973,9 +988,9 @@
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
- [(V32HF "V32HI") (V16SF "V16SI") (V8DF "V8DI")
- (V16HF "V16HI") (V8SF "V8SI") (V4DF "V4DI")
- (V8HF "V8HI") (V4SF "V4SI") (V2DF "V2DI")
+ [(V32HF "V32HI") (V32BF "V32HI") (V16SF "V16SI") (V8DF "V8DI")
+ (V16HF "V16HI") (V16BF "V16HI") (V8SF "V8SI") (V4DF "V4DI")
+ (V8HF "V8HI") (V8BF "V8HI") (V4SF "V4SI") (V2DF "V2DI")
(V16SI "V16SI") (V8DI "V8DI")
(V8SI "V8SI") (V4DI "V4DI")
(V4SI "V4SI") (V2DI "V2DI")
@@ -998,9 +1013,9 @@
(V16HF "OI") (V8HF "TI")])
(define_mode_attr sseintvecmodelower
- [(V32HF "v32hi") (V16SF "v16si") (V8DF "v8di")
- (V16HF "v16hi") (V8SF "v8si") (V4DF "v4di")
- (V8HF "v8hi") (V4SF "v4si") (V2DF "v2di")
+ [(V32HF "v32hi") (V32BF "v32hi") (V16SF "v16si") (V8DF "v8di")
+ (V16HF "v16hi") (V16BF "v16hi") (V8SF "v8si") (V4DF "v4di")
+ (V8HF "v8hi") (V8BF "v8hi") (V4SF "v4si") (V2DF "v2di")
(V8SI "v8si") (V4DI "v4di")
(V4SI "v4si") (V2DI "v2di")
(V16HI "v16hi") (V8HI "v8hi")
@@ -1014,7 +1029,8 @@
(V16SF "V32SF") (V8DF "V16DF")
(V8SF "V16SF") (V4DF "V8DF")
(V4SF "V8SF") (V2DF "V4DF")
- (V32HF "V64HF") (V16HF "V32HF") (V8HF "V16HF")])
+ (V32HF "V64HF") (V16HF "V32HF") (V8HF "V16HF")
+ (V32BF "V64BF") (V16BF "V32BF") (V8BF "V16BF")])
;; Mapping of vector modes to a vector mode of half size
;; instead of V1DI/V1DF, DI/DF are used for V2DI/V2DF although they are scalar.
@@ -1025,7 +1041,8 @@
(V16SF "V8SF") (V8DF "V4DF")
(V8SF "V4SF") (V4DF "V2DF")
(V4SF "V2SF") (V2DF "DF")
- (V32HF "V16HF") (V16HF "V8HF") (V8HF "V4HF")])
+ (V32HF "V16HF") (V16HF "V8HF") (V8HF "V4HF")
+ (V32BF "V16BF") (V16BF "V8BF") (V8BF "V4BF")])
(define_mode_attr ssehalfvecmodelower
[(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
@@ -1034,7 +1051,8 @@
(V16SF "v8sf") (V8DF "v4df")
(V8SF "v4sf") (V4DF "v2df")
(V4SF "v2sf")
- (V32HF "v16hf") (V16HF "v8hf") (V8HF "v4hf")])
+ (V32HF "v16hf") (V16HF "v8hf") (V8HF "v4hf")
+ (V32BF "v16bf") (V16BF "v8bf") (V8BF "v4bf")])
;; Mapping of vector modes to vector hf modes of conversion.
(define_mode_attr ssePHmode
@@ -1085,6 +1103,7 @@
(V16SI "SI") (V8SI "SI") (V4SI "SI")
(V8DI "DI") (V4DI "DI") (V2DI "DI")
(V32HF "HF") (V16HF "HF") (V8HF "HF")
+ (V32BF "BF") (V16BF "BF") (V8BF "BF")
(V16SF "SF") (V8SF "SF") (V4SF "SF")
(V8DF "DF") (V4DF "DF") (V2DF "DF")
(V4TI "TI") (V2TI "TI")])
@@ -1096,6 +1115,7 @@
(V16SI "si") (V8SI "si") (V4SI "si")
(V8DI "di") (V4DI "di") (V2DI "di")
(V32HF "hf") (V16HF "hf") (V8HF "hf")
+ (V32BF "bf") (V16BF "bf") (V8BF "bf")
(V16SF "sf") (V8SF "sf") (V4SF "sf")
(V8DF "df") (V4DF "df") (V2DF "df")
(V4TI "ti") (V2TI "ti")])
@@ -1107,6 +1127,7 @@
(V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
(V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
(V32HF "V8HF") (V16HF "V8HF") (V8HF "V8HF")
+ (V32BF "V8BF") (V16BF "V8BF") (V8BF "V8BF")
(V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
(V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
@@ -1128,6 +1149,7 @@
(V16SF "d") (V8SF "d") (V4SF "d")
(V32HI "d") (V16HI "d") (V8HI "d")
(V32HF "d") (V16HF "d") (V8HF "d")
+ (V32BF "d") (V16BF "d") (V8BF "d")
(V64QI "d") (V32QI "d") (V16QI "d")])
;; Number of scalar elements in each vector type
@@ -1153,6 +1175,7 @@
(V32HI "16") (V16HI "16") (V8HI "16")
(V16SI "32") (V8SI "32") (V4SI "32")
(V32HF "16") (V16HF "16") (V8HF "16")
+ (V32BF "16") (V16BF "16") (V8BF "16")
(V16SF "32") (V8SF "32") (V4SF "32")
(V8DF "64") (V4DF "64") (V2DF "64")])
@@ -1164,9 +1187,9 @@
(V4SI "p") (V4SF "")
(V8SI "p") (V8SF "")
(V16SI "p") (V16SF "")
- (V16QI "p") (V8HI "p") (V8HF "p")
- (V32QI "p") (V16HI "p") (V16HF "p")
- (V64QI "p") (V32HI "p") (V32HF "p")])
+ (V16QI "p") (V8HI "p") (V8HF "p") (V8BF "p")
+ (V32QI "p") (V16HI "p") (V16HF "p") (V16BF "p")
+ (V64QI "p") (V32HI "p") (V32HF "p") (V32BF "p")])
;; SSE prefix for integer and HF vector comparison.
(define_mode_attr ssecmpintprefix
@@ -1219,7 +1242,8 @@
;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; i64x4 or f64x4 for 512bit modes.
(define_mode_attr i128
- [(V16HF "%~128") (V32HF "i64x4") (V16SF "f64x4") (V8SF "f128")
+ [(V16HF "%~128") (V32HF "i64x4") (V16BF "%~128") (V32BF "i64x4")
+ (V16SF "f64x4") (V8SF "f128")
(V8DF "f64x4") (V4DF "f128")
(V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
(V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
@@ -1245,17 +1269,18 @@
(V16SI "d") (V8SI "d") (V4SI "d")
(V8DI "q") (V4DI "q") (V2DI "q")
(V32HF "w") (V16HF "w") (V8HF "w")
+ (V32BF "w") (V16BF "w") (V8BF "w")
(V16SF "ss") (V8SF "ss") (V4SF "ss")
(V8DF "sd") (V4DF "sd") (V2DF "sd")])
;; Tie mode of assembler operand to mode iterator
(define_mode_attr xtg_mode
[(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x")
- (V8HF "x") (V4SF "x") (V2DF "x")
+ (V8HF "x") (V8BF "x") (V4SF "x") (V2DF "x")
(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t")
- (V16HF "t") (V8SF "t") (V4DF "t")
+ (V16HF "t") (V16BF "t") (V8SF "t") (V4DF "t")
(V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g")
- (V32HF "g") (V16SF "g") (V8DF "g")])
+ (V32HF "g") (V32BF "g") (V16SF "g") (V8DF "g")])
;; Half mask mode for unpacks
(define_mode_attr HALFMASKMODE
@@ -1553,10 +1578,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_blendm<mode>"
- [(set (match_operand:VF_AVX512FP16 0 "register_operand" "=v,v")
- (vec_merge:VF_AVX512FP16
- (match_operand:VF_AVX512FP16 2 "nonimmediate_operand" "vm,vm")
- (match_operand:VF_AVX512FP16 1 "nonimm_or_0_operand" "0C,v")
+ [(set (match_operand:VF_AVX512BWHFBF16 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512BWHFBF16
+ (match_operand:VF_AVX512BWHFBF16 2 "nonimmediate_operand" "vm,vm")
+ (match_operand:VF_AVX512BWHFBF16 1 "nonimm_or_0_operand" "0C,v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512BW"
"@
@@ -1595,9 +1620,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_store<mode>_mask"
- [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
- (vec_merge:VI12HF_AVX512VL
- (match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:VI12HFBF_AVX512VL
+ (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
"TARGET_AVX512BW"
@@ -4513,14 +4538,18 @@
DONE;
})
+(define_mode_iterator VF_AVX512HFBFVL
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+
(define_expand "vcond<mode><sseintvecmodelower>"
- [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
- (if_then_else:VF_AVX512FP16VL
+ [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand")
+ (if_then_else:VF_AVX512HFBFVL
(match_operator 3 ""
[(match_operand:<sseintvecmode> 4 "vector_operand")
(match_operand:<sseintvecmode> 5 "vector_operand")])
- (match_operand:VF_AVX512FP16VL 1 "general_operand")
- (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+ (match_operand:VF_AVX512HFBFVL 1 "general_operand")
+ (match_operand:VF_AVX512HFBFVL 2 "general_operand")))]
"TARGET_AVX512FP16"
{
bool ok = ix86_expand_int_vcond (operands);
@@ -4552,10 +4581,10 @@
"TARGET_AVX512F")
(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
- [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
- (vec_merge:VI12HF_AVX512VL
- (match_operand:VI12HF_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand")
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
+ (vec_merge:VI12HFBF_AVX512VL
+ (match_operand:VI12HFBF_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand")
(match_operand:<avx512fmaskmode> 3 "register_operand")))]
"TARGET_AVX512BW")
@@ -10747,7 +10776,7 @@
(const_string "HF")
(const_string "TI")))
(set (attr "enabled")
- (cond [(and (not (match_test "<MODE>mode == V8HFmode"))
+ (cond [(and (not (match_test "<MODE>mode == V8HFmode || <MODE>mode == V8BFmode"))
(eq_attr "alternative" "2"))
(symbol_ref "false")
]
@@ -10809,11 +10838,13 @@
DONE;
})
-(define_insn "avx512fp16_movsh"
- [(set (match_operand:V8HF 0 "register_operand" "=v")
- (vec_merge:V8HF
- (match_operand:V8HF 2 "register_operand" "v")
- (match_operand:V8HF 1 "register_operand" "v")
+(define_mode_iterator V8BFH_128 [V8HF V8BF])
+
+(define_insn "avx512fp16_mov<mode>"
+ [(set (match_operand:V8BFH_128 0 "register_operand" "=v")
+ (vec_merge:V8BFH_128
+ (match_operand:V8BFH_128 2 "register_operand" "v")
+ (match_operand:V8BFH_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512FP16"
"vmovsh\t{%2, %1, %0|%0, %1, %2}"
@@ -10996,9 +11027,9 @@
DONE;
})
-(define_expand "vec_setv8hf"
- [(match_operand:V8HF 0 "register_operand")
- (match_operand:HF 1 "register_operand")
+(define_expand "vec_set<mode>"
+ [(match_operand:V8BFH_128 0 "register_operand")
+ (match_operand:<ssescalarmode> 1 "register_operand")
(match_operand 2 "vec_setm_sse41_operand")]
"TARGET_SSE"
{
@@ -11726,7 +11757,7 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "vec_extract_lo_<mode>"
+(define_insn_and_split "@vec_extract_lo_<mode>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
(vec_select:<ssehalfvecmode>
(match_operand:V32_512 1 "nonimmediate_operand" "v,m,v")
@@ -11768,7 +11799,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_extract_hi_<mode>"
+(define_insn "@vec_extract_hi_<mode>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
(vec_select:<ssehalfvecmode>
(match_operand:V32_512 1 "register_operand" "v")
@@ -11788,7 +11819,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn_and_split "vec_extract_lo_<mode>"
+(define_insn_and_split "@vec_extract_lo_<mode>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
(vec_select:<ssehalfvecmode>
(match_operand:V16_256 1 "nonimmediate_operand" "vm,v")
@@ -11802,7 +11833,7 @@
[(set (match_dup 0) (match_dup 1))]
"operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
-(define_insn "vec_extract_hi_<mode>"
+(define_insn "@vec_extract_hi_<mode>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm,vm,vm")
(vec_select:<ssehalfvecmode>
(match_operand:V16_256 1 "register_operand" "x,v,v")
@@ -11944,20 +11975,20 @@
;; NB: *vec_extract<mode>_0 must be placed before *vec_extracthf.
;; Otherwise, it will be ignored.
(define_insn_and_split "*vec_extract<mode>_0"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,r")
- (vec_select:HF
- (match_operand:VF_AVX512FP16 1 "nonimmediate_operand" "vm,v,m")
+ [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=v,m,r")
+ (vec_select:<ssescalarmode>
+ (match_operand:VF_AVX512HFBF16 1 "nonimmediate_operand" "vm,v,m")
(parallel [(const_int 0)])))]
- "TARGET_AVX512FP16 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (HFmode, operands[1]);")
+ "operands[1] = gen_lowpart (<ssescalarmode>mode, operands[1]);")
-(define_insn "*vec_extracthf"
- [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
- (vec_select:HF
- (match_operand:V8HF 1 "register_operand" "v,v,0,v")
+(define_insn "*vec_extract<mode>"
+ [(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
+ (vec_select:HFBF
+ (match_operand:<ssevecmode> 1 "register_operand" "v,v,0,v")
(parallel
[(match_operand:SI 2 "const_0_to_7_operand")])))]
"TARGET_SSE2"
@@ -11992,6 +12023,7 @@
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -18097,17 +18129,17 @@
;; Modes handled by pinsr patterns.
(define_mode_iterator PINSR_MODE
- [(V16QI "TARGET_SSE4_1") V8HI V8HF
+ [(V16QI "TARGET_SSE4_1") V8HI V8HF V8BF
(V4SI "TARGET_SSE4_1")
(V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
(define_mode_attr sse2p4_1
[(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2")
- (V4SI "sse4_1") (V2DI "sse4_1")])
+ (V8BF "sse2") (V4SI "sse4_1") (V2DI "sse4_1")])
(define_mode_attr pinsr_evex_isa
[(V16QI "avx512bw") (V8HI "avx512bw") (V8HF "avx512bw")
- (V4SI "avx512dq") (V2DI "avx512dq")])
+ (V8BF "avx512bw") (V4SI "avx512dq") (V2DI "avx512dq")])
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
@@ -25193,11 +25225,12 @@
(V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
(V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
(V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")
- (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw")])
+ (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw")
+ (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")])
(define_insn "avx2_pbroadcast<mode>"
- [(set (match_operand:VIHF 0 "register_operand" "=x,v")
- (vec_duplicate:VIHF
+ [(set (match_operand:VIHFBF 0 "register_operand" "=x,v")
+ (vec_duplicate:VIHFBF
(vec_select:<ssescalarmode>
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0)]))))]
@@ -25210,10 +25243,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx2_pbroadcast<mode>_1"
- [(set (match_operand:VIHF_256 0 "register_operand" "=x,x,v,v")
- (vec_duplicate:VIHF_256
+ [(set (match_operand:VIHFBF_256 0 "register_operand" "=x,x,v,v")
+ (vec_duplicate:VIHFBF_256
(vec_select:<ssescalarmode>
- (match_operand:VIHF_256 1 "nonimmediate_operand" "m,x,m,v")
+ (match_operand:VIHFBF_256 1 "nonimmediate_operand" "m,x,m,v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"@
@@ -25589,10 +25622,10 @@
(set_attr "mode" "V4DF")])
(define_insn "<avx512>_vec_dup<mode>_1"
- [(set (match_operand:VIHF_AVX512BW 0 "register_operand" "=v,v")
- (vec_duplicate:VIHF_AVX512BW
+ [(set (match_operand:VIHFBF_AVX512BW 0 "register_operand" "=v,v")
+ (vec_duplicate:VIHFBF_AVX512BW
(vec_select:<ssescalarmode>
- (match_operand:VIHF_AVX512BW 1 "nonimmediate_operand" "v,m")
+ (match_operand:VIHFBF_AVX512BW 1 "nonimmediate_operand" "v,m")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F"
"@
@@ -25622,8 +25655,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_vec_dup<mode><mask_name>"
- [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v")
- (vec_duplicate:VI12HF_AVX512VL
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI12HFBF_AVX512VL
(vec_select:<ssescalarmode>
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)]))))]
@@ -25658,8 +25691,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI12HF_AVX512VL 0 "register_operand" "=v,v")
- (vec_duplicate:VI12HF_AVX512VL
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_duplicate:VI12HFBF_AVX512VL
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
"TARGET_AVX512BW"
"@
@@ -25759,7 +25792,7 @@
[(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
;; Modes handled by AVX2 vec_dup patterns.
(define_mode_iterator AVX2_VEC_DUP_MODE
- [V32QI V16QI V16HI V8HI V8SI V4SI V16HF V8HF])
+ [V32QI V16QI V16HI V8HI V8SI V4SI V16HF V8HF V16BF V8BF])
(define_insn "*vec_dup<mode>"
[(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
@@ -26522,6 +26555,7 @@
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -26534,6 +26568,7 @@
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
(V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V4TI "TARGET_AVX512F")])