aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author liuhongt <hongtao.liu@intel.com> 2025-01-08 23:11:17 -0800
committer liuhongt <hongtao.liu@intel.com> 2025-01-12 17:40:24 -0800
commit 0e05b793fba2a9bea9f0fbb1f068679f5dadf514 (patch)
tree fabcb688097e906062d0193a9400549a0e332993
parent 9c387a99a911724546abe99ecd39bfc968ed6333 (diff)
download gcc-0e05b793fba2a9bea9f0fbb1f068679f5dadf514.zip
gcc-0e05b793fba2a9bea9f0fbb1f068679f5dadf514.tar.gz
gcc-0e05b793fba2a9bea9f0fbb1f068679f5dadf514.tar.bz2
Refactor ix86_expand_vecop_qihi2.
Using vpermq causes a regression, and that path is already manually disabled by the !TARGET_AVX512BW check. Remove the code related to vpermq and make ix86_expand_vecop_qihi2 handle only the vpmovbw + op + vpmovwb case. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_vecop_qihi2): Refactor to avoid redundant TARGET_AVX512BW checks in many places.
-rw-r--r-- gcc/config/i386/i386-expand.cc 39
1 file changed, 5 insertions, 34 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 2ab5787..da03083 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24864,11 +24864,9 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
generic permutation to merge the data back into the right place. This
permutation results in VPERMQ, which is slow, so better fall back to
ix86_expand_vecop_qihi. */
- if (!TARGET_AVX512BW)
- return false;
-
- if ((qimode == V16QImode && !TARGET_AVX2)
- || (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
+ if (!TARGET_AVX512BW
+ || (qimode == V16QImode && !TARGET_AVX512VL)
+ || (qimode == V32QImode && !TARGET_EVEX512)
/* There are no V64HImode instructions. */
|| qimode == V64QImode)
return false;
@@ -24883,8 +24881,7 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
case E_V16QImode:
himode = V16HImode;
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen_truncate = gen_truncv16hiv16qi2;
+ gen_truncate = gen_truncv16hiv16qi2;
break;
case E_V32QImode:
himode = V32HImode;
@@ -24926,33 +24923,7 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
hdest = expand_simple_binop (himode, code, hop1, hop2,
NULL_RTX, 1, OPTAB_DIRECT);
- if (gen_truncate)
- emit_insn (gen_truncate (dest, hdest));
- else
- {
- struct expand_vec_perm_d d;
- rtx wqdest = gen_reg_rtx (wqimode);
- rtx wqres = gen_lowpart (wqimode, hdest);
- bool ok;
- int i;
-
- /* Merge the data back into the right place. */
- d.target = wqdest;
- d.op0 = d.op1 = wqres;
- d.vmode = wqimode;
- d.nelt = GET_MODE_NUNITS (wqimode);
- d.one_operand_p = false;
- d.testing_p = false;
-
- for (i = 0; i < d.nelt; ++i)
- d.perm[i] = i * 2;
-
- ok = ix86_expand_vec_perm_const_1 (&d);
- gcc_assert (ok);
-
- emit_move_insn (dest, gen_lowpart (qimode, wqdest));
- }
-
+ emit_insn (gen_truncate (dest, hdest));
return true;
}