diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-06-23 16:14:31 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-06-23 16:16:18 +0200 |
commit | 37e93925366676201b526624e9f8dc32d82b4ff2 (patch) | |
tree | ed32b90a49693afd597530f0cb5be3c673437b5c /gcc | |
parent | 371c1992624c9269e2d5747561a8b27b30e485ee (diff) | |
download | gcc-37e93925366676201b526624e9f8dc32d82b4ff2.zip gcc-37e93925366676201b526624e9f8dc32d82b4ff2.tar.gz gcc-37e93925366676201b526624e9f8dc32d82b4ff2.tar.bz2 |
i386: Add PPERM two-operand 64bit vector permutation [PR89021]
Add emulation of V8QI PPERM permutations for TARGET_XOP targets. Similar
to PSHUFB, the permutation is performed with the V16QI PPERM instruction,
where the selector is defined in V16QI mode with inactive elements set to 0x80.
Specific to two-operand permutations is the remapping of selector indices
that refer to the second operand (e.g. e[8] -> e[16]): each 8-byte operand
occupies only the low half of a 16-byte register, so we have to account for
the inactive top 8 elements of the first operand.
2021-06-23 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/89021
* config/i386/i386-expand.c (expand_vec_perm_pshufb):
Handle 64bit modes for TARGET_XOP. Use indirect gen_* functions.
* config/i386/mmx.md (mmx_ppermv64): New insn pattern.
* config/i386/i386.md (unspec): Move UNSPEC_XOP_PERMUTE from ...
* config/i386/sse.md (unspec): ... here.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-expand.c | 75 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 1 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 13 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 1 |
4 files changed, 75 insertions, 15 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 2986b49..9c922bf 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -17467,10 +17467,23 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) if (!d->one_operand_p) { - if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) + if (GET_MODE_SIZE (d->vmode) == 8) + { + if (!TARGET_XOP) + return false; + vmode = V8QImode; + } + else if (GET_MODE_SIZE (d->vmode) == 16) + { + if (!TARGET_XOP) + return false; + } + else if (GET_MODE_SIZE (d->vmode) == 32) { - if (TARGET_AVX2 - && valid_perm_using_mode_p (V2TImode, d)) + if (!TARGET_AVX2) + return false; + + if (valid_perm_using_mode_p (V2TImode, d)) { if (d->testing_p) return true; @@ -17492,6 +17505,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) } return false; } + else + return false; } else { @@ -17651,8 +17666,22 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) { rtx m128 = GEN_INT (-128); + /* Remap elements from the second operand, as we have to + account for inactive top 8 elements from the first operand. */ + if (!d->one_operand_p) + for (i = 0; i < nelt; ++i) + { + int ival = INTVAL (rperm[i]); + if (ival >= 8) + ival += 8; + rperm[i] = GEN_INT (ival); + } + + /* V8QI is emulated with V16QI instruction, fill inactive + elements in the top 8 positions with zeros. 
*/ for (i = nelt; i < 16; ++i) rperm[i] = m128; + vpmode = V16QImode; } @@ -17660,36 +17689,54 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) gen_rtvec_v (GET_MODE_NUNITS (vpmode), rperm)); vperm = force_reg (vpmode, vperm); - target = d->target; - if (d->vmode != vmode) + if (vmode == d->vmode) + target = d->target; + else target = gen_reg_rtx (vmode); + op0 = gen_lowpart (vmode, d->op0); + if (d->one_operand_p) { + rtx (*gen) (rtx, rtx, rtx); + if (vmode == V8QImode) - emit_insn (gen_mmx_pshufbv8qi3 (target, op0, vperm)); + gen = gen_mmx_pshufbv8qi3; else if (vmode == V16QImode) - emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); + gen = gen_ssse3_pshufbv16qi3; else if (vmode == V32QImode) - emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); + gen = gen_avx2_pshufbv32qi3; else if (vmode == V64QImode) - emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); + gen = gen_avx512bw_pshufbv64qi3; else if (vmode == V8SFmode) - emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); + gen = gen_avx2_permvarv8sf; else if (vmode == V8SImode) - emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); + gen = gen_avx2_permvarv8si; else if (vmode == V16SFmode) - emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); + gen = gen_avx512f_permvarv16sf; else if (vmode == V16SImode) - emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); + gen = gen_avx512f_permvarv16si; else gcc_unreachable (); + + emit_insn (gen (target, op0, vperm)); } else { + rtx (*gen) (rtx, rtx, rtx, rtx); + op1 = gen_lowpart (vmode, d->op1); - emit_insn (gen_xop_pperm (target, op0, op1, vperm)); + + if (vmode == V8QImode) + gen = gen_mmx_ppermv64; + else if (vmode == V16QImode) + gen = gen_xop_pperm; + else + gcc_unreachable (); + + emit_insn (gen (target, op0, op1, vperm)); } + if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4e24210..9043be3 100644 --- 
a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -120,6 +120,7 @@ UNSPEC_MOVMSK UNSPEC_BLENDV UNSPEC_PSHUFB + UNSPEC_XOP_PERMUTE UNSPEC_RCP UNSPEC_RSQRT UNSPEC_PSADBW diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a107ac5..7a827dc 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2331,6 +2331,19 @@ "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) +;; XOP permute instructions +(define_insn "mmx_ppermv64" + [(set (match_operand:V8QI 0 "register_operand" "=x") + (unspec:V8QI + [(match_operand:V8QI 1 "register_operand" "x") + (match_operand:V8QI 2 "register_operand" "x") + (match_operand:V16QI 3 "nonimmediate_operand" "xm")] + UNSPEC_XOP_PERMUTE))] + "TARGET_XOP && TARGET_MMX_WITH_SSE" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral logical operations diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f5f9403..c5f739c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -53,7 +53,6 @@ UNSPEC_FMADDSUB UNSPEC_XOP_UNSIGNED_CMP UNSPEC_XOP_TRUEFALSE - UNSPEC_XOP_PERMUTE UNSPEC_FRCZ ;; For AES support |