diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2019-06-23 19:04:47 +0200 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2019-10-14 07:10:20 -0700 |
commit | 47c906ae6f54fa10b3f072863d8993e790a14439 (patch) | |
tree | b76a7f6b9782e7403e4e85fa29999bfef487eefb /tcg/ppc | |
parent | 68f340d4cd9f0423039e4706a6602673d7ca9101 (diff) | |
download | qemu-47c906ae6f54fa10b3f072863d8993e790a14439.zip qemu-47c906ae6f54fa10b3f072863d8993e790a14439.tar.gz qemu-47c906ae6f54fa10b3f072863d8993e790a14439.tar.bz2 |
tcg/ppc: Update vector support for VSX
The VSX instruction set instructions include double-word loads and
stores, double-word load and splat, double-word permute, and bit
select. All of which require multiple operations in the Altivec
instruction set.
Because the VSX registers map %vsr32 to %vr0, and we have no current
intention or need to use vector registers outside %vr0-%vr19, force
on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't
have to otherwise modify the VR[TABC] macros.
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Diffstat (limited to 'tcg/ppc')
-rw-r--r-- | tcg/ppc/tcg-target.h | 5 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.inc.c | 52 |
2 files changed, 51 insertions, 6 deletions
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index f50b7f4..c974ca2 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -66,6 +66,7 @@ typedef enum { extern TCGPowerISA have_isa; extern bool have_altivec; +extern bool have_vsx; #define have_isa_2_06 (have_isa >= tcg_isa_2_06) #define have_isa_3_00 (have_isa >= tcg_isa_3_00) @@ -149,7 +150,7 @@ extern bool have_altivec; * instruction and substituting two 32-bit stores makes the generated * code quite large. */ -#define TCG_TARGET_HAS_v64 0 +#define TCG_TARGET_HAS_v64 have_vsx #define TCG_TARGET_HAS_v128 have_altivec #define TCG_TARGET_HAS_v256 0 @@ -165,7 +166,7 @@ extern bool have_altivec; #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 -#define TCG_TARGET_HAS_bitsel_vec 0 +#define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 void flush_icache_range(uintptr_t start, uintptr_t stop); diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index d739f4b..2388958 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr; TCGPowerISA have_isa; static bool have_isel; bool have_altivec; +bool have_vsx; #ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG 30 @@ -467,9 +468,12 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, #define LVEBX XO31(7) #define LVEHX XO31(39) #define LVEWX XO31(71) +#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ +#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ #define STVX XO31(231) #define STVEWX XO31(199) +#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ #define VADDSBS VX4(768) #define VADDUBS VX4(512) @@ -558,6 +562,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, #define VSLDOI VX4(44) +#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ +#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ + #define RT(r) ((r)<<21) #define RS(r) ((r)<<21) #define RA(r) ((r)<<16) @@ -884,11 +891,21 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, add = 0; } - load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); - if (TCG_TARGET_REG_BITS == 64) { - new_pool_l2(s, rel, s->code_ptr, add, val, val); + if (have_vsx) { + load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; + load_insn |= VRT(ret) | RB(TCG_REG_TMP1); + if (TCG_TARGET_REG_BITS == 64) { + new_pool_label(s, val, rel, s->code_ptr, add); + } else { + new_pool_l2(s, rel, s->code_ptr, add, val, val); + } } else { - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); + if (TCG_TARGET_REG_BITS == 64) { + new_pool_l2(s, rel, s->code_ptr, add, val, val); + } else { + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); + } } if (USE_REG_TB) { @@ -1136,6 +1153,10 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, /* fallthru */ case TCG_TYPE_V64: tcg_debug_assert(ret >= TCG_REG_V0); + if (have_vsx) { + tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); + break; + } tcg_debug_assert((offset & 7) == 0); tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); if (offset & 8) { @@ -1180,6 +1201,10 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, /* fallthru */ case TCG_TYPE_V64: tcg_debug_assert(arg >= TCG_REG_V0); + if (have_vsx) { + tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); + break; + } tcg_debug_assert((offset & 7) == 0); if (offset & 8) { tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); @@ -2899,6 +2924,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_shri_vec: case INDEX_op_sari_vec: return vece <= MO_32 ? -1 : 0; + case INDEX_op_bitsel_vec: + return have_vsx; default: return 0; } @@ -2925,6 +2952,10 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); break; case MO_64: + if (have_vsx) { + tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); + break; + } tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); break; @@ -2968,6 +2999,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); break; case MO_64: + if (have_vsx) { + tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); + break; + } tcg_debug_assert((offset & 7) == 0); tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); @@ -3102,6 +3137,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_bitsel_vec: + tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); + return; + case INDEX_op_dup2_vec: assert(TCG_TARGET_REG_BITS == 32); /* With inputs a1 = xLxx, a2 = xHxx */ @@ -3497,6 +3536,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) case INDEX_op_st_vec: case INDEX_op_dupm_vec: return &v_r; + case INDEX_op_bitsel_vec: case INDEX_op_ppc_msum_vec: return &v_v_v_v; @@ -3530,6 +3570,10 @@ static void tcg_target_init(TCGContext *s) if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { have_altivec = true; + /* We only care about the portion of VSX that overlaps Altivec. */ + if (hwcap & PPC_FEATURE_HAS_VSX) { + have_vsx = true; + } } tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; |