author     Peter Maydell <peter.maydell@linaro.org>  2020-06-02 18:16:38 +0100
committer  Peter Maydell <peter.maydell@linaro.org>  2020-06-02 18:16:38 +0100
commit     5cc7a54c2e91d82cb6a52e4921325c511fd90712 (patch)
tree       2be3b3890af62f5763d51f347836a5b754599b1b
parent     98d59d5dd8b662ba8ec7c522faa9b88823389711 (diff)
parent     71b04329c4f7d5824a289ca5225e1883a278cf3b (diff)
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20200602' into staging
Vector rotate support
Signal handling support for NetBSD arm/aarch64

# gpg: Signature made Tue 02 Jun 2020 17:43:05 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20200602:
  accel/tcg: Provide a NetBSD specific aarch64 cpu_signal_handler
  accel/tcg: Adjust cpu_signal_handler for NetBSD/arm
  tcg: Improve move ops in liveness_pass_2
  target/s390x: Use tcg_gen_gvec_rotl{i,s,v}
  target/ppc: Use tcg_gen_gvec_rotlv
  tcg/ppc: Implement INDEX_op_rot[lr]v_vec
  tcg/aarch64: Implement INDEX_op_rotl{i,v}_vec
  tcg/i386: Implement INDEX_op_rotl{i,s,v}_vec
  tcg: Implement gvec support for rotate by scalar
  tcg: Remove expansion to shift by vector from do_shifts
  tcg: Implement gvec support for rotate by vector
  tcg: Implement gvec support for rotate by immediate

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
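The core of the series is a new family of generic-vector rotate expanders: tcg_gen_gvec_rotli (immediate), tcg_gen_gvec_rotls (scalar) and tcg_gen_gvec_rotlv/rotrv (vector of counts). As a rough illustration of how a target front end consumes the new API, a hypothetical translator fragment (the offset names are illustrative, not taken from any real target):

/* Rotate each 32-bit element of a 16-byte vector left by the
 * per-element counts held in a second vector register. */
tcg_gen_gvec_rotlv(MO_32,
                   dest_vreg_offset,    /* dofs (illustrative name) */
                   src_vreg_offset,     /* aofs */
                   count_vreg_offset,   /* bofs */
                   16, 16);             /* oprsz, maxsz */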
-rw-r--r--  accel/tcg/tcg-runtime-gvec.c         | 144
-rw-r--r--  accel/tcg/tcg-runtime.h              |  15
-rw-r--r--  accel/tcg/user-exec.c                |  43
-rw-r--r--  include/tcg/tcg-op-gvec.h            |  12
-rw-r--r--  include/tcg/tcg-op.h                 |   5
-rw-r--r--  include/tcg/tcg-opc.h                |   4
-rw-r--r--  include/tcg/tcg.h                    |   3
-rw-r--r--  target/ppc/helper.h                  |   4
-rw-r--r--  target/ppc/int_helper.c              |  17
-rw-r--r--  target/ppc/translate/vmx-impl.inc.c  |   8
-rw-r--r--  target/s390x/helper.h                |   4
-rw-r--r--  target/s390x/insn-data.def           |   4
-rw-r--r--  target/s390x/translate_vx.inc.c      |  66
-rw-r--r--  target/s390x/vec_int_helper.c        |  31
-rw-r--r--  tcg/README                           |   7
-rw-r--r--  tcg/aarch64/tcg-target.h             |   3
-rw-r--r--  tcg/aarch64/tcg-target.inc.c         |  53
-rw-r--r--  tcg/aarch64/tcg-target.opc.h         |   1
-rw-r--r--  tcg/i386/tcg-target.h                |   3
-rw-r--r--  tcg/i386/tcg-target.inc.c            | 116
-rw-r--r--  tcg/ppc/tcg-target.h                 |   3
-rw-r--r--  tcg/ppc/tcg-target.inc.c             |  23
-rw-r--r--  tcg/ppc/tcg-target.opc.h             |   1
-rw-r--r--  tcg/tcg-op-gvec.c                    | 212
-rw-r--r--  tcg/tcg-op-vec.c                     |  62
-rw-r--r--  tcg/tcg.c                            |  85
26 files changed, 736 insertions(+), 193 deletions(-)
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index ca44970..521da4a 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -716,6 +716,54 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ int shift = simd_data(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
+ }
+ clear_high(d, oprsz, desc);
+}
+
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
@@ -860,6 +908,102 @@ void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
clear_high(d, oprsz, desc);
}
+void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ uint8_t sh = *(uint8_t *)(b + i) & 7;
+ *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ uint8_t sh = *(uint16_t *)(b + i) & 15;
+ *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ uint8_t sh = *(uint32_t *)(b + i) & 31;
+ *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+ intptr_t i;
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ uint8_t sh = *(uint64_t *)(b + i) & 63;
+ *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
+ }
+ clear_high(d, oprsz, desc);
+}
+
#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
{ \
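The rol8/ror8 (and wider) primitives these helpers call are QEMU's bitops rotates. As a reminder of the semantics the helpers rely on, a minimal stand-alone sketch (the real definitions live in include/qemu/bitops.h):

static inline uint8_t rol8_sketch(uint8_t word, unsigned int shift)
{
    /* Rotate left; masking the complement keeps shift == 0 well defined. */
    return (word << shift) | (word >> ((8 - shift) & 7));
}

static inline uint8_t ror8_sketch(uint8_t word, unsigned int shift)
{
    /* Rotate right, same masking trick. */
    return (word >> shift) | (word << ((8 - shift) & 7));
}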
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4fa61b4..4eda24e 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -259,6 +259,11 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_rotl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -274,6 +279,16 @@ DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_rotr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_rotr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 5235994..d8b027f 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -517,6 +517,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
#if defined(__NetBSD__)
#include <ucontext.h>
+#include <sys/siginfo.h>
#endif
int cpu_signal_handler(int host_signum, void *pinfo,
@@ -525,10 +526,12 @@ int cpu_signal_handler(int host_signum, void *pinfo,
siginfo_t *info = pinfo;
#if defined(__NetBSD__)
ucontext_t *uc = puc;
+ siginfo_t *si = pinfo;
#else
ucontext_t *uc = puc;
#endif
unsigned long pc;
+ uint32_t fsr;
int is_write;
#if defined(__NetBSD__)
@@ -539,15 +542,48 @@ int cpu_signal_handler(int host_signum, void *pinfo,
pc = uc->uc_mcontext.arm_pc;
#endif
- /* error_code is the FSR value, in which bit 11 is WnR (assuming a v6 or
- * later processor; on v5 we will always report this as a read).
+#ifdef __NetBSD__
+ fsr = si->si_trap;
+#else
+ fsr = uc->uc_mcontext.error_code;
+#endif
+ /*
+ * In the FSR, bit 11 is WnR, assuming a v6 or
+ * later processor. On v5 we will always report
+ * this as a read, which will fail later.
*/
- is_write = extract32(uc->uc_mcontext.error_code, 11, 1);
+ is_write = extract32(fsr, 11, 1);
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
}
#elif defined(__aarch64__)
+#if defined(__NetBSD__)
+
+#include <ucontext.h>
+#include <sys/siginfo.h>
+
+int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
+{
+ ucontext_t *uc = puc;
+ siginfo_t *si = pinfo;
+ unsigned long pc;
+ int is_write;
+ uint32_t esr;
+
+ pc = uc->uc_mcontext.__gregs[_REG_PC];
+ esr = si->si_trap;
+
+ /*
+ * siginfo_t::si_trap is the ESR value; for data aborts,
+ * ESR.EC is 0b10010x and bit 6 is the WnR bit.
+ */
+ is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1;
+ return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask);
+}
+
+#else
+
#ifndef ESR_MAGIC
/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */
#define ESR_MAGIC 0x45535201
@@ -610,6 +646,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
}
return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
}
+#endif
#elif defined(__s390__)
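One subtlety in the new aarch64/NetBSD handler: extract32(esr, 27, 5) reads ESR.EC (bits [31:26]) without its lowest bit, so the single 0x12 comparison covers both data-abort classes, EC 0x24 (lower EL) and 0x25 (current EL). A small self-check with hypothetical ESR values, using extract32 from qemu/bitops.h:

/* EC lives in bits [31:26], WnR in bit 6. */
uint32_t esr_read  = 0x24u << 26;               /* data abort, lower EL, read    */
uint32_t esr_write = (0x25u << 26) | (1u << 6); /* data abort, current EL, write */

assert(extract32(esr_read,  27, 5) == 0x12 && !extract32(esr_read,  6, 1));
assert(extract32(esr_write, 27, 5) == 0x12 &&  extract32(esr_write, 6, 1));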
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index cea6497..c69a7de 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -334,6 +334,10 @@ void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
@@ -341,6 +345,8 @@ void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
/*
* Perform vector shift by vector element, modulo the element size.
@@ -352,6 +358,10 @@ void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, uint32_t bofs,
@@ -388,5 +398,7 @@ void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c);
#endif
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index e3399d6..5abf17f 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -999,14 +999,19 @@ void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s);
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec s);
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
TCGv_vec a, TCGv_vec b);
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 9288a04..e3929b8 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -248,14 +248,18 @@ DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(rotli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_roti_vec))
DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(rotls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rots_vec))
DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(rotlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
+DEF(rotrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_rotv_vec))
DEF(cmp_vec, 1, 2, 1, IMPLVEC)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index c48bd76..380014e 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -182,6 +182,9 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 0
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 2dfa1c6..90166cb 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -214,10 +214,6 @@ DEF_HELPER_3(vsubuqm, void, avr, avr, avr)
DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr)
DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr)
DEF_HELPER_3(vsubcuq, void, avr, avr, avr)
-DEF_HELPER_3(vrlb, void, avr, avr, avr)
-DEF_HELPER_3(vrlh, void, avr, avr, avr)
-DEF_HELPER_3(vrlw, void, avr, avr, avr)
-DEF_HELPER_3(vrld, void, avr, avr, avr)
DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32)
DEF_HELPER_3(vextractub, void, avr, avr, i32)
DEF_HELPER_3(vextractuh, void, avr, avr, i32)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index be53cd6..d8bd3c2 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1348,23 +1348,6 @@ VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI
-#define VROTATE(suffix, element, mask) \
- void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
- { \
- int i; \
- \
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- unsigned int shift = b->element[i] & mask; \
- r->element[i] = (a->element[i] << shift) | \
- (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
- } \
- }
-VROTATE(b, u8, 0x7)
-VROTATE(h, u16, 0xF)
-VROTATE(w, u32, 0x1F)
-VROTATE(d, u64, 0x3F)
-#undef VROTATE
-
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
int i;
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 403ed3a..de2fd13 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -900,13 +900,13 @@ GEN_VXFORM3(vsubeuqm, 31, 0);
GEN_VXFORM3(vsubecuq, 31, 0);
GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
-GEN_VXFORM(vrlb, 2, 0);
-GEN_VXFORM(vrlh, 2, 1);
-GEN_VXFORM(vrlw, 2, 2);
+GEN_VXFORM_V(vrlb, MO_8, tcg_gen_gvec_rotlv, 2, 0);
+GEN_VXFORM_V(vrlh, MO_16, tcg_gen_gvec_rotlv, 2, 1);
+GEN_VXFORM_V(vrlw, MO_32, tcg_gen_gvec_rotlv, 2, 2);
GEN_VXFORM(vrlwmi, 2, 2);
GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \
vrlwmi, PPC_NONE, PPC2_ISA300)
-GEN_VXFORM(vrld, 2, 3);
+GEN_VXFORM_V(vrld, MO_64, tcg_gen_gvec_rotlv, 2, 3);
GEN_VXFORM(vrldmi, 2, 3);
GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
vrldmi, PPC_NONE, PPC2_ISA300)
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index b5813c2..b7887b5 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -198,10 +198,6 @@ DEF_HELPER_FLAGS_4(gvec_vmlo16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vmlo32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vpopct8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
-DEF_HELPER_FLAGS_4(gvec_verllv8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
-DEF_HELPER_FLAGS_4(gvec_verllv16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
-DEF_HELPER_FLAGS_4(gvec_verll8, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
-DEF_HELPER_FLAGS_4(gvec_verll16, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 2bc77f0..91ddaed 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1147,8 +1147,8 @@
/* VECTOR POPULATION COUNT */
F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC)
/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
- F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, verllv, 0, IF_VEC)
- F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, verll, 0, IF_VEC)
+ F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC)
+ F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC)
/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */
F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC)
/* VECTOR ELEMENT SHIFT LEFT */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 12347f8..eb767f5 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -1825,63 +1825,6 @@ static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static void gen_rll_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t0 = tcg_temp_new_i32();
-
- tcg_gen_andi_i32(t0, b, 31);
- tcg_gen_rotl_i32(d, a, t0);
- tcg_temp_free_i32(t0);
-}
-
-static void gen_rll_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t0 = tcg_temp_new_i64();
-
- tcg_gen_andi_i64(t0, b, 63);
- tcg_gen_rotl_i64(d, a, t0);
- tcg_temp_free_i64(t0);
-}
-
-static DisasJumpType op_verllv(DisasContext *s, DisasOps *o)
-{
- const uint8_t es = get_field(s, m4);
- static const GVecGen3 g[4] = {
- { .fno = gen_helper_gvec_verllv8, },
- { .fno = gen_helper_gvec_verllv16, },
- { .fni4 = gen_rll_i32, },
- { .fni8 = gen_rll_i64, },
- };
-
- if (es > ES_64) {
- gen_program_exception(s, PGM_SPECIFICATION);
- return DISAS_NORETURN;
- }
-
- gen_gvec_3(get_field(s, v1), get_field(s, v2),
- get_field(s, v3), &g[es]);
- return DISAS_NEXT;
-}
-
-static DisasJumpType op_verll(DisasContext *s, DisasOps *o)
-{
- const uint8_t es = get_field(s, m4);
- static const GVecGen2s g[4] = {
- { .fno = gen_helper_gvec_verll8, },
- { .fno = gen_helper_gvec_verll16, },
- { .fni4 = gen_rll_i32, },
- { .fni8 = gen_rll_i64, },
- };
-
- if (es > ES_64) {
- gen_program_exception(s, PGM_SPECIFICATION);
- return DISAS_NORETURN;
- }
- gen_gvec_2s(get_field(s, v1), get_field(s, v3), o->addr1,
- &g[es]);
- return DISAS_NEXT;
-}
-
static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c)
{
TCGv_i32 t = tcg_temp_new_i32();
@@ -1946,6 +1889,9 @@ static DisasJumpType op_vesv(DisasContext *s, DisasOps *o)
case 0x70:
gen_gvec_fn_3(shlv, es, v1, v2, v3);
break;
+ case 0x73:
+ gen_gvec_fn_3(rotlv, es, v1, v2, v3);
+ break;
case 0x7a:
gen_gvec_fn_3(sarv, es, v1, v2, v3);
break;
@@ -1977,6 +1923,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
case 0x30:
gen_gvec_fn_2i(shli, es, v1, v3, d2);
break;
+ case 0x33:
+ gen_gvec_fn_2i(rotli, es, v1, v3, d2);
+ break;
case 0x3a:
gen_gvec_fn_2i(sari, es, v1, v3, d2);
break;
@@ -1994,6 +1943,9 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o)
case 0x30:
gen_gvec_fn_2s(shls, es, v1, v3, shift);
break;
+ case 0x33:
+ gen_gvec_fn_2s(rotls, es, v1, v3, shift);
+ break;
case 0x3a:
gen_gvec_fn_2s(sars, es, v1, v3, shift);
break;
diff --git a/target/s390x/vec_int_helper.c b/target/s390x/vec_int_helper.c
index 0d6bc13..5561b3e 100644
--- a/target/s390x/vec_int_helper.c
+++ b/target/s390x/vec_int_helper.c
@@ -515,37 +515,6 @@ void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \
DEF_VPOPCT(8)
DEF_VPOPCT(16)
-#define DEF_VERLLV(BITS) \
-void HELPER(gvec_verllv##BITS)(void *v1, const void *v2, const void *v3, \
- uint32_t desc) \
-{ \
- int i; \
- \
- for (i = 0; i < (128 / BITS); i++) { \
- const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
- const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \
- \
- s390_vec_write_element##BITS(v1, i, rol##BITS(a, b)); \
- } \
-}
-DEF_VERLLV(8)
-DEF_VERLLV(16)
-
-#define DEF_VERLL(BITS) \
-void HELPER(gvec_verll##BITS)(void *v1, const void *v2, uint64_t count, \
- uint32_t desc) \
-{ \
- int i; \
- \
- for (i = 0; i < (128 / BITS); i++) { \
- const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
- \
- s390_vec_write_element##BITS(v1, i, rol##BITS(a, count)); \
- } \
-}
-DEF_VERLL(8)
-DEF_VERLL(16)
-
#define DEF_VERIM(BITS) \
void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \
uint32_t desc) \
diff --git a/tcg/README b/tcg/README
index bfa2e4e..a64f678 100644
--- a/tcg/README
+++ b/tcg/README
@@ -605,10 +605,11 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 8 << 2 -> i32.
* shri_vec v0, v1, i2
* sari_vec v0, v1, i2
+* rotli_vec v0, v1, i2
* shrs_vec v0, v1, s2
* sars_vec v0, v1, s2
- Similarly for logical and arithmetic right shift.
+ Similarly for logical and arithmetic right shift, and left rotate.
* shlv_vec v0, v1, v2
@@ -620,8 +621,10 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 8 << 2 -> i32.
* shrv_vec v0, v1, v2
* sarv_vec v0, v1, v2
+* rotlv_vec v0, v1, v2
+* rotrv_vec v0, v1, v2
- Similarly for logical and arithmetic right shift.
+ Similarly for logical and arithmetic right shift, and rotates.
* cmp_vec v0, v1, v2, cond
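At the gvec layer the rotate-by-vector expanders, like the shifts, reduce each count modulo the element size (the out-of-line helpers earlier in this pull mask with element_bits - 1 before rotating). Element-wise, the MO_8 case behaves as in this sketch, with rol8 from qemu/bitops.h:

static void rotlv_bytes_sketch(uint8_t *d, const uint8_t *a,
                               const uint8_t *b, size_t oprsz)
{
    for (size_t i = 0; i < oprsz; i++) {
        d[i] = rol8(a[i], b[i] & 7);   /* count reduced modulo 8 */
    }
}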
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index ca214f6..9bc2a5e 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -133,6 +133,9 @@ typedef enum {
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 843fd0c..760b0e7 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -557,6 +557,7 @@ typedef enum {
I3614_SSHR = 0x0f000400,
I3614_SSRA = 0x0f001400,
I3614_SHL = 0x0f005400,
+ I3614_SLI = 0x2f005400,
I3614_USHR = 0x2f000400,
I3614_USRA = 0x2f001400,
@@ -2411,6 +2412,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sari_vec:
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
break;
+ case INDEX_op_aa64_sli_vec:
+ tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+ break;
case INDEX_op_shlv_vec:
tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
break;
@@ -2498,8 +2502,11 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shlv_vec:
case INDEX_op_bitsel_vec:
return 1;
+ case INDEX_op_rotli_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
return -1;
case INDEX_op_mul_vec:
case INDEX_op_smax_vec:
@@ -2517,14 +2524,24 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2, t1;
+ TCGv_vec v0, v1, v2, t1, t2;
+ TCGArg a2;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
- v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+ v2 = temp_tcgv_vec(arg_temp(a2));
switch (opc) {
+ case INDEX_op_rotli_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
+ vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
+ tcg_temp_free_vec(t1);
+ break;
+
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
/* Right shifts are negative left shifts for AArch64. */
@@ -2537,6 +2554,35 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
tcg_temp_free_vec(t1);
break;
+ case INDEX_op_rotlv_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_dupi_vec(vece, t1, 8 << vece);
+ tcg_gen_sub_vec(vece, t1, v2, t1);
+ /* Right shifts are negative left shifts for AArch64. */
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ tcg_gen_or_vec(vece, v0, v0, t1);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotrv_vec:
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t1, v2);
+ tcg_gen_dupi_vec(vece, t2, 8 << vece);
+ tcg_gen_add_vec(vece, t2, t1, t2);
+ /* Right shifts are negative left shifts for AArch64. */
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+ tcg_gen_or_vec(vece, v0, t1, t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
default:
g_assert_not_reached();
}
@@ -2557,6 +2603,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
+ static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
@@ -2751,6 +2798,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return &w_w_wZ;
case INDEX_op_bitsel_vec:
return &w_w_w_w;
+ case INDEX_op_aa64_sli_vec:
+ return &w_0_w;
default:
return NULL;
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h
index 26bfd9c..bce30ac 100644
--- a/tcg/aarch64/tcg-target.opc.h
+++ b/tcg/aarch64/tcg-target.opc.h
@@ -12,3 +12,4 @@
*/
DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
+DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)
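The aarch64 rotli expansion pairs USHR with the newly added SLI (shift left and insert): the complementary right shift produces the wrapped-around low bits, then SLI shifts the source left and inserts it over them, preserving only the low bits below the insert point. The identity for a 32-bit element, as a C sketch:

uint32_t rotl32_shri_sli(uint32_t x, unsigned n)   /* 0 < n < 32 */
{
    uint32_t t = x >> (32 - n);               /* USHR: wrapped-around low bits */
    return (x << n) | (t & ((1u << n) - 1));  /* SLI keeps t's low n bits      */
}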
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index bfb3f5f..99ac1e3 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -183,6 +183,9 @@ extern bool have_avx2;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 1
#define TCG_TARGET_HAS_shv_vec have_avx2
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index ec083bd..ae02282 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -3233,6 +3233,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
+ case INDEX_op_rotls_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@@ -3271,6 +3272,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
return 1;
+ case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
@@ -3297,12 +3299,17 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return vece >= MO_16;
case INDEX_op_sars_vec:
return vece >= MO_16 && vece <= MO_32;
+ case INDEX_op_rotls_vec:
+ return vece >= MO_16 ? -1 : 0;
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
return have_avx2 && vece >= MO_32;
case INDEX_op_sarv_vec:
return have_avx2 && vece == MO_32;
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return have_avx2 && vece >= MO_32 ? -1 : 0;
case INDEX_op_mul_vec:
if (vece == MO_8) {
@@ -3331,7 +3338,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
-static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
+static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t1, t2;
@@ -3341,26 +3348,31 @@ static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
- /* Unpack to W, shift, and repack. Tricky bits:
- (1) Use punpck*bw x,x to produce DDCCBBAA,
- i.e. duplicate in other half of the 16-bit lane.
- (2) For right-shift, add 8 so that the high half of
- the lane becomes zero. For left-shift, we must
- shift up and down again.
- (3) Step 2 leaves high half zero such that PACKUSWB
- (pack with unsigned saturation) does not modify
- the quantity. */
+ /*
+ * Unpack to W, shift, and repack. Tricky bits:
+ * (1) Use punpck*bw x,x to produce DDCCBBAA,
+ * i.e. duplicate in other half of the 16-bit lane.
+ * (2) For right-shift, add 8 so that the high half of the lane
+ * becomes zero. For left-shift, and left-rotate, we must
+ * shift up and down again.
+ * (3) Step 2 leaves high half zero such that PACKUSWB
+ * (pack with unsigned saturation) does not modify
+ * the quantity.
+ */
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
- if (shr) {
- tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
+ if (opc != INDEX_op_rotli_vec) {
+ imm += 8;
+ }
+ if (opc == INDEX_op_shri_vec) {
+ tcg_gen_shri_vec(MO_16, t1, t1, imm);
+ tcg_gen_shri_vec(MO_16, t2, t2, imm);
} else {
- tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
+ tcg_gen_shli_vec(MO_16, t1, t1, imm);
+ tcg_gen_shli_vec(MO_16, t2, t2, imm);
tcg_gen_shri_vec(MO_16, t1, t1, 8);
tcg_gen_shri_vec(MO_16, t2, t2, 8);
}
@@ -3427,6 +3439,61 @@ static void expand_vec_sari(TCGType type, unsigned vece,
}
}
+static void expand_vec_rotli(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{
+ TCGv_vec t;
+
+ if (vece == MO_8) {
+ expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
+ return;
+ }
+
+ t = tcg_temp_new_vec(type);
+ tcg_gen_shli_vec(vece, t, v1, imm);
+ tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{
+ TCGv_i32 rsh;
+ TCGv_vec t;
+
+ tcg_debug_assert(vece != MO_8);
+
+ t = tcg_temp_new_vec(type);
+ rsh = tcg_temp_new_i32();
+
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+ tcg_gen_shls_vec(vece, t, v1, lsh);
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+ tcg_temp_free_i32(rsh);
+}
+
+static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec sh, bool right)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ tcg_gen_dupi_vec(vece, t, 8 << vece);
+ tcg_gen_sub_vec(vece, t, t, sh);
+ if (right) {
+ tcg_gen_shlv_vec(vece, t, v1, t);
+ tcg_gen_shrv_vec(vece, v0, v1, sh);
+ } else {
+ tcg_gen_shrv_vec(vece, t, v1, t);
+ tcg_gen_shlv_vec(vece, v0, v1, sh);
+ }
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@@ -3636,13 +3703,30 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
- expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
+ expand_vec_shi(type, vece, opc, v0, v1, a2);
break;
case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
+ case INDEX_op_rotli_vec:
+ expand_vec_rotli(type, vece, v0, v1, a2);
+ break;
+
+ case INDEX_op_rotls_vec:
+ expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
+ break;
+
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_rotv(type, vece, v0, v1, v2, false);
+ break;
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_rotv(type, vece, v0, v1, v2, true);
+ break;
+
case INDEX_op_mul_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
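expand_vec_rotv composes each rotate from two AVX2 variable shifts with complementary counts. When a lane's count is 0 the complementary count equals the element width; the AVX2 variable-shift instructions return 0 for out-of-range counts, so that lane degenerates to v1 | 0 as required. The per-lane identity in C (plain C must special-case the zero count to stay defined):

uint32_t rotl32_two_shifts(uint32_t x, unsigned sh)   /* sh in 0..31 */
{
    uint32_t hi = x << sh;
    uint32_t lo = sh ? x >> (32 - sh) : 0;  /* VPSRLVD by 32 yields 0 anyway */
    return hi | lo;
}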
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 4fa21f0..be5b290 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -161,6 +161,9 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
#define TCG_TARGET_HAS_shi_vec 0
#define TCG_TARGET_HAS_shs_vec 0
#define TCG_TARGET_HAS_shv_vec 1
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index ee1f922..7da6708 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -2995,6 +2995,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
return vece <= MO_32 || have_isa_2_07;
case INDEX_op_ssadd_vec:
case INDEX_op_sssub_vec:
@@ -3005,6 +3006,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
case INDEX_op_neg_vec:
return vece >= MO_32 && have_isa_3_00;
@@ -3019,6 +3021,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
return 0;
case INDEX_op_bitsel_vec:
return have_vsx;
+ case INDEX_op_rotrv_vec:
+ return -1;
default:
return 0;
}
@@ -3301,7 +3305,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_ppc_pkum_vec:
insn = pkum_op[vece];
break;
- case INDEX_op_ppc_rotl_vec:
+ case INDEX_op_rotlv_vec:
insn = rotl_op[vece];
break;
case INDEX_op_ppc_msum_vec:
@@ -3409,7 +3413,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
t3 = tcg_temp_new_vec(type);
t4 = tcg_temp_new_vec(type);
tcg_gen_dupi_vec(MO_8, t4, -16);
- vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1),
+ vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
tcgv_vec_arg(v2), tcgv_vec_arg(t4));
vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
tcgv_vec_arg(v1), tcgv_vec_arg(v2));
@@ -3434,7 +3438,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2;
+ TCGv_vec v0, v1, v2, t0;
TCGArg a2;
va_start(va, a0);
@@ -3452,6 +3456,9 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
case INDEX_op_sari_vec:
expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
break;
+ case INDEX_op_rotli_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
+ break;
case INDEX_op_cmp_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
@@ -3460,6 +3467,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
break;
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t0 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t0, v2);
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
+ tcg_temp_free_vec(t0);
+ break;
default:
g_assert_not_reached();
}
@@ -3664,12 +3678,13 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
case INDEX_op_ppc_mrgh_vec:
case INDEX_op_ppc_mrgl_vec:
case INDEX_op_ppc_muleu_vec:
case INDEX_op_ppc_mulou_vec:
case INDEX_op_ppc_pkum_vec:
- case INDEX_op_ppc_rotl_vec:
case INDEX_op_dup2_vec:
return &v_v_v;
case INDEX_op_not_vec:
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
index 1373f77..db51440 100644
--- a/tcg/ppc/tcg-target.opc.h
+++ b/tcg/ppc/tcg-target.opc.h
@@ -30,4 +30,3 @@ DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
-DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC)
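AltiVec/VSX provides only left rotates (vrlb/vrlh/vrlw/vrld), and the hardware masks each count to the element size, so the backend lowers rotrv as a left rotate by the negated count: rotr(x, s) == rotl(x, -s mod bits) for every s. A scalar sketch of the identity:

uint8_t rotr8_via_rotl(uint8_t x, unsigned s)
{
    unsigned n = (0u - s) & 7;   /* -s modulo 8, mirroring neg_vec + masking */
    return (uint8_t)((x << n) | (x >> ((8 - n) & 7)));
}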
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index 049a55e..3707c0e 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -2694,6 +2694,74 @@ void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
}
}
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << c);
+
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_shri_i64(a, a, 8 - c);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(a, a, ~mask);
+ tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << c);
+
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_shri_i64(a, a, 16 - c);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(a, a, ~mask);
+ tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_rotl8i_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl8i,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_rotl16i_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl16i,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotli_i32,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl32i,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotli_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl64i,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
+ oprsz, maxsz);
+}
+
/*
* Specialized generation vector shifts by a non-constant scalar.
*/
@@ -2908,6 +2976,28 @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2sh g = {
+ .fni4 = tcg_gen_rotl_i32,
+ .fni8 = tcg_gen_rotl_i64,
+ .fniv_s = tcg_gen_rotls_vec,
+ .fniv_v = tcg_gen_rotlv_vec,
+ .fno = {
+ gen_helper_gvec_rotl8i,
+ gen_helper_gvec_rotl16i,
+ gen_helper_gvec_rotl32i,
+ gen_helper_gvec_rotl64i,
+ },
+ .s_list = { INDEX_op_rotls_vec, 0 },
+ .v_list = { INDEX_op_rotlv_vec, 0 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
+}
+
/*
* Expand D = A << (B % element bits)
*
@@ -3103,6 +3193,128 @@ void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}
+/*
+ * Similarly for rotates.
+ */
+
+static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, t, t, b);
+ tcg_gen_rotlv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_rotl_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_rotl_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotl_mod_i32,
+ .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotl_mod_i64,
+ .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, t, (8 << vece) - 1);
+ tcg_gen_and_vec(vece, t, t, b);
+ tcg_gen_rotrv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_rotr_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_rotr_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotr_mod_i32,
+ .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotr_mod_i64,
+ .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
uint32_t oprsz, TCGCond cond)
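The fni8 expansions tcg_gen_vec_rotl8i_i64/rotl16i_i64 rotate all packed sub-elements of one 64-bit value at once: two full-width shifts plus a replicated mask that keeps bits from leaking between neighbouring elements. The byte case in plain C, as a sketch:

uint64_t rotl8x8_sketch(uint64_t a, unsigned c)   /* 0 < c < 8 */
{
    /* dup_const(MO_8, 0xff << c): the per-byte kept-high-bits mask. */
    uint64_t mask = 0x0101010101010101ull * (uint8_t)(0xffu << c);
    uint64_t hi = (a << c) & mask;         /* low bits moved up, per byte    */
    uint64_t lo = (a >> (8 - c)) & ~mask;  /* high bits wrapped to the bottom */
    return hi | lo;
}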
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index b6937e8..f784517 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -545,6 +545,18 @@ void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
+}
+
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ int bits = 8 << vece;
+ tcg_debug_assert(i >= 0 && i < bits);
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
+}
+
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
@@ -684,8 +696,18 @@ void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
+}
+
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
+}
+
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
- TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
+ TCGv_i32 s, TCGOpcode opc)
{
TCGTemp *rt = tcgv_vec_temp(r);
TCGTemp *at = tcgv_vec_temp(a);
@@ -694,48 +716,40 @@ static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
TCGArg ai = temp_arg(at);
TCGArg si = temp_arg(st);
TCGType type = rt->base_type;
- const TCGOpcode *hold_list;
int can;
tcg_debug_assert(at->base_type >= type);
- tcg_assert_listed_vecop(opc_s);
- hold_list = tcg_swap_vecop_list(NULL);
-
- can = tcg_can_emit_vec_op(opc_s, type, vece);
+ tcg_assert_listed_vecop(opc);
+ can = tcg_can_emit_vec_op(opc, type, vece);
if (can > 0) {
- vec_gen_3(opc_s, type, vece, ri, ai, si);
+ vec_gen_3(opc, type, vece, ri, ai, si);
} else if (can < 0) {
- tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_expand_vec_op(opc, type, vece, ri, ai, si);
+ tcg_swap_vecop_list(hold_list);
} else {
- TCGv_vec vec_s = tcg_temp_new_vec(type);
-
- if (vece == MO_64) {
- TCGv_i64 s64 = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(s64, s);
- tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
- tcg_temp_free_i64(s64);
- } else {
- tcg_gen_dup_i32_vec(vece, vec_s, s);
- }
- do_op3_nofail(vece, r, a, vec_s, opc_v);
- tcg_temp_free_vec(vec_s);
+ g_assert_not_reached();
}
- tcg_swap_vecop_list(hold_list);
}
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
- do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
+ do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
- do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
+ do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
- do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
+ do_shifts(vece, r, a, b, INDEX_op_sars_vec);
+}
+
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
+{
+ do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a2268d9..1aa6cb4 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1661,6 +1661,13 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
return have_vec && TCG_TARGET_HAS_shv_vec;
+ case INDEX_op_rotli_vec:
+ return have_vec && TCG_TARGET_HAS_roti_vec;
+ case INDEX_op_rotls_vec:
+ return have_vec && TCG_TARGET_HAS_rots_vec;
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return have_vec && TCG_TARGET_HAS_rotv_vec;
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
@@ -2975,34 +2982,68 @@ static bool liveness_pass_2(TCGContext *s)
}
/* Outputs become available. */
- for (i = 0; i < nb_oargs; i++) {
- arg_ts = arg_temp(op->args[i]);
+ if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
+ arg_ts = arg_temp(op->args[0]);
dir_ts = arg_ts->state_ptr;
- if (!dir_ts) {
- continue;
+ if (dir_ts) {
+ op->args[0] = temp_arg(dir_ts);
+ changes = true;
+
+ /* The output is now live and modified. */
+ arg_ts->state = 0;
+
+ if (NEED_SYNC_ARG(0)) {
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+ ? INDEX_op_st_i32
+ : INDEX_op_st_i64);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);
+ TCGTemp *out_ts = dir_ts;
+
+ if (IS_DEAD_ARG(0)) {
+ out_ts = arg_temp(op->args[1]);
+ arg_ts->state = TS_DEAD;
+ tcg_op_remove(s, op);
+ } else {
+ arg_ts->state = TS_MEM;
+ }
+
+ sop->args[0] = temp_arg(out_ts);
+ sop->args[1] = temp_arg(arg_ts->mem_base);
+ sop->args[2] = arg_ts->mem_offset;
+ } else {
+ tcg_debug_assert(!IS_DEAD_ARG(0));
+ }
}
- op->args[i] = temp_arg(dir_ts);
- changes = true;
+ } else {
+ for (i = 0; i < nb_oargs; i++) {
+ arg_ts = arg_temp(op->args[i]);
+ dir_ts = arg_ts->state_ptr;
+ if (!dir_ts) {
+ continue;
+ }
+ op->args[i] = temp_arg(dir_ts);
+ changes = true;
- /* The output is now live and modified. */
- arg_ts->state = 0;
+ /* The output is now live and modified. */
+ arg_ts->state = 0;
- /* Sync outputs upon their last write. */
- if (NEED_SYNC_ARG(i)) {
- TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
- ? INDEX_op_st_i32
- : INDEX_op_st_i64);
- TCGOp *sop = tcg_op_insert_after(s, op, sopc);
+ /* Sync outputs upon their last write. */
+ if (NEED_SYNC_ARG(i)) {
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+ ? INDEX_op_st_i32
+ : INDEX_op_st_i64);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);
- sop->args[0] = temp_arg(dir_ts);
- sop->args[1] = temp_arg(arg_ts->mem_base);
- sop->args[2] = arg_ts->mem_offset;
+ sop->args[0] = temp_arg(dir_ts);
+ sop->args[1] = temp_arg(arg_ts->mem_base);
+ sop->args[2] = arg_ts->mem_offset;
- arg_ts->state = TS_MEM;
- }
- /* Drop outputs that are dead. */
- if (IS_DEAD_ARG(i)) {
- arg_ts->state = TS_DEAD;
+ arg_ts->state = TS_MEM;
+ }
+ /* Drop outputs that are dead. */
+ if (IS_DEAD_ARG(i)) {
+ arg_ts->state = TS_DEAD;
+ }
}
}
}
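Schematically, the liveness_pass_2 change above folds a mov into its sync store when the mov's destination must be synced and then dies; a sketch of the opcode streams (not actual TCG dump output):

/* Before: mov to a temp that is synced and immediately dead:
 *     mov_i32  tmp, src
 *     st_i32   tmp, env, $off
 * After: the mov is removed and the store reads the source directly:
 *     st_i32   src, env, $off
 */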