aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com> 2019-03-04 17:51:22 +0100
committer Aleksandar Markovic <amarkovic@wavecomp.com> 2019-06-01 20:20:20 +0200
commit 0df911fd7f482b796c9f10aa8e086fb3fb9f0f18 (patch)
tree 8a980a73cf3de6f27a9dfc8717538da1b97a6b12
parent baf50011157bf5747c623f171f93f9e3d9dff615 (diff)
download qemu-0df911fd7f482b796c9f10aa8e086fb3fb9f0f18.zip
qemu-0df911fd7f482b796c9f10aa8e086fb3fb9f0f18.tar.gz
qemu-0df911fd7f482b796c9f10aa8e086fb3fb9f0f18.tar.bz2
target/mips: Improve performance of certain MSA instructions
Eliminate loops for better performance. The following MSA instructions from the "UNOP" group are affected: - NLZC.<B|H|W|D> - NLOC.<B|H|W|D> - PCNT.<B|H|W|D> The following MSA instructions from the "BINOP" group are affected: - ADD_A.<B|H|W|D> - ADDS_A.<B|H|W|D> - ADDS_S.<B|H|W|D> - ADDS_U.<B|H|W|D> - ADDV.<B|H|W|D> - ASUB_S.<B|H|W|D> - ASUB_U.<B|H|W|D> - AVE_S.<B|H|W|D> - AVE_U.<B|H|W|D> - AVER_S.<B|H|W|D> - AVER_U.<B|H|W|D> - BCLR.<B|H|W|D> - BNEG.<B|H|W|D> - BSET.<B|H|W|D> - CEQ.<B|H|W|D> - CLE_S.<B|H|W|D> - CLE_U.<B|H|W|D> - CLT_S.<B|H|W|D> - CLT_U.<B|H|W|D> - DIV_S.<B|H|W|D> - DIV_U.<B|H|W|D> - DOTP_S.<B|H|W|D> - DOTP_U.<B|H|W|D> - HADD_S.<B|H|W|D> - HADD_U.<B|H|W|D> - HSUB_S.<B|H|W|D> - HSUB_U.<B|H|W|D> - MAX_A.<B|H|W|D> - MAX_S.<B|H|W|D> - MAX_U.<B|H|W|D> - MIN_A.<B|H|W|D> - MIN_S.<B|H|W|D> - MIN_U.<B|H|W|D> - MOD_S.<B|H|W|D> - MOD_U.<B|H|W|D> - MUL_Q.<B|H|W|D> - MULR_Q.<B|H|W|D> - MULV.<B|H|W|D> - SLL.<B|H|W|D> - SRA.<B|H|W|D> - SRAR.<B|H|W|D> - SRL.<B|H|W|D> - SRLR.<B|H|W|D> - SUBS_S.<B|H|W|D> - SUBS_U.<B|H|W|D> - SUBSUS_U.<B|H|W|D> - SUBSUU_S.<B|H|W|D> - SUBV.<B|H|W|D> The following MSA instructions from the "TEROP" group are affected: - BINSL.<B|H|W|D> - BINSR.<B|H|W|D> - DPADD_S.<B|H|W|D> - DPADD_U.<B|H|W|D> - DPSUB_S.<B|H|W|D> - DPSUB_U.<B|H|W|D> - MADD_Q.<B|H|W|D> - MADDR_Q.<B|H|W|D> - MADDV.<B|H|W|D> - MSUB_Q.<B|H|W|D> - MSUBR_Q.<B|H|W|D> - MSUBV.<B|H|W|D> Additionally, the following MSA instructions are also affected: - ILVL.<B|H|W|D> - ILVR.<B|H|W|D> - ILVEV.<B|H|W|D> - ILVOD.<B|H|W|D> - PCKEV.<B|H|W|D> - PCKOD.<B|H|W|D> Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com> Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> Message-Id: <1551718283-4487-2-git-send-email-mateja.marjanovic@rt-rk.com>
-rw-r--r-- target/mips/msa_helper.c 542
1 files changed, 433 insertions, 109 deletions
diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index ee1b1fa..f6e16c2 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -805,28 +805,45 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \
wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \
wr_t *pws = &(env->active_fpu.fpr[ws].wr); \
wr_t *pwt = &(env->active_fpu.fpr[wt].wr); \
- uint32_t i; \
\
switch (df) { \
case DF_BYTE: \
- for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { \
- pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], pwt->b[i]); \
- } \
+ pwd->b[0] = msa_ ## func ## _df(df, pws->b[0], pwt->b[0]); \
+ pwd->b[1] = msa_ ## func ## _df(df, pws->b[1], pwt->b[1]); \
+ pwd->b[2] = msa_ ## func ## _df(df, pws->b[2], pwt->b[2]); \
+ pwd->b[3] = msa_ ## func ## _df(df, pws->b[3], pwt->b[3]); \
+ pwd->b[4] = msa_ ## func ## _df(df, pws->b[4], pwt->b[4]); \
+ pwd->b[5] = msa_ ## func ## _df(df, pws->b[5], pwt->b[5]); \
+ pwd->b[6] = msa_ ## func ## _df(df, pws->b[6], pwt->b[6]); \
+ pwd->b[7] = msa_ ## func ## _df(df, pws->b[7], pwt->b[7]); \
+ pwd->b[8] = msa_ ## func ## _df(df, pws->b[8], pwt->b[8]); \
+ pwd->b[9] = msa_ ## func ## _df(df, pws->b[9], pwt->b[9]); \
+ pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]); \
+ pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]); \
+ pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]); \
+ pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]); \
+ pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]); \
+ pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]); \
break; \
case DF_HALF: \
- for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { \
- pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], pwt->h[i]); \
- } \
+ pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]); \
+ pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]); \
+ pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]); \
+ pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]); \
+ pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]); \
+ pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]); \
+ pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]); \
+ pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]); \
break; \
case DF_WORD: \
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { \
- pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], pwt->w[i]); \
- } \
+ pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]); \
+ pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]); \
+ pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]); \
+ pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]); \
break; \
case DF_DOUBLE: \
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { \
- pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], pwt->d[i]); \
- } \
+ pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]); \
+ pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]); \
break; \
default: \
assert(0); \
@@ -1012,42 +1029,71 @@ static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
}
#define MSA_TEROP_DF(func) \
-void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
- uint32_t ws, uint32_t wt) \
-{ \
- wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \
- wr_t *pws = &(env->active_fpu.fpr[ws].wr); \
- wr_t *pwt = &(env->active_fpu.fpr[wt].wr); \
- uint32_t i; \
- \
- switch (df) { \
- case DF_BYTE: \
- for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { \
- pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i], \
- pwt->b[i]); \
- } \
- break; \
- case DF_HALF: \
- for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { \
- pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i], \
- pwt->h[i]); \
- } \
- break; \
- case DF_WORD: \
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { \
- pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i], \
- pwt->w[i]); \
- } \
- break; \
- case DF_DOUBLE: \
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { \
- pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i], \
- pwt->d[i]); \
- } \
- break; \
- default: \
- assert(0); \
- } \
+void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
+ uint32_t ws, uint32_t wt) \
+{ \
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr); \
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr); \
+ \
+ switch (df) { \
+ case DF_BYTE: \
+ pwd->b[0] = msa_ ## func ## _df(df, pwd->b[0], pws->b[0], \
+ pwt->b[0]); \
+ pwd->b[1] = msa_ ## func ## _df(df, pwd->b[1], pws->b[1], \
+ pwt->b[1]); \
+ pwd->b[2] = msa_ ## func ## _df(df, pwd->b[2], pws->b[2], \
+ pwt->b[2]); \
+ pwd->b[3] = msa_ ## func ## _df(df, pwd->b[3], pws->b[3], \
+ pwt->b[3]); \
+ pwd->b[4] = msa_ ## func ## _df(df, pwd->b[4], pws->b[4], \
+ pwt->b[4]); \
+ pwd->b[5] = msa_ ## func ## _df(df, pwd->b[5], pws->b[5], \
+ pwt->b[5]); \
+ pwd->b[6] = msa_ ## func ## _df(df, pwd->b[6], pws->b[6], \
+ pwt->b[6]); \
+ pwd->b[7] = msa_ ## func ## _df(df, pwd->b[7], pws->b[7], \
+ pwt->b[7]); \
+ pwd->b[8] = msa_ ## func ## _df(df, pwd->b[8], pws->b[8], \
+ pwt->b[8]); \
+ pwd->b[9] = msa_ ## func ## _df(df, pwd->b[9], pws->b[9], \
+ pwt->b[9]); \
+ pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10], \
+ pwt->b[10]); \
+ pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11], \
+ pwt->b[11]); \
+ pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12], \
+ pwt->b[12]); \
+ pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13], \
+ pwt->b[13]); \
+ pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14], \
+ pwt->b[14]); \
+ pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15], \
+ pwt->b[15]); \
+ break; \
+ case DF_HALF: \
+ pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
+ pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
+ pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
+ pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
+ pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
+ pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
+ pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
+ pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
+ break; \
+ case DF_WORD: \
+ pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
+ pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
+ pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
+ pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
+ break; \
+ case DF_DOUBLE: \
+ pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
+ pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
+ break; \
+ default: \
+ assert(0); \
+ } \
}
MSA_TEROP_DF(maddv)
@@ -1167,53 +1213,6 @@ void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
#define Rd(pwr, i) (pwr->d[i])
#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE)/2])
-#define MSA_DO(DF) \
- do { \
- R##DF(pwx, i) = pwt->DF[2*i]; \
- L##DF(pwx, i) = pws->DF[2*i]; \
- } while (0)
-MSA_FN_DF(pckev_df)
-#undef MSA_DO
-
-#define MSA_DO(DF) \
- do { \
- R##DF(pwx, i) = pwt->DF[2*i+1]; \
- L##DF(pwx, i) = pws->DF[2*i+1]; \
- } while (0)
-MSA_FN_DF(pckod_df)
-#undef MSA_DO
-
-#define MSA_DO(DF) \
- do { \
- pwx->DF[2*i] = L##DF(pwt, i); \
- pwx->DF[2*i+1] = L##DF(pws, i); \
- } while (0)
-MSA_FN_DF(ilvl_df)
-#undef MSA_DO
-
-#define MSA_DO(DF) \
- do { \
- pwx->DF[2*i] = R##DF(pwt, i); \
- pwx->DF[2*i+1] = R##DF(pws, i); \
- } while (0)
-MSA_FN_DF(ilvr_df)
-#undef MSA_DO
-
-#define MSA_DO(DF) \
- do { \
- pwx->DF[2*i] = pwt->DF[2*i]; \
- pwx->DF[2*i+1] = pws->DF[2*i]; \
- } while (0)
-MSA_FN_DF(ilvev_df)
-#undef MSA_DO
-
-#define MSA_DO(DF) \
- do { \
- pwx->DF[2*i] = pwt->DF[2*i+1]; \
- pwx->DF[2*i+1] = pws->DF[2*i+1]; \
- } while (0)
-MSA_FN_DF(ilvod_df)
-#undef MSA_DO
#undef MSA_LOOP_COND
#define MSA_LOOP_COND(DF) \
@@ -1231,6 +1230,314 @@ MSA_FN_DF(vshf_df)
#undef MSA_LOOP_COND
#undef MSA_FN_DF
+
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+ switch (df) {
+ case DF_BYTE:
+ pwd->b[15] = pws->b[14];
+ pwd->b[14] = pwt->b[14];
+ pwd->b[13] = pws->b[12];
+ pwd->b[12] = pwt->b[12];
+ pwd->b[11] = pws->b[10];
+ pwd->b[10] = pwt->b[10];
+ pwd->b[9] = pws->b[8];
+ pwd->b[8] = pwt->b[8];
+ pwd->b[7] = pws->b[6];
+ pwd->b[6] = pwt->b[6];
+ pwd->b[5] = pws->b[4];
+ pwd->b[4] = pwt->b[4];
+ pwd->b[3] = pws->b[2];
+ pwd->b[2] = pwt->b[2];
+ pwd->b[1] = pws->b[0];
+ pwd->b[0] = pwt->b[0];
+ break;
+ case DF_HALF:
+ pwd->h[7] = pws->h[6];
+ pwd->h[6] = pwt->h[6];
+ pwd->h[5] = pws->h[4];
+ pwd->h[4] = pwt->h[4];
+ pwd->h[3] = pws->h[2];
+ pwd->h[2] = pwt->h[2];
+ pwd->h[1] = pws->h[0];
+ pwd->h[0] = pwt->h[0];
+ break;
+ case DF_WORD:
+ pwd->w[3] = pws->w[2];
+ pwd->w[2] = pwt->w[2];
+ pwd->w[1] = pws->w[0];
+ pwd->w[0] = pwt->w[0];
+ break;
+ case DF_DOUBLE:
+ pwd->d[1] = pws->d[0];
+ pwd->d[0] = pwt->d[0];
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+ switch (df) {
+ case DF_BYTE:
+ pwd->b[0] = pwt->b[1];
+ pwd->b[1] = pws->b[1];
+ pwd->b[2] = pwt->b[3];
+ pwd->b[3] = pws->b[3];
+ pwd->b[4] = pwt->b[5];
+ pwd->b[5] = pws->b[5];
+ pwd->b[6] = pwt->b[7];
+ pwd->b[7] = pws->b[7];
+ pwd->b[8] = pwt->b[9];
+ pwd->b[9] = pws->b[9];
+ pwd->b[10] = pwt->b[11];
+ pwd->b[11] = pws->b[11];
+ pwd->b[12] = pwt->b[13];
+ pwd->b[13] = pws->b[13];
+ pwd->b[14] = pwt->b[15];
+ pwd->b[15] = pws->b[15];
+ break;
+ case DF_HALF:
+ pwd->h[0] = pwt->h[1];
+ pwd->h[1] = pws->h[1];
+ pwd->h[2] = pwt->h[3];
+ pwd->h[3] = pws->h[3];
+ pwd->h[4] = pwt->h[5];
+ pwd->h[5] = pws->h[5];
+ pwd->h[6] = pwt->h[7];
+ pwd->h[7] = pws->h[7];
+ break;
+ case DF_WORD:
+ pwd->w[0] = pwt->w[1];
+ pwd->w[1] = pws->w[1];
+ pwd->w[2] = pwt->w[3];
+ pwd->w[3] = pws->w[3];
+ break;
+ case DF_DOUBLE:
+ pwd->d[0] = pwt->d[1];
+ pwd->d[1] = pws->d[1];
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+ switch (df) {
+ case DF_BYTE:
+ pwd->b[0] = pwt->b[8];
+ pwd->b[1] = pws->b[8];
+ pwd->b[2] = pwt->b[9];
+ pwd->b[3] = pws->b[9];
+ pwd->b[4] = pwt->b[10];
+ pwd->b[5] = pws->b[10];
+ pwd->b[6] = pwt->b[11];
+ pwd->b[7] = pws->b[11];
+ pwd->b[8] = pwt->b[12];
+ pwd->b[9] = pws->b[12];
+ pwd->b[10] = pwt->b[13];
+ pwd->b[11] = pws->b[13];
+ pwd->b[12] = pwt->b[14];
+ pwd->b[13] = pws->b[14];
+ pwd->b[14] = pwt->b[15];
+ pwd->b[15] = pws->b[15];
+ break;
+ case DF_HALF:
+ pwd->h[0] = pwt->h[4];
+ pwd->h[1] = pws->h[4];
+ pwd->h[2] = pwt->h[5];
+ pwd->h[3] = pws->h[5];
+ pwd->h[4] = pwt->h[6];
+ pwd->h[5] = pws->h[6];
+ pwd->h[6] = pwt->h[7];
+ pwd->h[7] = pws->h[7];
+ break;
+ case DF_WORD:
+ pwd->w[0] = pwt->w[2];
+ pwd->w[1] = pws->w[2];
+ pwd->w[2] = pwt->w[3];
+ pwd->w[3] = pws->w[3];
+ break;
+ case DF_DOUBLE:
+ pwd->d[0] = pwt->d[1];
+ pwd->d[1] = pws->d[1];
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void helper_msa_ilvr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+ switch (df) {
+ case DF_BYTE:
+ pwd->b[15] = pws->b[7];
+ pwd->b[14] = pwt->b[7];
+ pwd->b[13] = pws->b[6];
+ pwd->b[12] = pwt->b[6];
+ pwd->b[11] = pws->b[5];
+ pwd->b[10] = pwt->b[5];
+ pwd->b[9] = pws->b[4];
+ pwd->b[8] = pwt->b[4];
+ pwd->b[7] = pws->b[3];
+ pwd->b[6] = pwt->b[3];
+ pwd->b[5] = pws->b[2];
+ pwd->b[4] = pwt->b[2];
+ pwd->b[3] = pws->b[1];
+ pwd->b[2] = pwt->b[1];
+ pwd->b[1] = pws->b[0];
+ pwd->b[0] = pwt->b[0];
+ break;
+ case DF_HALF:
+ pwd->h[7] = pws->h[3];
+ pwd->h[6] = pwt->h[3];
+ pwd->h[5] = pws->h[2];
+ pwd->h[4] = pwt->h[2];
+ pwd->h[3] = pws->h[1];
+ pwd->h[2] = pwt->h[1];
+ pwd->h[1] = pws->h[0];
+ pwd->h[0] = pwt->h[0];
+ break;
+ case DF_WORD:
+ pwd->w[3] = pws->w[1];
+ pwd->w[2] = pwt->w[1];
+ pwd->w[1] = pws->w[0];
+ pwd->w[0] = pwt->w[0];
+ break;
+ case DF_DOUBLE:
+ pwd->d[1] = pws->d[0];
+ pwd->d[0] = pwt->d[0];
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void helper_msa_pckev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+ switch (df) {
+ case DF_BYTE:
+ pwd->b[15] = pws->b[14];
+ pwd->b[13] = pws->b[10];
+ pwd->b[11] = pws->b[6];
+ pwd->b[9] = pws->b[2];
+ pwd->b[7] = pwt->b[14];
+ pwd->b[5] = pwt->b[10];
+ pwd->b[3] = pwt->b[6];
+ pwd->b[1] = pwt->b[2];
+ pwd->b[14] = pws->b[12];
+ pwd->b[10] = pws->b[4];
+ pwd->b[6] = pwt->b[12];
+ pwd->b[2] = pwt->b[4];
+ pwd->b[12] = pws->b[8];
+ pwd->b[4] = pwt->b[8];
+ pwd->b[8] = pws->b[0];
+ pwd->b[0] = pwt->b[0];
+ break;
+ case DF_HALF:
+ pwd->h[7] = pws->h[6];
+ pwd->h[5] = pws->h[2];
+ pwd->h[3] = pwt->h[6];
+ pwd->h[1] = pwt->h[2];
+ pwd->h[6] = pws->h[4];
+ pwd->h[2] = pwt->h[4];
+ pwd->h[4] = pws->h[0];
+ pwd->h[0] = pwt->h[0];
+ break;
+ case DF_WORD:
+ pwd->w[3] = pws->w[2];
+ pwd->w[1] = pwt->w[2];
+ pwd->w[2] = pws->w[0];
+ pwd->w[0] = pwt->w[0];
+ break;
+ case DF_DOUBLE:
+ pwd->d[1] = pws->d[0];
+ pwd->d[0] = pwt->d[0];
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void helper_msa_pckod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+ uint32_t ws, uint32_t wt)
+{
+ wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+ wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+ wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+ switch (df) {
+ case DF_BYTE:
+ pwd->b[0] = pwt->b[1];
+ pwd->b[2] = pwt->b[5];
+ pwd->b[4] = pwt->b[9];
+ pwd->b[6] = pwt->b[13];
+ pwd->b[8] = pws->b[1];
+ pwd->b[10] = pws->b[5];
+ pwd->b[12] = pws->b[9];
+ pwd->b[14] = pws->b[13];
+ pwd->b[1] = pwt->b[3];
+ pwd->b[5] = pwt->b[11];
+ pwd->b[9] = pws->b[3];
+ pwd->b[13] = pws->b[11];
+ pwd->b[3] = pwt->b[7];
+ pwd->b[11] = pws->b[7];
+ pwd->b[7] = pwt->b[15];
+ pwd->b[15] = pws->b[15];
+ break;
+ case DF_HALF:
+ pwd->h[0] = pwt->h[1];
+ pwd->h[2] = pwt->h[5];
+ pwd->h[4] = pws->h[1];
+ pwd->h[6] = pws->h[5];
+ pwd->h[1] = pwt->h[3];
+ pwd->h[5] = pws->h[3];
+ pwd->h[3] = pwt->h[7];
+ pwd->h[7] = pws->h[7];
+ break;
+ case DF_WORD:
+ pwd->w[0] = pwt->w[1];
+ pwd->w[2] = pws->w[1];
+ pwd->w[1] = pwt->w[3];
+ pwd->w[3] = pws->w[3];
+ break;
+ case DF_DOUBLE:
+ pwd->d[0] = pwt->d[1];
+ pwd->d[1] = pws->d[1];
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
uint32_t ws, uint32_t n)
{
@@ -1537,28 +1844,45 @@ void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, \
{ \
wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \
wr_t *pws = &(env->active_fpu.fpr[ws].wr); \
- uint32_t i; \
\
switch (df) { \
case DF_BYTE: \
- for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { \
- pwd->b[i] = msa_ ## func ## _df(df, pws->b[i]); \
- } \
+ pwd->b[0] = msa_ ## func ## _df(df, pws->b[0]); \
+ pwd->b[1] = msa_ ## func ## _df(df, pws->b[1]); \
+ pwd->b[2] = msa_ ## func ## _df(df, pws->b[2]); \
+ pwd->b[3] = msa_ ## func ## _df(df, pws->b[3]); \
+ pwd->b[4] = msa_ ## func ## _df(df, pws->b[4]); \
+ pwd->b[5] = msa_ ## func ## _df(df, pws->b[5]); \
+ pwd->b[6] = msa_ ## func ## _df(df, pws->b[6]); \
+ pwd->b[7] = msa_ ## func ## _df(df, pws->b[7]); \
+ pwd->b[8] = msa_ ## func ## _df(df, pws->b[8]); \
+ pwd->b[9] = msa_ ## func ## _df(df, pws->b[9]); \
+ pwd->b[10] = msa_ ## func ## _df(df, pws->b[10]); \
+ pwd->b[11] = msa_ ## func ## _df(df, pws->b[11]); \
+ pwd->b[12] = msa_ ## func ## _df(df, pws->b[12]); \
+ pwd->b[13] = msa_ ## func ## _df(df, pws->b[13]); \
+ pwd->b[14] = msa_ ## func ## _df(df, pws->b[14]); \
+ pwd->b[15] = msa_ ## func ## _df(df, pws->b[15]); \
break; \
case DF_HALF: \
- for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { \
- pwd->h[i] = msa_ ## func ## _df(df, pws->h[i]); \
- } \
+ pwd->h[0] = msa_ ## func ## _df(df, pws->h[0]); \
+ pwd->h[1] = msa_ ## func ## _df(df, pws->h[1]); \
+ pwd->h[2] = msa_ ## func ## _df(df, pws->h[2]); \
+ pwd->h[3] = msa_ ## func ## _df(df, pws->h[3]); \
+ pwd->h[4] = msa_ ## func ## _df(df, pws->h[4]); \
+ pwd->h[5] = msa_ ## func ## _df(df, pws->h[5]); \
+ pwd->h[6] = msa_ ## func ## _df(df, pws->h[6]); \
+ pwd->h[7] = msa_ ## func ## _df(df, pws->h[7]); \
break; \
case DF_WORD: \
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { \
- pwd->w[i] = msa_ ## func ## _df(df, pws->w[i]); \
- } \
+ pwd->w[0] = msa_ ## func ## _df(df, pws->w[0]); \
+ pwd->w[1] = msa_ ## func ## _df(df, pws->w[1]); \
+ pwd->w[2] = msa_ ## func ## _df(df, pws->w[2]); \
+ pwd->w[3] = msa_ ## func ## _df(df, pws->w[3]); \
break; \
case DF_DOUBLE: \
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { \
- pwd->d[i] = msa_ ## func ## _df(df, pws->d[i]); \
- } \
+ pwd->d[0] = msa_ ## func ## _df(df, pws->d[0]); \
+ pwd->d[1] = msa_ ## func ## _df(df, pws->d[1]); \
break; \
default: \
assert(0); \