aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSong Gao <gaosong@loongson.cn>2023-05-04 20:28:07 +0800
committerSong Gao <gaosong@loongson.cn>2023-05-06 11:19:49 +0800
commit843b627a395a8f6cda8a717c5c8b081358b9d711 (patch)
tree2b8dd139640cc8873dc8d9aedb170dee4b5357dc
parente93dd43147351290ec1eac83e04cfc0a7f641d63 (diff)
downloadqemu-843b627a395a8f6cda8a717c5c8b081358b9d711.zip
qemu-843b627a395a8f6cda8a717c5c8b081358b9d711.tar.gz
qemu-843b627a395a8f6cda8a717c5c8b081358b9d711.tar.bz2
target/loongarch: Implement vld vst
This patch includes: - VLD[X], VST[X]; - VLDREPL.{B/H/W/D}; - VSTELM.{B/H/W/D}. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Song Gao <gaosong@loongson.cn> Message-Id: <20230504122810.4094787-42-gaosong@loongson.cn>
-rw-r--r--target/loongarch/disas.c34
-rw-r--r--target/loongarch/insn_trans/trans_lsx.c.inc159
-rw-r--r--target/loongarch/insns.decode36
-rw-r--r--target/loongarch/translate.c10
4 files changed, 239 insertions, 0 deletions
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 0b62bbb..8627908 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -21,11 +21,21 @@ static inline int plus_1(DisasContext *ctx, int x)
return x + 1;
}
+/* Decode helper (see !function=shl_1 in insns.decode): returns x << 1. */
+static inline int shl_1(DisasContext *ctx, int x)
+{
+    const int shift = 1;
+
+    return x << shift;
+}
+
static inline int shl_2(DisasContext *ctx, int x)
{
return x << 2;
}
+/* Decode helper (see !function=shl_3 in insns.decode): returns x << 3. */
+static inline int shl_3(DisasContext *ctx, int x)
+{
+    const int shift = 3;
+
+    return x << shift;
+}
+
#define CSR_NAME(REG) \
[LOONGARCH_CSR_##REG] = (#REG)
@@ -823,6 +833,11 @@ static void output_vr_i(DisasContext *ctx, arg_vr_i *a, const char *mnemonic)
output(ctx, mnemonic, "v%d, r%d, 0x%x", a->vd, a->rj, a->imm);
}
+/*
+ * Print the operands of a vr_ii instruction: vector register vd, general
+ * register rj, then both immediates (imm, imm2) in hexadecimal.
+ */
+static void output_vr_ii(DisasContext *ctx, arg_vr_ii *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, r%d, 0x%x, 0x%x",
+           a->vd, a->rj, a->imm, a->imm2);
+}
+
static void output_rv_i(DisasContext *ctx, arg_rv_i *a, const char *mnemonic)
{
output(ctx, mnemonic, "r%d, v%d, 0x%x", a->rd, a->vj, a->imm);
@@ -838,6 +853,11 @@ static void output_vvr(DisasContext *ctx, arg_vvr *a, const char *mnemonic)
output(ctx, mnemonic, "v%d, v%d, r%d", a->vd, a->vj, a->rk);
}
+/*
+ * Print the operands of a vrr instruction: vector register vd followed by
+ * the two general registers rj and rk.
+ */
+static void output_vrr(DisasContext *ctx, arg_vrr *a, const char *mnemonic)
+{
+    output(ctx, mnemonic, "v%d, r%d, r%d",
+           a->vd, a->rj, a->rk);
+}
+
INSN_LSX(vadd_b, vvv)
INSN_LSX(vadd_h, vvv)
INSN_LSX(vadd_w, vvv)
@@ -1654,3 +1674,17 @@ INSN_LSX(vextrins_d, vv_i)
INSN_LSX(vextrins_w, vv_i)
INSN_LSX(vextrins_h, vv_i)
INSN_LSX(vextrins_b, vv_i)
+/* LSX load/store: vld/vst print vd, rj, imm; vldx/vstx print vd, rj, rk. */
+INSN_LSX(vld, vr_i)
+INSN_LSX(vst, vr_i)
+INSN_LSX(vldx, vrr)
+INSN_LSX(vstx, vrr)
+/* vldrepl.* print vd, rj, imm; vstelm.* also print imm2 (element index). */
+INSN_LSX(vldrepl_d, vr_i)
+INSN_LSX(vldrepl_w, vr_i)
+INSN_LSX(vldrepl_h, vr_i)
+INSN_LSX(vldrepl_b, vr_i)
+INSN_LSX(vstelm_d, vr_ii)
+INSN_LSX(vstelm_w, vr_ii)
+INSN_LSX(vstelm_h, vr_ii)
+INSN_LSX(vstelm_b, vr_ii)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index e1eee6b..86dfd2b 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -4102,3 +4102,162 @@ TRANS(vextrins_b, gen_vv_i, gen_helper_vextrins_b)
TRANS(vextrins_h, gen_vv_i, gen_helper_vextrins_h)
TRANS(vextrins_w, gen_vv_i, gen_helper_vextrins_w)
TRANS(vextrins_d, gen_vv_i, gen_helper_vextrins_d)
+
+/*
+ * VLD vd, rj, si12: load 128 bits from the guest address rj + imm and
+ * write them to vd as two 64-bit halves.
+ */
+static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv ea;
+    TCGv_i64 lo, hi;
+    TCGv_i128 data;
+
+    CHECK_SXE;
+
+    ea = gpr_src(ctx, a->rj, EXT_NONE);
+    data = tcg_temp_new_i128();
+    lo = tcg_temp_new_i64();
+    hi = tcg_temp_new_i64();
+
+    /* Only compute rj + imm when the displacement is non-zero. */
+    if (a->imm != 0) {
+        TCGv sum = tcg_temp_new();
+
+        tcg_gen_addi_tl(sum, ea, a->imm);
+        ea = sum;
+    }
+
+    tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, MO_128 | MO_TE);
+
+    /* Split the 128-bit value; half 1 gets the high part, half 0 the low. */
+    tcg_gen_extr_i128_i64(lo, hi, data);
+    set_vreg64(hi, a->vd, 1);
+    set_vreg64(lo, a->vd, 0);
+
+    return true;
+}
+
+/*
+ * VST vd, rj, si12: read both 64-bit halves of vd and store them as one
+ * 128-bit value at the guest address rj + imm.
+ */
+static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
+{
+    TCGv ea;
+    TCGv_i128 data;
+    TCGv_i64 hi, lo;
+
+    CHECK_SXE;
+
+    ea = gpr_src(ctx, a->rj, EXT_NONE);
+    data = tcg_temp_new_i128();
+    hi = tcg_temp_new_i64();
+    lo = tcg_temp_new_i64();
+
+    /* Only compute rj + imm when the displacement is non-zero. */
+    if (a->imm != 0) {
+        TCGv sum = tcg_temp_new();
+
+        tcg_gen_addi_tl(sum, ea, a->imm);
+        ea = sum;
+    }
+
+    /* Fetch half 1 (high) and half 0 (low), then join them into 128 bits. */
+    get_vreg64(hi, a->vd, 1);
+    get_vreg64(lo, a->vd, 0);
+    tcg_gen_concat_i64_i128(data, lo, hi);
+
+    tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, MO_128 | MO_TE);
+
+    return true;
+}
+
+/*
+ * VLDX vd, rj, rk: load 128 bits from the guest address rj + rk and write
+ * them to vd as two 64-bit halves.
+ */
+static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
+{
+    TCGv ea, base, index;
+    TCGv_i64 lo, hi;
+    TCGv_i128 data;
+
+    CHECK_SXE;
+
+    ea = tcg_temp_new();
+    base = gpr_src(ctx, a->rj, EXT_NONE);
+    index = gpr_src(ctx, a->rk, EXT_NONE);
+    data = tcg_temp_new_i128();
+    lo = tcg_temp_new_i64();
+    hi = tcg_temp_new_i64();
+
+    /* Effective address is the sum of the two general registers. */
+    tcg_gen_add_tl(ea, base, index);
+    tcg_gen_qemu_ld_i128(data, ea, ctx->mem_idx, MO_128 | MO_TE);
+
+    /* Split the 128-bit value; half 1 gets the high part, half 0 the low. */
+    tcg_gen_extr_i128_i64(lo, hi, data);
+    set_vreg64(hi, a->vd, 1);
+    set_vreg64(lo, a->vd, 0);
+
+    return true;
+}
+
+/*
+ * VSTX vd, rj, rk: read both 64-bit halves of vd and store them as one
+ * 128-bit value at the guest address rj + rk.
+ */
+static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
+{
+    TCGv ea, base, index;
+    TCGv_i64 hi, lo;
+    TCGv_i128 data;
+
+    CHECK_SXE;
+
+    ea = tcg_temp_new();
+    base = gpr_src(ctx, a->rj, EXT_NONE);
+    index = gpr_src(ctx, a->rk, EXT_NONE);
+    data = tcg_temp_new_i128();
+    hi = tcg_temp_new_i64();
+    lo = tcg_temp_new_i64();
+
+    /* Effective address is the sum of the two general registers. */
+    tcg_gen_add_tl(ea, base, index);
+
+    /* Fetch half 1 (high) and half 0 (low), then join them into 128 bits. */
+    get_vreg64(hi, a->vd, 1);
+    get_vreg64(lo, a->vd, 0);
+    tcg_gen_concat_i64_i128(data, lo, hi);
+
+    tcg_gen_qemu_st_i128(data, ea, ctx->mem_idx, MO_128 | MO_TE);
+
+    return true;
+}
+
+/*
+ * VLDREPL.{B/H/W/D} vd, rj, imm: load one element of size MO from the guest
+ * address rj + imm and duplicate it across vd with tcg_gen_gvec_dup_i64.
+ *
+ * NAME is the instruction name (the generated function is trans_<NAME>).
+ * MO is the element size (MO_8 .. MO_64); it is used both as the size of
+ * the guest access and as the element size of the duplication.
+ */
+#define VLDREPL(NAME, MO)                                                 \
+static bool trans_## NAME (DisasContext *ctx, arg_vr_i *a)                \
+{                                                                         \
+    TCGv addr, temp;                                                      \
+    TCGv_i64 val;                                                         \
+                                                                          \
+    CHECK_SXE;                                                            \
+                                                                          \
+    addr = gpr_src(ctx, a->rj, EXT_NONE);                                 \
+    val = tcg_temp_new_i64();                                             \
+                                                                          \
+    if (a->imm) {                                                         \
+        temp = tcg_temp_new();                                            \
+        tcg_gen_addi_tl(temp, addr, a->imm);                              \
+        addr = temp;                                                      \
+    }                                                                     \
+                                                                          \
+    /* Access guest memory in target byte order, as trans_vld does. */    \
+    tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, (MO) | MO_TE);           \
+    tcg_gen_gvec_dup_i64(MO, vec_full_offset(a->vd), 16, ctx->vl/8, val); \
+                                                                          \
+    return true;                                                          \
+}
+
+VLDREPL(vldrepl_b, MO_8)
+VLDREPL(vldrepl_h, MO_16)
+VLDREPL(vldrepl_w, MO_32)
+VLDREPL(vldrepl_d, MO_64)
+
+/*
+ * VSTELM.{B/H/W/D} vd, rj, imm, imm2: store element imm2 of vd to the guest
+ * address rj + imm.
+ *
+ * NAME is the instruction name (the generated function is trans_<NAME>).
+ * MO is the element size (MO_8 .. MO_64).
+ * E is the vreg element accessor (B, H, W or D) matching MO.
+ */
+#define VSTELM(NAME, MO, E)                                               \
+static bool trans_## NAME (DisasContext *ctx, arg_vr_ii *a)               \
+{                                                                         \
+    TCGv addr, temp;                                                      \
+    TCGv_i64 val;                                                         \
+    int ofs;                                                              \
+                                                                          \
+    CHECK_SXE;                                                            \
+                                                                          \
+    addr = gpr_src(ctx, a->rj, EXT_NONE);                                 \
+    val = tcg_temp_new_i64();                                             \
+                                                                          \
+    if (a->imm) {                                                         \
+        temp = tcg_temp_new();                                            \
+        tcg_gen_addi_tl(temp, addr, a->imm);                              \
+        addr = temp;                                                      \
+    }                                                                     \
+                                                                          \
+    ofs = offsetof(CPULoongArchState, fpr[a->vd].vreg.E(a->imm2));        \
+                                                                          \
+    /*                                                                    \
+     * Read exactly one element from the register file.  A full 64-bit    \
+     * host load at the element's offset would read past the element      \
+     * and, on a big-endian host, would presumably not leave it in the    \
+     * low bits that the narrow guest store below writes.                 \
+     */                                                                   \
+    switch (MO) {                                                         \
+    case MO_8:                                                            \
+        tcg_gen_ld8u_i64(val, cpu_env, ofs);                              \
+        break;                                                            \
+    case MO_16:                                                           \
+        tcg_gen_ld16u_i64(val, cpu_env, ofs);                             \
+        break;                                                            \
+    case MO_32:                                                           \
+        tcg_gen_ld32u_i64(val, cpu_env, ofs);                             \
+        break;                                                            \
+    default:                                                              \
+        tcg_gen_ld_i64(val, cpu_env, ofs);                                \
+        break;                                                            \
+    }                                                                     \
+                                                                          \
+    /* Access guest memory in target byte order, as trans_vst does. */    \
+    tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, (MO) | MO_TE);           \
+                                                                          \
+    return true;                                                          \
+}
+
+VSTELM(vstelm_b, MO_8, B)
+VSTELM(vstelm_h, MO_16, H)
+VSTELM(vstelm_w, MO_32, W)
+VSTELM(vstelm_d, MO_64, D)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 0263bce..ea6eedb 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -487,6 +487,17 @@ idle 0000 01100100 10001 ............... @i15
dbcl 0000 00000010 10101 ............... @i15
#
+# LSX Fields
+#
+# Signed immediates starting at bit 10; shl_N shifts the value left by N.
+%i9s3 10:s9 !function=shl_3
+%i10s2 10:s10 !function=shl_2
+%i11s1 10:s11 !function=shl_1
+%i8s3 10:s8 !function=shl_3
+%i8s2 10:s8 !function=shl_2
+%i8s1 10:s8 !function=shl_1
+
+#
# LSX Argument sets
#
@@ -500,6 +511,8 @@ dbcl 0000 00000010 10101 ............... @i15
&rv_i rd vj imm
&vr vd rj
&vvr vd vj rk
+&vrr vd rj rk  # used by vldx/vstx via @vrr
+&vr_ii vd rj imm imm2  # used by vstelm.* via @vr_i8i1 .. @vr_i8i4
#
# LSX Formats
@@ -528,6 +541,15 @@ dbcl 0000 00000010 10101 ............... @i15
@rv_ui1 .... ........ ..... .... imm:1 vj:5 rd:5 &rv_i
@vr .... ........ ..... ..... rj:5 vd:5 &vr
@vvr .... ........ ..... rk:5 vj:5 vd:5 &vvr
+@vr_i9 .... ........ . ......... rj:5 vd:5 &vr_i imm=%i9s3  # imm = si9 << 3
+@vr_i10 .... ........ .......... rj:5 vd:5 &vr_i imm=%i10s2  # imm = si10 << 2
+@vr_i11 .... ....... ........... rj:5 vd:5 &vr_i imm=%i11s1  # imm = si11 << 1
+@vr_i12 .... ...... imm:s12 rj:5 vd:5 &vr_i  # imm = si12, unscaled
+@vr_i8i1 .... ........ . imm2:1 ........ rj:5 vd:5 &vr_ii imm=%i8s3  # imm = si8 << 3, imm2 = 1-bit element index
+@vr_i8i2 .... ........ imm2:2 ........ rj:5 vd:5 &vr_ii imm=%i8s2  # imm = si8 << 2, imm2 = 2-bit element index
+@vr_i8i3 .... ....... imm2:3 ........ rj:5 vd:5 &vr_ii imm=%i8s1  # imm = si8 << 1, imm2 = 3-bit element index
+@vr_i8i4 .... ...... imm2:4 imm:s8 rj:5 vd:5 &vr_ii  # imm = si8, unscaled, imm2 = 4-bit element index
+@vrr .... ........ ..... rk:5 rj:5 vd:5 &vrr  # vd plus two general registers rj, rk
vadd_b 0111 00000000 10100 ..... ..... ..... @vvv
vadd_h 0111 00000000 10101 ..... ..... ..... @vvv
@@ -1256,3 +1278,17 @@ vextrins_d 0111 00111000 00 ........ ..... ..... @vv_ui8
vextrins_w 0111 00111000 01 ........ ..... ..... @vv_ui8
vextrins_h 0111 00111000 10 ........ ..... ..... @vv_ui8
vextrins_b 0111 00111000 11 ........ ..... ..... @vv_ui8
+# LSX load/store: vld/vst use rj + si12, vldx/vstx use rj + rk.
+vld 0010 110000 ............ ..... ..... @vr_i12
+vst 0010 110001 ............ ..... ..... @vr_i12
+vldx 0011 10000100 00000 ..... ..... ..... @vrr
+vstx 0011 10000100 01000 ..... ..... ..... @vrr
+# vldrepl.*: imm scaled by element size; vstelm.*: also takes element index.
+vldrepl_d 0011 00000001 0 ......... ..... ..... @vr_i9
+vldrepl_w 0011 00000010 .......... ..... ..... @vr_i10
+vldrepl_h 0011 0000010 ........... ..... ..... @vr_i11
+vldrepl_b 0011 000010 ............ ..... ..... @vr_i12
+vstelm_d 0011 00010001 0 . ........ ..... ..... @vr_i8i1
+vstelm_w 0011 00010010 .. ........ ..... ..... @vr_i8i2
+vstelm_h 0011 0001010 ... ........ ..... ..... @vr_i8i3
+vstelm_b 0011 000110 .... ........ ..... ..... @vr_i8i4
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index 862847a..c04ed75 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -53,11 +53,21 @@ static inline int plus_1(DisasContext *ctx, int x)
return x + 1;
}
+/* Decode helper (see !function=shl_1 in insns.decode): returns x << 1. */
+static inline int shl_1(DisasContext *ctx, int x)
+{
+    const int shift = 1;
+
+    return x << shift;
+}
+
static inline int shl_2(DisasContext *ctx, int x)
{
return x << 2;
}
+/* Decode helper (see !function=shl_3 in insns.decode): returns x << 3. */
+static inline int shl_3(DisasContext *ctx, int x)
+{
+    const int shift = 3;
+
+    return x << shift;
+}
+
/*
* LoongArch the upper 32 bits are undefined ("can be any value").
* QEMU chooses to nanbox, because it is most likely to show guest bugs early.