author    Song Gao <gaosong@loongson.cn>  2023-05-04 20:27:53 +0800
committer Song Gao <gaosong@loongson.cn>  2023-05-06 11:19:47 +0800
commit    83b3815dbc96f9af126d6ed4fb718104e6bc3e69 (patch)
tree      4f043240ad0ac482794be439d41df9e57fd43a5d /target
parent    a5200a17c917817bd531218dc74f13c2007a0029 (diff)
target/loongarch: Implement vssrln vssran
This patch includes:
- VSSRLN.{B.H/H.W/W.D};
- VSSRAN.{B.H/H.W/W.D};
- VSSRLN.{BU.H/HU.W/WU.D};
- VSSRAN.{BU.H/HU.W/WU.D};
- VSSRLNI.{B.H/H.W/W.D/D.Q};
- VSSRANI.{B.H/H.W/W.D/D.Q};
- VSSRLNI.{BU.H/HU.W/WU.D/DU.Q};
- VSSRANI.{BU.H/HU.W/WU.D/DU.Q}.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Song Gao <gaosong@loongson.cn>
Message-Id: <20230504122810.4094787-28-gaosong@loongson.cn>
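These are saturating shift-right-narrow operations: each source element is shifted right, logically for the VSSRLN*/VSSRLNI* forms and arithmetically for the VSSRAN*/VSSRANI* forms, and the result is clamped to the range of the destination element, which is half the source width. The *I variants take the shift amount from an immediate and narrow both Vj and Vd into the destination. A minimal scalar sketch of one signed VSSRAN.B.H element (illustrative only; the name and types below mirror the helper code but are not part of the patch):

    #include <stdint.h>

    /* One VSSRAN.B.H element, as a model: arithmetic shift of a 16-bit
     * source by (shift % 16), then signed saturation to 8 bits. This
     * mirrors do_ssrans_B below with sh = 7, i.e. mask = INT8_MAX. */
    static int8_t ssran_b_h_elem(int16_t e, uint16_t shift)
    {
        int16_t r = e >> (shift % 16);
        if (r > INT8_MAX) {
            return INT8_MAX;
        } else if (r < INT8_MIN) {
            return INT8_MIN;
        }
        return r;
    }

The unsigned forms differ only in the clamp bounds (0 and UINT8_MAX for the .BU.H case), with VSSRAN.BU.H additionally clamping negative inputs to zero.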
Diffstat (limited to 'target')
-rw-r--r--  target/loongarch/disas.c                     |  30
-rw-r--r--  target/loongarch/helper.h                    |  30
-rw-r--r--  target/loongarch/insn_trans/trans_lsx.c.inc  |  30
-rw-r--r--  target/loongarch/insns.decode                |  30
-rw-r--r--  target/loongarch/lsx_helper.c                | 379
5 files changed, 499 insertions, 0 deletions
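The new instructions are all wired through the same four touch points, with lsx_helper.c supplying the implementations. Taking vssrln_b_h as the example, each line below is quoted from the corresponding hunk that follows:

    insns.decode:     vssrln_b_h 0111 00001111 11001 ..... ..... ..... @vvv
    disas.c:          INSN_LSX(vssrln_b_h, vvv)
    helper.h:         DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
    trans_lsx.c.inc:  TRANS(vssrln_b_h, gen_vvv, gen_helper_vssrln_b_h)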
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index 185cd36..426d30d 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1198,3 +1198,33 @@ INSN_LSX(vsrarni_b_h, vv_i)
INSN_LSX(vsrarni_h_w, vv_i)
INSN_LSX(vsrarni_w_d, vv_i)
INSN_LSX(vsrarni_d_q, vv_i)
+
+INSN_LSX(vssrln_b_h, vvv)
+INSN_LSX(vssrln_h_w, vvv)
+INSN_LSX(vssrln_w_d, vvv)
+INSN_LSX(vssran_b_h, vvv)
+INSN_LSX(vssran_h_w, vvv)
+INSN_LSX(vssran_w_d, vvv)
+INSN_LSX(vssrln_bu_h, vvv)
+INSN_LSX(vssrln_hu_w, vvv)
+INSN_LSX(vssrln_wu_d, vvv)
+INSN_LSX(vssran_bu_h, vvv)
+INSN_LSX(vssran_hu_w, vvv)
+INSN_LSX(vssran_wu_d, vvv)
+
+INSN_LSX(vssrlni_b_h, vv_i)
+INSN_LSX(vssrlni_h_w, vv_i)
+INSN_LSX(vssrlni_w_d, vv_i)
+INSN_LSX(vssrlni_d_q, vv_i)
+INSN_LSX(vssrani_b_h, vv_i)
+INSN_LSX(vssrani_h_w, vv_i)
+INSN_LSX(vssrani_w_d, vv_i)
+INSN_LSX(vssrani_d_q, vv_i)
+INSN_LSX(vssrlni_bu_h, vv_i)
+INSN_LSX(vssrlni_hu_w, vv_i)
+INSN_LSX(vssrlni_wu_d, vv_i)
+INSN_LSX(vssrlni_du_q, vv_i)
+INSN_LSX(vssrani_bu_h, vv_i)
+INSN_LSX(vssrani_hu_w, vv_i)
+INSN_LSX(vssrani_wu_d, vv_i)
+INSN_LSX(vssrani_du_q, vv_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index 0a8cfe3..28f1597 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -411,3 +411,33 @@ DEF_HELPER_4(vsrarni_b_h, void, env, i32, i32, i32)
DEF_HELPER_4(vsrarni_h_w, void, env, i32, i32, i32)
DEF_HELPER_4(vsrarni_w_d, void, env, i32, i32, i32)
DEF_HELPER_4(vsrarni_d_q, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vssrln_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrln_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssran_wu_d, void, env, i32, i32, i32)
+
+DEF_HELPER_4(vssrlni_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_b_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_h_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_w_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_d_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrlni_du_q, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_bu_h, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_hu_w, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_wu_d, void, env, i32, i32, i32)
+DEF_HELPER_4(vssrani_du_q, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lsx.c.inc b/target/loongarch/insn_trans/trans_lsx.c.inc
index 6034a74..5d7e45a 100644
--- a/target/loongarch/insn_trans/trans_lsx.c.inc
+++ b/target/loongarch/insn_trans/trans_lsx.c.inc
@@ -3037,3 +3037,33 @@ TRANS(vsrarni_b_h, gen_vv_i, gen_helper_vsrarni_b_h)
TRANS(vsrarni_h_w, gen_vv_i, gen_helper_vsrarni_h_w)
TRANS(vsrarni_w_d, gen_vv_i, gen_helper_vsrarni_w_d)
TRANS(vsrarni_d_q, gen_vv_i, gen_helper_vsrarni_d_q)
+
+TRANS(vssrln_b_h, gen_vvv, gen_helper_vssrln_b_h)
+TRANS(vssrln_h_w, gen_vvv, gen_helper_vssrln_h_w)
+TRANS(vssrln_w_d, gen_vvv, gen_helper_vssrln_w_d)
+TRANS(vssran_b_h, gen_vvv, gen_helper_vssran_b_h)
+TRANS(vssran_h_w, gen_vvv, gen_helper_vssran_h_w)
+TRANS(vssran_w_d, gen_vvv, gen_helper_vssran_w_d)
+TRANS(vssrln_bu_h, gen_vvv, gen_helper_vssrln_bu_h)
+TRANS(vssrln_hu_w, gen_vvv, gen_helper_vssrln_hu_w)
+TRANS(vssrln_wu_d, gen_vvv, gen_helper_vssrln_wu_d)
+TRANS(vssran_bu_h, gen_vvv, gen_helper_vssran_bu_h)
+TRANS(vssran_hu_w, gen_vvv, gen_helper_vssran_hu_w)
+TRANS(vssran_wu_d, gen_vvv, gen_helper_vssran_wu_d)
+
+TRANS(vssrlni_b_h, gen_vv_i, gen_helper_vssrlni_b_h)
+TRANS(vssrlni_h_w, gen_vv_i, gen_helper_vssrlni_h_w)
+TRANS(vssrlni_w_d, gen_vv_i, gen_helper_vssrlni_w_d)
+TRANS(vssrlni_d_q, gen_vv_i, gen_helper_vssrlni_d_q)
+TRANS(vssrani_b_h, gen_vv_i, gen_helper_vssrani_b_h)
+TRANS(vssrani_h_w, gen_vv_i, gen_helper_vssrani_h_w)
+TRANS(vssrani_w_d, gen_vv_i, gen_helper_vssrani_w_d)
+TRANS(vssrani_d_q, gen_vv_i, gen_helper_vssrani_d_q)
+TRANS(vssrlni_bu_h, gen_vv_i, gen_helper_vssrlni_bu_h)
+TRANS(vssrlni_hu_w, gen_vv_i, gen_helper_vssrlni_hu_w)
+TRANS(vssrlni_wu_d, gen_vv_i, gen_helper_vssrlni_wu_d)
+TRANS(vssrlni_du_q, gen_vv_i, gen_helper_vssrlni_du_q)
+TRANS(vssrani_bu_h, gen_vv_i, gen_helper_vssrani_bu_h)
+TRANS(vssrani_hu_w, gen_vv_i, gen_helper_vssrani_hu_w)
+TRANS(vssrani_wu_d, gen_vv_i, gen_helper_vssrani_wu_d)
+TRANS(vssrani_du_q, gen_vv_i, gen_helper_vssrani_du_q)
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 29bf4a8..772c5cd 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -899,3 +899,33 @@ vsrarni_b_h 0111 00110101 11000 1 .... ..... ..... @vv_ui4
vsrarni_h_w 0111 00110101 11001 ..... ..... ..... @vv_ui5
vsrarni_w_d 0111 00110101 1101 ...... ..... ..... @vv_ui6
vsrarni_d_q 0111 00110101 111 ....... ..... ..... @vv_ui7
+
+vssrln_b_h 0111 00001111 11001 ..... ..... ..... @vvv
+vssrln_h_w 0111 00001111 11010 ..... ..... ..... @vvv
+vssrln_w_d 0111 00001111 11011 ..... ..... ..... @vvv
+vssran_b_h 0111 00001111 11101 ..... ..... ..... @vvv
+vssran_h_w 0111 00001111 11110 ..... ..... ..... @vvv
+vssran_w_d 0111 00001111 11111 ..... ..... ..... @vvv
+vssrln_bu_h 0111 00010000 01001 ..... ..... ..... @vvv
+vssrln_hu_w 0111 00010000 01010 ..... ..... ..... @vvv
+vssrln_wu_d 0111 00010000 01011 ..... ..... ..... @vvv
+vssran_bu_h 0111 00010000 01101 ..... ..... ..... @vvv
+vssran_hu_w 0111 00010000 01110 ..... ..... ..... @vvv
+vssran_wu_d 0111 00010000 01111 ..... ..... ..... @vvv
+
+vssrlni_b_h 0111 00110100 10000 1 .... ..... ..... @vv_ui4
+vssrlni_h_w 0111 00110100 10001 ..... ..... ..... @vv_ui5
+vssrlni_w_d 0111 00110100 1001 ...... ..... ..... @vv_ui6
+vssrlni_d_q 0111 00110100 101 ....... ..... ..... @vv_ui7
+vssrani_b_h 0111 00110110 00000 1 .... ..... ..... @vv_ui4
+vssrani_h_w 0111 00110110 00001 ..... ..... ..... @vv_ui5
+vssrani_w_d 0111 00110110 0001 ...... ..... ..... @vv_ui6
+vssrani_d_q 0111 00110110 001 ....... ..... ..... @vv_ui7
+vssrlni_bu_h 0111 00110100 11000 1 .... ..... ..... @vv_ui4
+vssrlni_hu_w 0111 00110100 11001 ..... ..... ..... @vv_ui5
+vssrlni_wu_d 0111 00110100 1101 ...... ..... ..... @vv_ui6
+vssrlni_du_q 0111 00110100 111 ....... ..... ..... @vv_ui7
+vssrani_bu_h 0111 00110110 01000 1 .... ..... ..... @vv_ui4
+vssrani_hu_w 0111 00110110 01001 ..... ..... ..... @vv_ui5
+vssrani_wu_d 0111 00110110 0101 ...... ..... ..... @vv_ui6
+vssrani_du_q 0111 00110110 011 ....... ..... ..... @vv_ui7
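Note how the immediate width in these patterns tracks the source element size: @vv_ui4 (4 bits) for the .B.H forms up through @vv_ui7 (7 bits) for the .D.Q forms, where the 128-bit source can be shifted by up to 127. As a sketch, the field layout can be read straight off the vssrlni_d_q pattern above; the standalone helper below is hypothetical (QEMU uses the decoder generated from insns.decode, not hand-written extraction like this):

    #include <stdint.h>

    /* Hand decode of the vssrlni_d_q pattern, for illustration: per the
     * pattern line, ui7 occupies bits [16:10], vj bits [9:5], and vd
     * bits [4:0] of the 32-bit instruction word. */
    static void decode_vssrlni_d_q(uint32_t insn, uint32_t *vd,
                                   uint32_t *vj, uint32_t *imm)
    {
        *imm = (insn >> 10) & 0x7f; /* ui7: shift amount, 0..127 */
        *vj  = (insn >> 5) & 0x1f;  /* source vector register */
        *vd  = insn & 0x1f;         /* destination (and second source) */
    }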
diff --git a/target/loongarch/lsx_helper.c b/target/loongarch/lsx_helper.c
index d8923eb..b9110dc 100644
--- a/target/loongarch/lsx_helper.c
+++ b/target/loongarch/lsx_helper.c
@@ -1178,3 +1178,382 @@ void HELPER(vsrarni_d_q)(CPULoongArchState *env,
VSRARNI(vsrarni_b_h, 16, B, H)
VSRARNI(vsrarni_h_w, 32, H, W)
VSRARNI(vsrarni_w_d, 64, W, D)
+
+#define SSRLNS(NAME, T1, T2, T3) \
+static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = (((T1)e2) >> sa); \
+ } \
+ T3 mask; \
+ mask = (1ull << sh) -1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRLNS(B, uint16_t, int16_t, uint8_t)
+SSRLNS(H, uint32_t, int32_t, uint16_t)
+SSRLNS(W, uint64_t, int64_t, uint32_t)
+
+#define VSSRLN(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t vk) \
+{ \
+ int i; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vk = &(env->fpr[vk].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E1(i) = do_ssrlns_ ## E1(Vj->E2(i), (T)Vk->E2(i)% BIT, BIT/2 -1); \
+ } \
+ Vd->D(1) = 0; \
+}
+
+VSSRLN(vssrln_b_h, 16, uint16_t, B, H)
+VSSRLN(vssrln_h_w, 32, uint32_t, H, W)
+VSSRLN(vssrln_w_d, 64, uint64_t, W, D)
+
+#define SSRANS(E, T1, T2) \
+static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = e2 >> sa; \
+ } \
+ T2 mask; \
+ mask = (1ll << sh) -1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else if (shft_res < -(mask +1)) { \
+ return ~mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRANS(B, int16_t, int8_t)
+SSRANS(H, int32_t, int16_t)
+SSRANS(W, int64_t, int32_t)
+
+#define VSSRAN(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t vk) \
+{ \
+ int i; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vk = &(env->fpr[vk].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E1(i) = do_ssrans_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2 -1); \
+ } \
+ Vd->D(1) = 0; \
+}
+
+VSSRAN(vssran_b_h, 16, uint16_t, B, H)
+VSSRAN(vssran_h_w, 32, uint32_t, H, W)
+VSSRAN(vssran_w_d, 64, uint64_t, W, D)
+
+#define SSRLNU(E, T1, T2, T3) \
+static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = (((T1)e2) >> sa); \
+ } \
+ T2 mask; \
+ mask = (1ull << sh) -1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRLNU(B, uint16_t, uint8_t, int16_t)
+SSRLNU(H, uint32_t, uint16_t, int32_t)
+SSRLNU(W, uint64_t, uint32_t, int64_t)
+
+#define VSSRLNU(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t vk) \
+{ \
+ int i; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vk = &(env->fpr[vk].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
+ } \
+ Vd->D(1) = 0; \
+}
+
+VSSRLNU(vssrln_bu_h, 16, uint16_t, B, H)
+VSSRLNU(vssrln_hu_w, 32, uint32_t, H, W)
+VSSRLNU(vssrln_wu_d, 64, uint64_t, W, D)
+
+#define SSRANU(E, T1, T2, T3) \
+static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = e2 >> sa; \
+ } \
+ if (e2 < 0) { \
+ shft_res = 0; \
+ } \
+ T2 mask; \
+ mask = (1ull << sh) -1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRANU(B, uint16_t, uint8_t, int16_t)
+SSRANU(H, uint32_t, uint16_t, int32_t)
+SSRANU(W, uint64_t, uint32_t, int64_t)
+
+#define VSSRANU(NAME, BIT, T, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t vk) \
+{ \
+ int i; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vk = &(env->fpr[vk].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ Vd->E1(i) = do_ssranu_ ## E1(Vj->E2(i), (T)Vk->E2(i)%BIT, BIT/2); \
+ } \
+ Vd->D(1) = 0; \
+}
+
+VSSRANU(vssran_bu_h, 16, uint16_t, B, H)
+VSSRANU(vssran_hu_w, 32, uint32_t, H, W)
+VSSRANU(vssran_wu_d, 64, uint64_t, W, D)
+
+#define VSSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t imm) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E1(i) = do_ssrlns_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
+ temp.E1(i + LSX_LEN/BIT) = do_ssrlns_ ## E1(Vd->E2(i), imm, BIT/2 -1);\
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrlni_d_q)(CPULoongArchState *env,
+ uint32_t vd, uint32_t vj, uint32_t imm)
+{
+ Int128 shft_res1, shft_res2, mask;
+ VReg *Vd = &(env->fpr[vd].vreg);
+ VReg *Vj = &(env->fpr[vj].vreg);
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(0);
+ shft_res2 = Vd->Q(0);
+ } else {
+ shft_res1 = int128_urshift(Vj->Q(0), imm);
+ shft_res2 = int128_urshift(Vd->Q(0), imm);
+ }
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(0) = int128_getlo(mask);
+    } else {
+ Vd->D(0) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(1) = int128_getlo(mask);
+    } else {
+ Vd->D(1) = int128_getlo(shft_res2);
+ }
+}
+
+VSSRLNI(vssrlni_b_h, 16, B, H)
+VSSRLNI(vssrlni_h_w, 32, H, W)
+VSSRLNI(vssrlni_w_d, 64, W, D)
+
+#define VSSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t imm) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E1(i) = do_ssrans_ ## E1(Vj->E2(i), imm, BIT/2 -1); \
+ temp.E1(i + LSX_LEN/BIT) = do_ssrans_ ## E1(Vd->E2(i), imm, BIT/2 -1); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrani_d_q)(CPULoongArchState *env,
+ uint32_t vd, uint32_t vj, uint32_t imm)
+{
+ Int128 shft_res1, shft_res2, mask, min;
+ VReg *Vd = &(env->fpr[vd].vreg);
+ VReg *Vj = &(env->fpr[vj].vreg);
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(0);
+ shft_res2 = Vd->Q(0);
+ } else {
+ shft_res1 = int128_rshift(Vj->Q(0), imm);
+ shft_res2 = int128_rshift(Vd->Q(0), imm);
+ }
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+ min = int128_lshift(int128_one(), 63);
+
+ if (int128_gt(shft_res1, mask)) {
+ Vd->D(0) = int128_getlo(mask);
+ } else if (int128_lt(shft_res1, int128_neg(min))) {
+ Vd->D(0) = int128_getlo(min);
+ } else {
+ Vd->D(0) = int128_getlo(shft_res1);
+ }
+
+ if (int128_gt(shft_res2, mask)) {
+ Vd->D(1) = int128_getlo(mask);
+ } else if (int128_lt(shft_res2, int128_neg(min))) {
+ Vd->D(1) = int128_getlo(min);
+ } else {
+ Vd->D(1) = int128_getlo(shft_res2);
+ }
+}
+
+VSSRANI(vssrani_b_h, 16, B, H)
+VSSRANI(vssrani_h_w, 32, H, W)
+VSSRANI(vssrani_w_d, 64, W, D)
+
+#define VSSRLNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t imm) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E1(i) = do_ssrlnu_ ## E1(Vj->E2(i), imm, BIT/2); \
+ temp.E1(i + LSX_LEN/BIT) = do_ssrlnu_ ## E1(Vd->E2(i), imm, BIT/2); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrlni_du_q)(CPULoongArchState *env,
+ uint32_t vd, uint32_t vj, uint32_t imm)
+{
+ Int128 shft_res1, shft_res2, mask;
+ VReg *Vd = &(env->fpr[vd].vreg);
+ VReg *Vj = &(env->fpr[vj].vreg);
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(0);
+ shft_res2 = Vd->Q(0);
+ } else {
+ shft_res1 = int128_urshift(Vj->Q(0), imm);
+ shft_res2 = int128_urshift(Vd->Q(0), imm);
+ }
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(0) = int128_getlo(mask);
+    } else {
+ Vd->D(0) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(1) = int128_getlo(mask);
+    } else {
+ Vd->D(1) = int128_getlo(shft_res2);
+ }
+}
+
+VSSRLNUI(vssrlni_bu_h, 16, B, H)
+VSSRLNUI(vssrlni_hu_w, 32, H, W)
+VSSRLNUI(vssrlni_wu_d, 64, W, D)
+
+#define VSSRANUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t vd, uint32_t vj, uint32_t imm) \
+{ \
+ int i; \
+ VReg temp; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ for (i = 0; i < LSX_LEN/BIT; i++) { \
+ temp.E1(i) = do_ssranu_ ## E1(Vj->E2(i), imm, BIT/2); \
+ temp.E1(i + LSX_LEN/BIT) = do_ssranu_ ## E1(Vd->E2(i), imm, BIT/2); \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrani_du_q)(CPULoongArchState *env,
+ uint32_t vd, uint32_t vj, uint32_t imm)
+{
+ Int128 shft_res1, shft_res2, mask;
+ VReg *Vd = &(env->fpr[vd].vreg);
+ VReg *Vj = &(env->fpr[vj].vreg);
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(0);
+ shft_res2 = Vd->Q(0);
+ } else {
+ shft_res1 = int128_rshift(Vj->Q(0), imm);
+ shft_res2 = int128_rshift(Vd->Q(0), imm);
+ }
+
+ if (int128_lt(Vj->Q(0), int128_zero())) {
+ shft_res1 = int128_zero();
+ }
+
+ if (int128_lt(Vd->Q(0), int128_zero())) {
+ shft_res2 = int128_zero();
+ }
+
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(0) = int128_getlo(mask);
+    } else {
+ Vd->D(0) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(1) = int128_getlo(mask);
+    } else {
+ Vd->D(1) = int128_getlo(shft_res2);
+ }
+}
+
+VSSRANUI(vssrani_bu_h, 16, B, H)
+VSSRANUI(vssrani_hu_w, 32, H, W)
+VSSRANUI(vssrani_wu_d, 64, W, D)
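
As a quick check of the saturation rules implemented above, here is a free-standing, slightly condensed rendition of the do_ssranu_B logic with a few sample inputs. This is an illustrative harness, not part of the patch; it restates the macro-expanded logic so it can compile on its own:

    #include <stdint.h>
    #include <stdio.h>

    /* Equivalent to do_ssranu_B: arithmetic shift, negative inputs clamp
     * to 0, results above UINT8_MAX clamp to UINT8_MAX (sh = 8). */
    static uint16_t ssranu_b(int16_t e2, int sa)
    {
        uint16_t r = e2 >> sa;
        if (e2 < 0) {
            r = 0;
        }
        return r > UINT8_MAX ? UINT8_MAX : r;
    }

    int main(void)
    {
        printf("%u\n", (unsigned)ssranu_b(-100, 2));   /* 0: negative clamps to zero */
        printf("%u\n", (unsigned)ssranu_b(0x7fff, 2)); /* 255: saturates to UINT8_MAX */
        printf("%u\n", (unsigned)ssranu_b(100, 2));    /* 25: in range, plain shift */
        return 0;
    }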