Diffstat (limited to 'tcg/loongarch64/tcg-target.c.inc')
-rw-r--r--  tcg/loongarch64/tcg-target.c.inc | 587
1 file changed, 338 insertions(+), 249 deletions(-)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 06ca1ab..5b7ed5c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -303,15 +303,31 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
switch (type) {
case TCG_TYPE_I32:
case TCG_TYPE_I64:
- /*
- * Conventional register-register move used in LoongArch is
- * `or dst, src, zero`.
- */
- tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
+ if (ret < TCG_REG_V0) {
+ if (arg < TCG_REG_V0) {
+ /*
+ * Conventional register-register move used in LoongArch is
+ * `or dst, src, zero`.
+ */
+ tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
+ } else {
+ tcg_out_opc_movfr2gr_d(s, ret, arg);
+ }
+ } else {
+ if (arg < TCG_REG_V0) {
+ tcg_out_opc_movgr2fr_d(s, ret, arg);
+ } else {
+ tcg_out_opc_fmov_d(s, ret, arg);
+ }
+ }
break;
+ case TCG_TYPE_V64:
case TCG_TYPE_V128:
tcg_out_opc_vori_b(s, ret, arg, 0);
break;
+ case TCG_TYPE_V256:
+ tcg_out_opc_xvori_b(s, ret, arg, 0);
+ break;
default:
g_assert_not_reached();
}
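
Note: the rewritten tcg_out_mov covers all four register-class combinations instead of assuming GPR-to-GPR. Registers numbered below TCG_REG_V0 are general-purpose, everything from TCG_REG_V0 up is an FPR/vector register, and the mixed cases go through movfr2gr.d / movgr2fr.d. Vector moves also gain V64 (sharing the 128-bit vori.b copy) and V256 (xvori.b). A minimal standalone sketch of the 2x2 dispatch; the enum and helper names are illustrative, not backend code:

/* Sketch of the 2x2 dispatch in tcg_out_mov above; TCG_REG_V0 is the
 * first vector register, so a plain compare classifies each operand.
 * The enum names only label which LoongArch instruction gets used. */
enum move_kind {
    GPR_TO_GPR,   /* or   dst, src, $zero */
    FPR_TO_GPR,   /* movfr2gr.d           */
    GPR_TO_FPR,   /* movgr2fr.d           */
    FPR_TO_FPR,   /* fmov.d               */
};

static enum move_kind classify_move(int ret, int arg, int first_vec_reg)
{
    if (ret < first_vec_reg) {
        return arg < first_vec_reg ? GPR_TO_GPR : FPR_TO_GPR;
    }
    return arg < first_vec_reg ? GPR_TO_FPR : FPR_TO_FPR;
}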
@@ -366,8 +382,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
* back to the slow path.
*/
- intptr_t pc_offset;
- tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
+ intptr_t src_rx, pc_offset;
tcg_target_long hi12, hi32, hi52;
/* Value fits in signed i32. */
@@ -377,24 +392,23 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
}
/* PC-relative cases. */
- pc_offset = tcg_pcrel_diff(s, (void *)val);
- if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) {
- /* Single pcaddu2i. */
- tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
- return;
+ src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
+ if ((val & 3) == 0) {
+ pc_offset = val - src_rx;
+ if (pc_offset == sextreg(pc_offset, 0, 22)) {
+ /* Single pcaddu2i. */
+ tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
+ return;
+ }
}
- if (pc_offset == (int32_t)pc_offset) {
- /* Offset within 32 bits; load with pcalau12i + ori. */
- val_lo = sextreg(val, 0, 12);
- val_hi = val >> 12;
- pc_hi = (val - pc_offset) >> 12;
- offset_hi = val_hi - pc_hi;
-
- tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20));
- tcg_out_opc_pcalau12i(s, rd, offset_hi);
+ pc_offset = (val >> 12) - (src_rx >> 12);
+ if (pc_offset == sextreg(pc_offset, 0, 20)) {
+ /* Load with pcalau12i + ori. */
+ tcg_target_long val_lo = val & 0xfff;
+ tcg_out_opc_pcalau12i(s, rd, pc_offset);
if (val_lo != 0) {
- tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff);
+ tcg_out_opc_ori(s, rd, rd, val_lo);
}
return;
}
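
Note: the reworked PC-relative path first tries a single pcaddu2i when the target is word-aligned and within +/-2 MiB (a signed 22-bit byte offset), then falls back to pcalau12i + ori. pcalau12i adds a signed 20-bit *page* delta to pc with the low 12 bits cleared, which is why the new code compares (val >> 12) - (src_rx >> 12) directly rather than deriving the page delta from a byte offset. A standalone model of that arithmetic, under the usual two's-complement assumptions (sext and the helper are illustrative):

#include <assert.h>
#include <stdint.h>

static int64_t sext(int64_t v, int bits)
{
    uint64_t m = 1ull << (bits - 1);
    uint64_t x = (uint64_t)v & ((1ull << bits) - 1);
    return (int64_t)((x ^ m) - m);
}

/* pcalau12i rd, si20 computes (pc & ~0xfff) + (si20 << 12); ori then
 * fills in the low 12 bits, reconstructing val exactly whenever the
 * page delta fits in a signed 20-bit immediate. */
static int64_t pcalau12i_ori(int64_t pc, int64_t val)
{
    int64_t page_diff = (val >> 12) - (pc >> 12);

    assert(page_diff == sext(page_diff, 20));          /* fits si20 */
    int64_t rd = (pc & ~(int64_t)0xfff)
               + (int64_t)((uint64_t)page_diff << 12); /* pcalau12i */
    return rd | (val & 0xfff);                         /* ori       */
}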
@@ -803,6 +817,12 @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
case OPC_ST_D:
tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12));
break;
+ case OPC_FLD_S:
+ case OPC_FLD_D:
+ case OPC_FST_S:
+ case OPC_FST_D:
+ tcg_out32(s, encode_fdjsk12_insn(opc, data, addr, imm12));
+ break;
default:
g_assert_not_reached();
}
@@ -816,14 +836,15 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
if (dest < TCG_REG_V0) {
tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
} else {
- tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_32, dest, base, offset);
+ tcg_out_ldst(s, OPC_FLD_S, dest, base, offset);
}
break;
case TCG_TYPE_I64:
+ case TCG_TYPE_V64:
if (dest < TCG_REG_V0) {
tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
} else {
- tcg_out_dupm_vec(s, TCG_TYPE_I128, MO_64, dest, base, offset);
+ tcg_out_ldst(s, OPC_FLD_D, dest, base, offset);
}
break;
case TCG_TYPE_V128:
@@ -834,6 +855,14 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
}
break;
+ case TCG_TYPE_V256:
+ if (-0x800 <= offset && offset <= 0x7ff) {
+ tcg_out_opc_xvld(s, dest, base, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_xvldx(s, dest, base, TCG_REG_TMP0);
+ }
+ break;
default:
g_assert_not_reached();
}
@@ -847,36 +876,15 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
if (src < TCG_REG_V0) {
tcg_out_ldst(s, OPC_ST_W, src, base, offset);
} else {
- /* TODO: Could use fst_s, fstx_s */
- if (offset < -0x100 || offset > 0xff || (offset & 3)) {
- if (-0x800 <= offset && offset <= 0x7ff) {
- tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
- } else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
- tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
- }
- base = TCG_REG_TMP0;
- offset = 0;
- }
- tcg_out_opc_vstelm_w(s, src, base, offset, 0);
+ tcg_out_ldst(s, OPC_FST_S, src, base, offset);
}
break;
case TCG_TYPE_I64:
+ case TCG_TYPE_V64:
if (src < TCG_REG_V0) {
tcg_out_ldst(s, OPC_ST_D, src, base, offset);
} else {
- /* TODO: Could use fst_d, fstx_d */
- if (offset < -0x100 || offset > 0xff || (offset & 7)) {
- if (-0x800 <= offset && offset <= 0x7ff) {
- tcg_out_opc_addi_d(s, TCG_REG_TMP0, base, offset);
- } else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
- tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP0, base);
- }
- base = TCG_REG_TMP0;
- offset = 0;
- }
- tcg_out_opc_vstelm_d(s, src, base, offset, 0);
+ tcg_out_ldst(s, OPC_FST_D, src, base, offset);
}
break;
case TCG_TYPE_V128:
@@ -887,6 +895,14 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
}
break;
+ case TCG_TYPE_V256:
+ if (-0x800 <= offset && offset <= 0x7ff) {
+ tcg_out_opc_xvst(s, src, base, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+ tcg_out_opc_xvstx(s, src, base, TCG_REG_TMP0);
+ }
+ break;
default:
g_assert_not_reached();
}
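
Note: the store-side cleanup mirrors the load side above: scalar values living in an FP register now go through tcg_out_ldst with FST_S/FST_D (and FLD_S/FLD_D on loads) instead of the old vstelm/dupm workarounds, and the new V256 cases reuse exactly the addressing pattern V128 already used. A standalone model of that pattern, assuming only the signed 12-bit displacement of the vld/vst family:

#include <stdbool.h>
#include <stdint.h>

/* vld/vst and xvld/xvst take a signed 12-bit byte displacement;
 * anything wider is materialized into a temporary register and
 * emitted with the indexed forms (vldx/vstx, xvldx/xvstx). */
enum addr_form { DIRECT_SIMM12, INDEXED_VIA_TMP };

static bool fits_simm12(intptr_t offset)
{
    return -0x800 <= offset && offset <= 0x7ff;
}

static enum addr_form pick_vec_addr_form(intptr_t offset)
{
    return fits_simm12(offset) ? DIRECT_SIMM12 : INDEXED_VIA_TMP;
}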
@@ -1675,30 +1691,26 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg rd, TCGReg rs)
{
- switch (vece) {
- case MO_8:
- tcg_out_opc_vreplgr2vr_b(s, rd, rs);
- break;
- case MO_16:
- tcg_out_opc_vreplgr2vr_h(s, rd, rs);
- break;
- case MO_32:
- tcg_out_opc_vreplgr2vr_w(s, rd, rs);
- break;
- case MO_64:
- tcg_out_opc_vreplgr2vr_d(s, rd, rs);
- break;
- default:
- g_assert_not_reached();
- }
+ static const LoongArchInsn repl_insn[2][4] = {
+ { OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H,
+ OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D },
+ { OPC_XVREPLGR2VR_B, OPC_XVREPLGR2VR_H,
+ OPC_XVREPLGR2VR_W, OPC_XVREPLGR2VR_D },
+ };
+ bool lasx = type == TCG_TYPE_V256;
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_out32(s, encode_vdj_insn(repl_insn[lasx][vece], rd, rs));
return true;
}
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg r, TCGReg base, intptr_t offset)
{
- /* Handle imm overflow and division (vldrepl.d imm is divided by 8) */
- if (offset < -0x800 || offset > 0x7ff || \
+ bool lasx = type == TCG_TYPE_V256;
+
+ /* Handle imm overflow and division (vldrepl.d imm is divided by 8). */
+ if (offset < -0x800 || offset > 0x7ff ||
(offset & ((1 << vece) - 1)) != 0) {
tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
base = TCG_REG_TMP0;
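
Note: tcg_out_dup_vec collapses the old per-vece switch into one [2][4] table indexed by an `lasx` flag (LSX row vs. LASX row) and the element size, the same shape every opcode table later in this patch uses. A standalone sketch of the lookup; the OPC_* stand-ins below are placeholders, not real encodings:

#include <assert.h>
#include <stdbool.h>

/* Dummy stand-ins for the generated OPC_* constants. */
enum insn {
    VREPLGR2VR_B, VREPLGR2VR_H, VREPLGR2VR_W, VREPLGR2VR_D,
    XVREPLGR2VR_B, XVREPLGR2VR_H, XVREPLGR2VR_W, XVREPLGR2VR_D,
};

static enum insn pick_replgr2vr(bool lasx, unsigned vece /* MO_8..MO_64 */)
{
    static const enum insn repl[2][4] = {
        { VREPLGR2VR_B,  VREPLGR2VR_H,  VREPLGR2VR_W,  VREPLGR2VR_D  },
        { XVREPLGR2VR_B, XVREPLGR2VR_H, XVREPLGR2VR_W, XVREPLGR2VR_D },
    };
    assert(vece <= 3);
    return repl[lasx][vece];
}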
@@ -1708,16 +1720,32 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
switch (vece) {
case MO_8:
- tcg_out_opc_vldrepl_b(s, r, base, offset);
+ if (lasx) {
+ tcg_out_opc_xvldrepl_b(s, r, base, offset);
+ } else {
+ tcg_out_opc_vldrepl_b(s, r, base, offset);
+ }
break;
case MO_16:
- tcg_out_opc_vldrepl_h(s, r, base, offset);
+ if (lasx) {
+ tcg_out_opc_xvldrepl_h(s, r, base, offset);
+ } else {
+ tcg_out_opc_vldrepl_h(s, r, base, offset);
+ }
break;
case MO_32:
- tcg_out_opc_vldrepl_w(s, r, base, offset);
+ if (lasx) {
+ tcg_out_opc_xvldrepl_w(s, r, base, offset);
+ } else {
+ tcg_out_opc_vldrepl_w(s, r, base, offset);
+ }
break;
case MO_64:
- tcg_out_opc_vldrepl_d(s, r, base, offset);
+ if (lasx) {
+ tcg_out_opc_xvldrepl_d(s, r, base, offset);
+ } else {
+ tcg_out_opc_vldrepl_d(s, r, base, offset);
+ }
break;
default:
g_assert_not_reached();
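
Note: vldrepl/xvldrepl encode their displacement scaled by the element size (vldrepl.d stores offset / 8), and the immediate field narrows as the scale grows (si12/si11/si10/si9), so the reachable byte range stays +/-2 KiB for every element size while misaligned offsets become unencodable; `offset & ((1 << vece) - 1)` rejects those up front. A worked sketch of the check and scaling, using the same MO_8..MO_64 numbering (helper names are illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool vldrepl_offset_ok(intptr_t offset, unsigned vece)
{
    if (offset < -0x800 || offset > 0x7ff) {
        return false;                        /* out of range  */
    }
    return (offset & ((1 << vece) - 1)) == 0; /* misaligned   */
}

static intptr_t vldrepl_imm(intptr_t offset, unsigned vece)
{
    assert(vldrepl_offset_ok(offset, vece));
    return offset >> vece;   /* e.g. vldrepl.d encodes offset / 8 */
}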
@@ -1732,75 +1760,69 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
int64_t value = sextract64(v64, 0, 8 << vece);
if (-0x200 <= value && value <= 0x1FF) {
uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
- tcg_out_opc_vldi(s, rd, imm);
+
+ if (type == TCG_TYPE_V256) {
+ tcg_out_opc_xvldi(s, rd, imm);
+ } else {
+ tcg_out_opc_vldi(s, rd, imm);
+ }
return;
}
/* TODO: vldi patterns when imm 12 is set */
- /* Fallback to vreplgr2vr */
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
- switch (vece) {
- case MO_8:
- tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
- break;
- case MO_16:
- tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
- break;
- case MO_32:
- tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
- break;
- case MO_64:
- tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
- break;
- default:
- g_assert_not_reached();
- }
+ tcg_out_dup_vec(s, type, vece, rd, TCG_REG_TMP0);
}
-static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
- const TCGArg a1, const TCGArg a2,
+static void tcg_out_addsub_vec(TCGContext *s, bool lasx, unsigned vece,
+ TCGArg a0, TCGArg a1, TCGArg a2,
bool a2_is_const, bool is_add)
{
- static const LoongArchInsn add_vec_insn[4] = {
- OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
+ static const LoongArchInsn add_vec_insn[2][4] = {
+ { OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D },
+ { OPC_XVADD_B, OPC_XVADD_H, OPC_XVADD_W, OPC_XVADD_D },
};
- static const LoongArchInsn add_vec_imm_insn[4] = {
- OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
+ static const LoongArchInsn add_vec_imm_insn[2][4] = {
+ { OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU },
+ { OPC_XVADDI_BU, OPC_XVADDI_HU, OPC_XVADDI_WU, OPC_XVADDI_DU },
};
- static const LoongArchInsn sub_vec_insn[4] = {
- OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
+ static const LoongArchInsn sub_vec_insn[2][4] = {
+ { OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D },
+ { OPC_XVSUB_B, OPC_XVSUB_H, OPC_XVSUB_W, OPC_XVSUB_D },
};
- static const LoongArchInsn sub_vec_imm_insn[4] = {
- OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
+ static const LoongArchInsn sub_vec_imm_insn[2][4] = {
+ { OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU },
+ { OPC_XVSUBI_BU, OPC_XVSUBI_HU, OPC_XVSUBI_WU, OPC_XVSUBI_DU },
};
+ LoongArchInsn insn;
if (a2_is_const) {
int64_t value = sextract64(a2, 0, 8 << vece);
+
if (!is_add) {
value = -value;
}
-
- /* Try vaddi/vsubi */
- if (0 <= value && value <= 0x1f) {
- tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \
- a1, value));
- return;
- } else if (-0x1f <= value && value < 0) {
- tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \
- a1, -value));
- return;
+ if (value < 0) {
+ insn = sub_vec_imm_insn[lasx][vece];
+ value = -value;
+ } else {
+ insn = add_vec_imm_insn[lasx][vece];
}
- /* constraint TCG_CT_CONST_VADD ensures unreachable */
- g_assert_not_reached();
+ /* Constraint TCG_CT_CONST_VADD ensures validity. */
+ tcg_debug_assert(0 <= value && value <= 0x1f);
+
+ tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
+ return;
}
if (is_add) {
- tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
+ insn = add_vec_insn[lasx][vece];
} else {
- tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
+ insn = sub_vec_insn[lasx][vece];
}
+ tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
}
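
Note: tcg_out_addsub_vec now takes the `lasx` flag, selects from doubled tables, and normalizes the constant case up front: sub-by-constant becomes add of the negation, and a negative addend flips to the vsubi form, leaving one unsigned 5-bit immediate path whose validity the TCG_CT_CONST_VADD constraint already guarantees. A standalone sketch of that normalization (enum and helper are illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

enum { USE_VADDI, USE_VSUBI };

static int fold_addsub_imm(int64_t value, bool is_add, int64_t *imm_out)
{
    int sel;

    if (!is_add) {
        value = -value;      /* sub is add of the negated constant */
    }
    if (value < 0) {
        sel = USE_VSUBI;     /* add of a negative: subtract |value| */
        value = -value;
    } else {
        sel = USE_VADDI;
    }
    assert(0 <= value && value <= 0x1f);  /* TCG_CT_CONST_VADD */
    *imm_out = value;
    return sel;
}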
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
@@ -1809,74 +1831,125 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
const int const_args[TCG_MAX_OP_ARGS])
{
TCGType type = vecl + TCG_TYPE_V64;
+ bool lasx = type == TCG_TYPE_V256;
TCGArg a0, a1, a2, a3;
- TCGReg temp_vec = TCG_VEC_TMP0;
-
- static const LoongArchInsn cmp_vec_insn[16][4] = {
- [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
- [TCG_COND_LE] = {OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D},
- [TCG_COND_LEU] = {OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU},
- [TCG_COND_LT] = {OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D},
- [TCG_COND_LTU] = {OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU},
+ LoongArchInsn insn;
+
+ static const LoongArchInsn cmp_vec_insn[16][2][4] = {
+ [TCG_COND_EQ] = {
+ { OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D },
+ { OPC_XVSEQ_B, OPC_XVSEQ_H, OPC_XVSEQ_W, OPC_XVSEQ_D },
+ },
+ [TCG_COND_LE] = {
+ { OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D },
+ { OPC_XVSLE_B, OPC_XVSLE_H, OPC_XVSLE_W, OPC_XVSLE_D },
+ },
+ [TCG_COND_LEU] = {
+ { OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU },
+ { OPC_XVSLE_BU, OPC_XVSLE_HU, OPC_XVSLE_WU, OPC_XVSLE_DU },
+ },
+ [TCG_COND_LT] = {
+ { OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D },
+ { OPC_XVSLT_B, OPC_XVSLT_H, OPC_XVSLT_W, OPC_XVSLT_D },
+ },
+ [TCG_COND_LTU] = {
+ { OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU },
+ { OPC_XVSLT_BU, OPC_XVSLT_HU, OPC_XVSLT_WU, OPC_XVSLT_DU },
+ }
};
- static const LoongArchInsn cmp_vec_imm_insn[16][4] = {
- [TCG_COND_EQ] = {OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D},
- [TCG_COND_LE] = {OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D},
- [TCG_COND_LEU] = {OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU},
- [TCG_COND_LT] = {OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D},
- [TCG_COND_LTU] = {OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU},
+ static const LoongArchInsn cmp_vec_imm_insn[16][2][4] = {
+ [TCG_COND_EQ] = {
+ { OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D },
+ { OPC_XVSEQI_B, OPC_XVSEQI_H, OPC_XVSEQI_W, OPC_XVSEQI_D },
+ },
+ [TCG_COND_LE] = {
+ { OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D },
+ { OPC_XVSLEI_B, OPC_XVSLEI_H, OPC_XVSLEI_W, OPC_XVSLEI_D },
+ },
+ [TCG_COND_LEU] = {
+ { OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU },
+ { OPC_XVSLEI_BU, OPC_XVSLEI_HU, OPC_XVSLEI_WU, OPC_XVSLEI_DU },
+ },
+ [TCG_COND_LT] = {
+ { OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D },
+ { OPC_XVSLTI_B, OPC_XVSLTI_H, OPC_XVSLTI_W, OPC_XVSLTI_D },
+ },
+ [TCG_COND_LTU] = {
+ { OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU },
+ { OPC_XVSLTI_BU, OPC_XVSLTI_HU, OPC_XVSLTI_WU, OPC_XVSLTI_DU },
+ }
};
- LoongArchInsn insn;
- static const LoongArchInsn neg_vec_insn[4] = {
- OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D
+ static const LoongArchInsn neg_vec_insn[2][4] = {
+ { OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D },
+ { OPC_XVNEG_B, OPC_XVNEG_H, OPC_XVNEG_W, OPC_XVNEG_D },
};
- static const LoongArchInsn mul_vec_insn[4] = {
- OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D
+ static const LoongArchInsn mul_vec_insn[2][4] = {
+ { OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D },
+ { OPC_XVMUL_B, OPC_XVMUL_H, OPC_XVMUL_W, OPC_XVMUL_D },
};
- static const LoongArchInsn smin_vec_insn[4] = {
- OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D
+ static const LoongArchInsn smin_vec_insn[2][4] = {
+ { OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D },
+ { OPC_XVMIN_B, OPC_XVMIN_H, OPC_XVMIN_W, OPC_XVMIN_D },
};
- static const LoongArchInsn umin_vec_insn[4] = {
- OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU
+ static const LoongArchInsn umin_vec_insn[2][4] = {
+ { OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU },
+ { OPC_XVMIN_BU, OPC_XVMIN_HU, OPC_XVMIN_WU, OPC_XVMIN_DU },
};
- static const LoongArchInsn smax_vec_insn[4] = {
- OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D
+ static const LoongArchInsn smax_vec_insn[2][4] = {
+ { OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D },
+ { OPC_XVMAX_B, OPC_XVMAX_H, OPC_XVMAX_W, OPC_XVMAX_D },
};
- static const LoongArchInsn umax_vec_insn[4] = {
- OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU
+ static const LoongArchInsn umax_vec_insn[2][4] = {
+ { OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU },
+ { OPC_XVMAX_BU, OPC_XVMAX_HU, OPC_XVMAX_WU, OPC_XVMAX_DU },
};
- static const LoongArchInsn ssadd_vec_insn[4] = {
- OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D
+ static const LoongArchInsn ssadd_vec_insn[2][4] = {
+ { OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D },
+ { OPC_XVSADD_B, OPC_XVSADD_H, OPC_XVSADD_W, OPC_XVSADD_D },
};
- static const LoongArchInsn usadd_vec_insn[4] = {
- OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU
+ static const LoongArchInsn usadd_vec_insn[2][4] = {
+ { OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU },
+ { OPC_XVSADD_BU, OPC_XVSADD_HU, OPC_XVSADD_WU, OPC_XVSADD_DU },
};
- static const LoongArchInsn sssub_vec_insn[4] = {
- OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D
+ static const LoongArchInsn sssub_vec_insn[2][4] = {
+ { OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D },
+ { OPC_XVSSUB_B, OPC_XVSSUB_H, OPC_XVSSUB_W, OPC_XVSSUB_D },
};
- static const LoongArchInsn ussub_vec_insn[4] = {
- OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU
+ static const LoongArchInsn ussub_vec_insn[2][4] = {
+ { OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU },
+ { OPC_XVSSUB_BU, OPC_XVSSUB_HU, OPC_XVSSUB_WU, OPC_XVSSUB_DU },
};
- static const LoongArchInsn shlv_vec_insn[4] = {
- OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D
+ static const LoongArchInsn shlv_vec_insn[2][4] = {
+ { OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D },
+ { OPC_XVSLL_B, OPC_XVSLL_H, OPC_XVSLL_W, OPC_XVSLL_D },
};
- static const LoongArchInsn shrv_vec_insn[4] = {
- OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D
+ static const LoongArchInsn shrv_vec_insn[2][4] = {
+ { OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D },
+ { OPC_XVSRL_B, OPC_XVSRL_H, OPC_XVSRL_W, OPC_XVSRL_D },
};
- static const LoongArchInsn sarv_vec_insn[4] = {
- OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D
+ static const LoongArchInsn sarv_vec_insn[2][4] = {
+ { OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D },
+ { OPC_XVSRA_B, OPC_XVSRA_H, OPC_XVSRA_W, OPC_XVSRA_D },
};
- static const LoongArchInsn shli_vec_insn[4] = {
- OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D
+ static const LoongArchInsn shli_vec_insn[2][4] = {
+ { OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D },
+ { OPC_XVSLLI_B, OPC_XVSLLI_H, OPC_XVSLLI_W, OPC_XVSLLI_D },
};
- static const LoongArchInsn shri_vec_insn[4] = {
- OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D
+ static const LoongArchInsn shri_vec_insn[2][4] = {
+ { OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D },
+ { OPC_XVSRLI_B, OPC_XVSRLI_H, OPC_XVSRLI_W, OPC_XVSRLI_D },
};
- static const LoongArchInsn sari_vec_insn[4] = {
- OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D
+ static const LoongArchInsn sari_vec_insn[2][4] = {
+ { OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D },
+ { OPC_XVSRAI_B, OPC_XVSRAI_H, OPC_XVSRAI_W, OPC_XVSRAI_D },
};
- static const LoongArchInsn rotrv_vec_insn[4] = {
- OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
+ static const LoongArchInsn rotrv_vec_insn[2][4] = {
+ { OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D },
+ { OPC_XVROTR_B, OPC_XVROTR_H, OPC_XVROTR_W, OPC_XVROTR_D },
+ };
+ static const LoongArchInsn rotri_vec_insn[2][4] = {
+ { OPC_VROTRI_B, OPC_VROTRI_H, OPC_VROTRI_W, OPC_VROTRI_D },
+ { OPC_XVROTRI_B, OPC_XVROTRI_H, OPC_XVROTRI_W, OPC_XVROTRI_D },
};
a0 = args[0];
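
Note: every opcode table above is now [2][4], indexed by the `lasx` flag and then by vece, and the cmp tables are [16][2][4] with designated initializers: conditions that have no direct LSX/LASX encoding (GT, GE, and friends) are deliberately left zero, which the emitter later detects as insn == 0 and handles by swapping operands and mirroring the condition. A sketch of that mirroring with a hypothetical helper:

/* Conditions absent from the tables are emitted by swapping the two
 * source operands and retrying with the mirrored condition, e.g.
 * GT a,b == LT b,a.  swap_cond here is a stand-in for QEMU's
 * tcg_swap_cond(). */
typedef enum { COND_LT, COND_GT, COND_LE, COND_GE } cond2_t;

static cond2_t swap_cond(cond2_t c)
{
    switch (c) {
    case COND_LT: return COND_GT;
    case COND_GT: return COND_LT;
    case COND_LE: return COND_GE;
    default:      return COND_LE;
    }
}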
@@ -1884,9 +1957,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
a2 = args[2];
a3 = args[3];
- /* Currently only supports V128 */
- tcg_debug_assert(type == TCG_TYPE_V128);
-
switch (opc) {
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
@@ -1895,49 +1965,55 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_ld(s, type, a0, a1, a2);
break;
case INDEX_op_and_vec:
- tcg_out_opc_vand_v(s, a0, a1, a2);
- break;
+ insn = lasx ? OPC_XVAND_V : OPC_VAND_V;
+ goto vdvjvk;
case INDEX_op_andc_vec:
/*
* vandn vd, vj, vk: vd = vk & ~vj
* andc_vec vd, vj, vk: vd = vj & ~vk
- * vk and vk are swapped
+ * vj and vk are swapped
*/
- tcg_out_opc_vandn_v(s, a0, a2, a1);
- break;
+ a1 = a2;
+ a2 = args[1];
+ insn = lasx ? OPC_XVANDN_V : OPC_VANDN_V;
+ goto vdvjvk;
case INDEX_op_or_vec:
- tcg_out_opc_vor_v(s, a0, a1, a2);
- break;
+ insn = lasx ? OPC_XVOR_V : OPC_VOR_V;
+ goto vdvjvk;
case INDEX_op_orc_vec:
- tcg_out_opc_vorn_v(s, a0, a1, a2);
- break;
+ insn = lasx ? OPC_XVORN_V : OPC_VORN_V;
+ goto vdvjvk;
case INDEX_op_xor_vec:
- tcg_out_opc_vxor_v(s, a0, a1, a2);
- break;
- case INDEX_op_nor_vec:
- tcg_out_opc_vnor_v(s, a0, a1, a2);
- break;
+ insn = lasx ? OPC_XVXOR_V : OPC_VXOR_V;
+ goto vdvjvk;
case INDEX_op_not_vec:
- tcg_out_opc_vnor_v(s, a0, a1, a1);
- break;
+ a2 = a1;
+ /* fall through */
+ case INDEX_op_nor_vec:
+ insn = lasx ? OPC_XVNOR_V : OPC_VNOR_V;
+ goto vdvjvk;
case INDEX_op_cmp_vec:
{
TCGCond cond = args[3];
+
if (const_args[2]) {
/*
* cmp_vec dest, src, value
* Try vseqi/vslei/vslti
*/
int64_t value = sextract64(a2, 0, 8 << vece);
- if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
- cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
- tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
- a0, a1, value));
+ if ((cond == TCG_COND_EQ ||
+ cond == TCG_COND_LE ||
+ cond == TCG_COND_LT) &&
+ (-0x10 <= value && value <= 0x0f)) {
+ insn = cmp_vec_imm_insn[cond][lasx][vece];
+ tcg_out32(s, encode_vdvjsk5_insn(insn, a0, a1, value));
break;
- } else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
- (0x00 <= value && value <= 0x1f)) {
- tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
- a0, a1, value));
+ } else if ((cond == TCG_COND_LEU ||
+ cond == TCG_COND_LTU) &&
+ (0x00 <= value && value <= 0x1f)) {
+ insn = cmp_vec_imm_insn[cond][lasx][vece];
+ tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
break;
}
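
Note: for cmp_vec with a constant operand, the signed compares (EQ/LE/LT) can use vseqi/vslei/vslti with a signed 5-bit immediate (-16..15), while the unsigned compares (LEU/LTU) take an unsigned 5-bit immediate (0..31); anything else is broadcast through dupi into TCG_VEC_TMP0 and compared register-register, as the following hunk shows. A sketch of the eligibility test (the cond_t enum is illustrative):

#include <stdbool.h>
#include <stdint.h>

typedef enum { EQ, LE, LT, LEU, LTU, OTHER } cond_t;

static bool cmp_imm_ok(cond_t cond, int64_t value)
{
    switch (cond) {
    case EQ:
    case LE:
    case LT:
        return -0x10 <= value && value <= 0x0f;   /* sk5, signed   */
    case LEU:
    case LTU:
        return 0x00 <= value && value <= 0x1f;    /* uk5, unsigned */
    default:
        return false;
    }
}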
@@ -1946,113 +2022,122 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
* dupi_vec temp, a2
* cmp_vec a0, a1, temp, cond
*/
- tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
- a2 = temp_vec;
+ tcg_out_dupi_vec(s, type, vece, TCG_VEC_TMP0, a2);
+ a2 = TCG_VEC_TMP0;
}
- insn = cmp_vec_insn[cond][vece];
+ insn = cmp_vec_insn[cond][lasx][vece];
if (insn == 0) {
TCGArg t;
t = a1, a1 = a2, a2 = t;
cond = tcg_swap_cond(cond);
- insn = cmp_vec_insn[cond][vece];
+ insn = cmp_vec_insn[cond][lasx][vece];
tcg_debug_assert(insn != 0);
}
- tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
}
- break;
+ goto vdvjvk;
case INDEX_op_add_vec:
- tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
+ tcg_out_addsub_vec(s, lasx, vece, a0, a1, a2, const_args[2], true);
break;
case INDEX_op_sub_vec:
- tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
+ tcg_out_addsub_vec(s, lasx, vece, a0, a1, a2, const_args[2], false);
break;
case INDEX_op_neg_vec:
- tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
+ tcg_out32(s, encode_vdvj_insn(neg_vec_insn[lasx][vece], a0, a1));
break;
case INDEX_op_mul_vec:
- tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
- break;
+ insn = mul_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_smin_vec:
- tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
- break;
+ insn = smin_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_smax_vec:
- tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
- break;
+ insn = smax_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_umin_vec:
- tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
- break;
+ insn = umin_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_umax_vec:
- tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
- break;
+ insn = umax_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_ssadd_vec:
- tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
- break;
+ insn = ssadd_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_usadd_vec:
- tcg_out32(s, encode_vdvjvk_insn(usadd_vec_insn[vece], a0, a1, a2));
- break;
+ insn = usadd_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_sssub_vec:
- tcg_out32(s, encode_vdvjvk_insn(sssub_vec_insn[vece], a0, a1, a2));
- break;
+ insn = sssub_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_ussub_vec:
- tcg_out32(s, encode_vdvjvk_insn(ussub_vec_insn[vece], a0, a1, a2));
- break;
+ insn = ussub_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_shlv_vec:
- tcg_out32(s, encode_vdvjvk_insn(shlv_vec_insn[vece], a0, a1, a2));
- break;
+ insn = shlv_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_shrv_vec:
- tcg_out32(s, encode_vdvjvk_insn(shrv_vec_insn[vece], a0, a1, a2));
- break;
+ insn = shrv_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_sarv_vec:
- tcg_out32(s, encode_vdvjvk_insn(sarv_vec_insn[vece], a0, a1, a2));
- break;
+ insn = sarv_vec_insn[lasx][vece];
+ goto vdvjvk;
+ case INDEX_op_rotlv_vec:
+ /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
+ tcg_out32(s, encode_vdvj_insn(neg_vec_insn[lasx][vece],
+ TCG_VEC_TMP0, a2));
+ a2 = TCG_VEC_TMP0;
+ /* fall through */
+ case INDEX_op_rotrv_vec:
+ insn = rotrv_vec_insn[lasx][vece];
+ goto vdvjvk;
case INDEX_op_shli_vec:
- tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
- break;
+ insn = shli_vec_insn[lasx][vece];
+ goto vdvjukN;
case INDEX_op_shri_vec:
- tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
- break;
+ insn = shri_vec_insn[lasx][vece];
+ goto vdvjukN;
case INDEX_op_sari_vec:
- tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
- break;
- case INDEX_op_rotrv_vec:
- tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1, a2));
- break;
- case INDEX_op_rotlv_vec:
- /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
- tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
- tcg_out32(s, encode_vdvjvk_insn(rotrv_vec_insn[vece], a0, a1,
- temp_vec));
- break;
+ insn = sari_vec_insn[lasx][vece];
+ goto vdvjukN;
case INDEX_op_rotli_vec:
/* rotli_vec a1, a2 = rotri_vec a1, -a2 */
a2 = extract32(-a2, 0, 3 + vece);
+ insn = rotri_vec_insn[lasx][vece];
+ goto vdvjukN;
+ case INDEX_op_bitsel_vec:
+ /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
+ if (lasx) {
+ tcg_out_opc_xvbitsel_v(s, a0, a3, a2, a1);
+ } else {
+ tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
+ }
+ break;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
+ default:
+ g_assert_not_reached();
+ vdvjvk:
+ tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
+ break;
+ vdvjukN:
switch (vece) {
case MO_8:
- tcg_out_opc_vrotri_b(s, a0, a1, a2);
+ tcg_out32(s, encode_vdvjuk3_insn(insn, a0, a1, a2));
break;
case MO_16:
- tcg_out_opc_vrotri_h(s, a0, a1, a2);
+ tcg_out32(s, encode_vdvjuk4_insn(insn, a0, a1, a2));
break;
case MO_32:
- tcg_out_opc_vrotri_w(s, a0, a1, a2);
+ tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, a2));
break;
case MO_64:
- tcg_out_opc_vrotri_d(s, a0, a1, a2);
+ tcg_out32(s, encode_vdvjuk6_insn(insn, a0, a1, a2));
break;
default:
g_assert_not_reached();
}
break;
- case INDEX_op_bitsel_vec:
- /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
- tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
- break;
- case INDEX_op_dupm_vec:
- tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
- break;
- default:
- g_assert_not_reached();
}
}
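
Note: the restructured switch funnels most cases into two shared tails placed after `default: g_assert_not_reached();`, so they are reachable only via goto: `vdvjvk` emits the common three-register format once `insn` is chosen, and `vdvjukN` selects the immediate encoder by element size, since a shift or rotate amount for an (8 << vece)-bit lane needs 3 + vece bits (uk3 for bytes up to uk6 for doublewords). Rotate-left has no direct encoding, so rotlv negates the amount into a temp vector and rotli reduces it modulo the lane width. A sketch of both rules:

#include <assert.h>

/* A lane of 8 << vece bits takes a (3 + vece)-bit shift amount:
 * uk3 for MO_8, uk4 for MO_16, uk5 for MO_32, uk6 for MO_64 --
 * which is why the vdvjukN tail dispatches on vece. */
static unsigned shift_imm_bits(unsigned vece)
{
    assert(vece <= 3);
    return 3 + vece;
}

/* rotli by n == rotri by (lane width - n) mod lane width; this is
 * the extract32(-a2, 0, 3 + vece) computation above. */
static unsigned rotl_to_rotr(unsigned n, unsigned vece)
{
    return (unsigned)(-(int)n) & ((1u << (3 + vece)) - 1);
}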
@@ -2398,7 +2483,11 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
if (cpuinfo & CPUINFO_LSX) {
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+ if (cpuinfo & CPUINFO_LASX) {
+ tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
+ }
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
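
Note: register availability is layered to match the hardware: LSX enables the V64 and V128 register files, and V256 is only enabled inside that branch when LASX is also present, since LASX implies LSX. A minimal sketch of the gating; the feature-bit macros here are illustrative stand-ins for the real cpuinfo definitions:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative feature bits; the real ones come from QEMU's cpuinfo. */
#define CPUINFO_LSX   (1u << 0)
#define CPUINFO_LASX  (1u << 1)

static bool have_v256(uint32_t cpuinfo)
{
    /* Mirrors the nested check above: LASX is only probed once LSX
     * support is established. */
    return (cpuinfo & CPUINFO_LSX) && (cpuinfo & CPUINFO_LASX);
}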