aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2018-06-29 15:11:15 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-06-29 15:11:15 +0100
commit26c470a7bb4233454137de1062341ad48947f252 (patch)
tree16788c140111c533c67c8f5c2b6ef470f96616e1
parent802ac0e1e956f3b34a7cb0eda4ec28a60aa87a0a (diff)
downloadqemu-26c470a7bb4233454137de1062341ad48947f252.zip
qemu-26c470a7bb4233454137de1062341ad48947f252.tar.gz
qemu-26c470a7bb4233454137de1062341ad48947f252.tar.bz2
target/arm: Implement ARMv8.2-DotProd
We've already added the helpers with an SVE patch; all that remains is to wire up the aa64 and aa32 translators. Enable the feature within -cpu max for CONFIG_USER_ONLY. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20180627043328.11531-36-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--linux-user/elfload.c1
-rw-r--r--target/arm/cpu.c1
-rw-r--r--target/arm/cpu.h1
-rw-r--r--target/arm/cpu64.c1
-rw-r--r--target/arm/translate-a64.c36
-rw-r--r--target/arm/translate.c74
6 files changed, 93 insertions, 21 deletions
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index d1231ad..942a1b6 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -583,6 +583,7 @@ static uint32_t get_elf_hwcap(void)
ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS);
GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM);
+ GET_FEATURE(ARM_FEATURE_V8_DOTPROD, ARM_HWCAP_A64_ASIMDDP);
GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA);
GET_FEATURE(ARM_FEATURE_SVE, ARM_HWCAP_A64_SVE);
#undef GET_FEATURE
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 6dcc552..aa62315 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1805,6 +1805,7 @@ static void arm_max_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
+ set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
#endif
}
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index a4507a2..6a8441c 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1480,6 +1480,7 @@ enum arm_features {
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
+ ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
ARM_FEATURE_M_MAIN, /* M profile Main Extension */
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 0360d7e..3b4bc73 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -250,6 +250,7 @@ static void aarch64_max_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_CRC);
set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
+ set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
set_feature(&cpu->env, ARM_FEATURE_SVE);
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index eb3a4ab..f986340 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -640,6 +640,16 @@ static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
vec_full_reg_size(s), gvec_op);
}
+/*
+ * Expand a 3-operand operation using an out-of-line helper.
+ *
+ * rd, rn and rm are each converted with vec_full_reg_offset() to form the
+ * destination and the two source operands.  The two size arguments handed
+ * to tcg_gen_gvec_3_ool() are (is_q ? 16 : 8) and vec_full_reg_size(s);
+ * data and fn are passed through unchanged.
+ */
+static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
+ int rn, int rm, int data, gen_helper_gvec_3 *fn)
+{
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
/* Expand a 3-operand + env pointer operation using
* an out-of-line helper.
*/
@@ -11336,6 +11346,14 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
}
feature = ARM_FEATURE_V8_RDM;
break;
+ case 0x02: /* SDOT (vector) */
+ case 0x12: /* UDOT (vector) */
+ if (size != MO_32) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = ARM_FEATURE_V8_DOTPROD;
+ break;
case 0x8: /* FCMLA, #0 */
case 0x9: /* FCMLA, #90 */
case 0xa: /* FCMLA, #180 */
@@ -11389,6 +11407,11 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
}
return;
+ case 0x2: /* SDOT / UDOT */
+ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
+ u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
+ return;
+
case 0x8: /* FCMLA, #0 */
case 0x9: /* FCMLA, #90 */
case 0xa: /* FCMLA, #180 */
@@ -12568,6 +12591,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
return;
}
break;
+ case 0x0e: /* SDOT */
+ case 0x1e: /* UDOT */
+ if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
case 0x11: /* FCMLA #0 */
case 0x13: /* FCMLA #90 */
case 0x15: /* FCMLA #180 */
@@ -12665,6 +12695,12 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
switch (16 * u + opcode) {
+ case 0x0e: /* SDOT */
+ case 0x1e: /* UDOT */
+ gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
+ u ? gen_helper_gvec_udot_idx_b
+ : gen_helper_gvec_sdot_idx_b);
+ return;
case 0x11: /* FCMLA #0 */
case 0x13: /* FCMLA #90 */
case 0x15: /* FCMLA #180 */
diff --git a/target/arm/translate.c b/target/arm/translate.c
index a7a980b..f845da7 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -7762,9 +7762,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
*/
static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
{
- gen_helper_gvec_3_ptr *fn_gvec_ptr;
- int rd, rn, rm, rot, size, opr_sz;
- TCGv_ptr fpst;
+ gen_helper_gvec_3 *fn_gvec = NULL;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
+ int rd, rn, rm, opr_sz;
+ int data = 0;
bool q;
q = extract32(insn, 6, 1);
@@ -7777,8 +7778,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
if ((insn & 0xfe200f10) == 0xfc200800) {
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
- size = extract32(insn, 20, 1);
- rot = extract32(insn, 23, 2);
+ int size = extract32(insn, 20, 1);
+ data = extract32(insn, 23, 2); /* rot */
if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
|| (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
return 1;
@@ -7786,13 +7787,20 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
} else if ((insn & 0xfea00f10) == 0xfc800800) {
/* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
- size = extract32(insn, 20, 1);
- rot = extract32(insn, 24, 1);
+ int size = extract32(insn, 20, 1);
+ data = extract32(insn, 24, 1); /* rot */
if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
|| (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
return 1;
}
fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
+ } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
+ /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
+ bool u = extract32(insn, 4, 1);
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+ return 1;
+ }
+ fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
} else {
return 1;
}
@@ -7807,12 +7815,19 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
}
opr_sz = (1 + q) * 8;
- fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
- opr_sz, opr_sz, rot, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (fn_gvec_ptr) {
+ TCGv_ptr fpst = get_fpstatus_ptr(1);
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
+ vfp_reg_offset(1, rn),
+ vfp_reg_offset(1, rm), fpst,
+ opr_sz, opr_sz, data, fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ } else {
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
+ vfp_reg_offset(1, rn),
+ vfp_reg_offset(1, rm),
+ opr_sz, opr_sz, data, fn_gvec);
+ }
return 0;
}
@@ -7826,9 +7841,9 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
{
- gen_helper_gvec_3_ptr *fn_gvec_ptr;
+ gen_helper_gvec_3 *fn_gvec = NULL;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz, data;
- TCGv_ptr fpst;
bool q;
q = extract32(insn, 6, 1);
@@ -7862,6 +7877,16 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
data = (index << 2) | rot;
fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
: gen_helper_gvec_fcmlah_idx);
+ } else if ((insn & 0xffb00f00) == 0xfe200d00) {
+ /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
+ int u = extract32(insn, 4, 1);
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+ return 1;
+ }
+ fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
+ /* rm is just Vm, and index is M. */
+ data = extract32(insn, 5, 1); /* index */
+ rm = extract32(insn, 0, 4);
} else {
return 1;
}
@@ -7876,12 +7901,19 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
}
opr_sz = (1 + q) * 8;
- fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (fn_gvec_ptr) {
+ TCGv_ptr fpst = get_fpstatus_ptr(1);
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
+ vfp_reg_offset(1, rn),
+ vfp_reg_offset(1, rm), fpst,
+ opr_sz, opr_sz, data, fn_gvec_ptr);
+ tcg_temp_free_ptr(fpst);
+ } else {
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
+ vfp_reg_offset(1, rn),
+ vfp_reg_offset(1, rm),
+ opr_sz, opr_sz, data, fn_gvec);
+ }
return 0;
}