aboutsummaryrefslogtreecommitdiff
path: root/target/arm/translate-a64.c
diff options
context:
space:
mode:
author: Richard Henderson <richard.henderson@linaro.org> 2021-02-24 15:05:32 -0800
committer: Peter Maydell <peter.maydell@linaro.org> 2021-03-05 15:17:34 +0000
commit: 519183d3fee58e52f7b51cf146c9dc9edc565059 (patch)
tree: a69f2eb4218c6fcf7fbd4908661eaf23d129d836 /target/arm/translate-a64.c
parent: 4565afbbf0b6d897ee746f2410d60460f43c3159 (diff)
download: qemu-519183d3fee58e52f7b51cf146c9dc9edc565059.zip
qemu-519183d3fee58e52f7b51cf146c9dc9edc565059.tar.gz
qemu-519183d3fee58e52f7b51cf146c9dc9edc565059.tar.bz2
target/arm: Speed up aarch64 TBL/TBX
Always perform one call instead of two for 16-byte operands.
Use byte loads/stores directly into the vector register file
instead of extractions and deposits to a 64-bit local variable.

In order to easily receive pointers into the vector register file,
convert the helper to the gvec out-of-line signature. Move the
helper into vec_helper.c, where it can make use of H1 and clear_tail.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20210224230532.276878-1-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/translate-a64.c')
-rw-r--r-- target/arm/translate-a64.c 58
1 file changed, 7 insertions, 51 deletions
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 3aebdb4..b591f09 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -7532,10 +7532,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- int is_tblx = extract32(insn, 12, 1);
- int len = extract32(insn, 13, 2);
- TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
- TCGv_i32 tcg_regno, tcg_numregs;
+ int is_tbx = extract32(insn, 12, 1);
+ int len = (extract32(insn, 13, 2) + 1) * 16;
if (op2 != 0) {
unallocated_encoding(s);
@@ -7546,53 +7544,11 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
return;
}
- /* This does a table lookup: for every byte element in the input
- * we index into a table formed from up to four vector registers,
- * and then the output is the result of the lookups. Our helper
- * function does the lookup operation for a single 64 bit part of
- * the input.
- */
- tcg_resl = tcg_temp_new_i64();
- tcg_resh = NULL;
-
- if (is_tblx) {
- read_vec_element(s, tcg_resl, rd, 0, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resl, 0);
- }
-
- if (is_q) {
- tcg_resh = tcg_temp_new_i64();
- if (is_tblx) {
- read_vec_element(s, tcg_resh, rd, 1, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resh, 0);
- }
- }
-
- tcg_idx = tcg_temp_new_i64();
- tcg_regno = tcg_const_i32(rn);
- tcg_numregs = tcg_const_i32(len + 1);
- read_vec_element(s, tcg_idx, rm, 0, MO_64);
- gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
- tcg_regno, tcg_numregs);
- if (is_q) {
- read_vec_element(s, tcg_idx, rm, 1, MO_64);
- gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
- tcg_regno, tcg_numregs);
- }
- tcg_temp_free_i64(tcg_idx);
- tcg_temp_free_i32(tcg_regno);
- tcg_temp_free_i32(tcg_numregs);
-
- write_vec_element(s, tcg_resl, rd, 0, MO_64);
- tcg_temp_free_i64(tcg_resl);
-
- if (is_q) {
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- }
- clear_vec_high(s, is_q, rd);
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rm), cpu_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ (len << 6) | (is_tbx << 5) | rn,
+ gen_helper_simd_tblx);
}
/* ZIP/UZP/TRN