diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2021-02-24 15:05:32 -0800 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-03-05 15:17:34 +0000 |
commit | 519183d3fee58e52f7b51cf146c9dc9edc565059 (patch) | |
tree | a69f2eb4218c6fcf7fbd4908661eaf23d129d836 /target/arm/translate-a64.c | |
parent | 4565afbbf0b6d897ee746f2410d60460f43c3159 (diff) | |
download | qemu-519183d3fee58e52f7b51cf146c9dc9edc565059.zip qemu-519183d3fee58e52f7b51cf146c9dc9edc565059.tar.gz qemu-519183d3fee58e52f7b51cf146c9dc9edc565059.tar.bz2 |
target/arm: Speed up aarch64 TBL/TBX
Always perform one call instead of two for 16-byte operands.
Use byte loads/stores directly into the vector register file
instead of extractions and deposits to a 64-bit local variable.
In order to easily receive pointers into the vector register file,
convert the helper to the gvec out-of-line signature. Move the
helper into vec_helper.c, where it can make use of H1 and clear_tail.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20210224230532.276878-1-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/translate-a64.c')
-rw-r--r-- | target/arm/translate-a64.c | 58 |
1 file changed, 7 insertions, 51 deletions
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 3aebdb4..b591f09 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -7532,10 +7532,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int is_tblx = extract32(insn, 12, 1); - int len = extract32(insn, 13, 2); - TCGv_i64 tcg_resl, tcg_resh, tcg_idx; - TCGv_i32 tcg_regno, tcg_numregs; + int is_tbx = extract32(insn, 12, 1); + int len = (extract32(insn, 13, 2) + 1) * 16; if (op2 != 0) { unallocated_encoding(s); @@ -7546,53 +7544,11 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) return; } - /* This does a table lookup: for every byte element in the input - * we index into a table formed from up to four vector registers, - * and then the output is the result of the lookups. Our helper - * function does the lookup operation for a single 64 bit part of - * the input. - */ - tcg_resl = tcg_temp_new_i64(); - tcg_resh = NULL; - - if (is_tblx) { - read_vec_element(s, tcg_resl, rd, 0, MO_64); - } else { - tcg_gen_movi_i64(tcg_resl, 0); - } - - if (is_q) { - tcg_resh = tcg_temp_new_i64(); - if (is_tblx) { - read_vec_element(s, tcg_resh, rd, 1, MO_64); - } else { - tcg_gen_movi_i64(tcg_resh, 0); - } - } - - tcg_idx = tcg_temp_new_i64(); - tcg_regno = tcg_const_i32(rn); - tcg_numregs = tcg_const_i32(len + 1); - read_vec_element(s, tcg_idx, rm, 0, MO_64); - gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx, - tcg_regno, tcg_numregs); - if (is_q) { - read_vec_element(s, tcg_idx, rm, 1, MO_64); - gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx, - tcg_regno, tcg_numregs); - } - tcg_temp_free_i64(tcg_idx); - tcg_temp_free_i32(tcg_regno); - tcg_temp_free_i32(tcg_numregs); - - write_vec_element(s, tcg_resl, rd, 0, MO_64); - tcg_temp_free_i64(tcg_resl); - - if (is_q) { - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_resh); - } - 
clear_vec_high(s, is_q, rd); + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rm), cpu_env, + is_q ? 16 : 8, vec_full_reg_size(s), + (len << 6) | (is_tbx << 5) | rn, + gen_helper_simd_tblx); } /* ZIP/UZP/TRN |