diff options
author | Marco Liebel <quic_mliebel@quicinc.com> | 2023-05-22 10:47:08 -0700 |
---|---|---|
committer | Taylor Simpson <tsimpson@quicinc.com> | 2023-05-26 07:03:41 -0700 |
commit | 3fd49e22171a019beebffdda081380a5276525a6 (patch) | |
tree | ffe3fa9d78e37ce94aa2d3a8b60315c465e10d42 | |
parent | 0d57cd61d95fbbe86a1ce3b2ef2f8f1254b4116a (diff) | |
download | qemu-3fd49e22171a019beebffdda081380a5276525a6.zip qemu-3fd49e22171a019beebffdda081380a5276525a6.tar.gz qemu-3fd49e22171a019beebffdda081380a5276525a6.tar.bz2 |
Hexagon (target/hexagon) Fix assignment to tmp registers
The order in which instructions are generated by gen_insn() influences
assignment to tmp registers. During generation, tmp instructions (e.g.
generate_V6_vassign_tmp) use vreg_src_off() to determine what kind of
register to use as source. If some instruction (e.g.
generate_V6_vmpyowh_64_acc) uses a tmp register but is generated prior
to the corresponding tmp instruction, the vregs_updated_tmp bit map
isn't updated in time.
Example:
{ v14.tmp = v16; v25 = v14 } This works properly because
generate_V6_vassign_tmp is generated before generate_V6_vassign
and the bit map is updated.
{ v15:14.tmp = vcombine(v21, v16); v25:24 += vmpyo(v18.w,v14.h) }
This does not work properly because vmpyo is generated before
vcombine and therefore the bit map does not yet know that there's
a tmp register.
The parentheses in the decoding function were in the wrong place.
Moving them to the correct location makes shuffling of .tmp vector
registers work as expected.
Signed-off-by: Marco Liebel <quic_mliebel@quicinc.com>
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
Tested-by: Taylor Simpson <tsimpson@quicinc.com>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230522174708.464197-1-quic_mliebel@quicinc.com>
-rw-r--r-- | target/hexagon/mmvec/decode_ext_mmvec.c | 8 | ||||
-rw-r--r-- | tests/tcg/hexagon/hvx_misc.c | 31 |
2 files changed, 35 insertions, 4 deletions
diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c b/target/hexagon/mmvec/decode_ext_mmvec.c index 061a65a..174eb3b 100644 --- a/target/hexagon/mmvec/decode_ext_mmvec.c +++ b/target/hexagon/mmvec/decode_ext_mmvec.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -148,9 +148,9 @@ decode_shuffle_for_execution_vops(Packet *pkt) int i; for (i = 0; i < pkt->num_insns; i++) { uint16_t opcode = pkt->insn[i].opcode; - if (GET_ATTRIB(opcode, A_LOAD) && - (GET_ATTRIB(opcode, A_CVI_NEW) || - GET_ATTRIB(opcode, A_CVI_TMP))) { + if ((GET_ATTRIB(opcode, A_LOAD) && + GET_ATTRIB(opcode, A_CVI_NEW)) || + GET_ATTRIB(opcode, A_CVI_TMP)) { /* * Find prior consuming vector instructions * Move to end of packet diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index 09dec8d..b45170a 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -60,6 +60,36 @@ static void test_load_tmp(void) check_output_w(__LINE__, BUFSIZE); } +static void test_load_tmp2(void) +{ + void *pout0 = &output[0]; + void *pout1 = &output[1]; + + asm volatile( + "r0 = #0x03030303\n\t" + "v16 = vsplat(r0)\n\t" + "r0 = #0x04040404\n\t" + "v18 = vsplat(r0)\n\t" + "r0 = #0x05050505\n\t" + "v21 = vsplat(r0)\n\t" + "{\n\t" + " v25:24 += vmpyo(v18.w, v14.h)\n\t" + " v15:14.tmp = vcombine(v21, v16)\n\t" + "}\n\t" + "vmem(%0 + #0) = v24\n\t" + "vmem(%1 + #0) = v25\n\t" + : : "r"(pout0), "r"(pout1) + : "r0", "v16", "v18", "v21", "v24", "v25", "memory" + ); + + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; ++i) { + expect[0].w[i] = 0x180c0000; + expect[1].w[i] = 0x000c1818; + } + + check_output_w(__LINE__, 2); +} + static void test_load_cur(void) { void *p0 = buffer0; @@ -435,6 +465,7 @@ int main() 
init_buffers(); test_load_tmp(); + test_load_tmp2(); test_load_cur(); test_load_aligned(); test_load_unaligned(); |