diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-05-18 20:44:34 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-05-18 20:44:34 -0700 |
commit | 449d6d9eb44772e69f11d002e3c1e2be8a91c350 (patch) | |
tree | 124ca1db8453cbdb7a9c6ab83c84785ed1097405 /target | |
parent | 146f515110e86aefe3bc2e8eb581ab724614060f (diff) | |
parent | 9073bfd725440da0af44f1ee1e3bcf72e9de39b6 (diff) | |
download | qemu-449d6d9eb44772e69f11d002e3c1e2be8a91c350.zip qemu-449d6d9eb44772e69f11d002e3c1e2be8a91c350.tar.gz qemu-449d6d9eb44772e69f11d002e3c1e2be8a91c350.tar.bz2 |
Merge tag 'pull-hex-20230518-1' of https://github.com/quic/qemu into staging
Hexagon update
# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEENjXHiM5iuR/UxZq0ewJE+xLeRCIFAmRmgQgACgkQewJE+xLe
# RCJLtAf8C/0kQRa4mjnbsztXuFyca53UxAv3BSBEDla4ZcMfFBoVJsGB3OP7IPXd
# KBQpkLyJAVye9idex5xqdp9nIfoGKDTsc6YtCfGujZ17cDpzLRDpHdUTex8PcZYK
# wpfM3hoVJsYRBMsojZ4OaxatjFQ+FWzrIH6FcgH086Q8TH4w9dZLNEJzHC4lOj0s
# 7qOuw2tgm+vOVlzsk/fv6/YD/BTeZTON3jgTPvAnvdRLb/482UpM9JkJ8E4rbte3
# Ss5PUK8QTQHU0yamspGy/PfsYxiptM+jIWGd836fAGzwF12Ug27mSc1enndRtQVW
# pQTdnOnWuuRzOwEpd7x3xh9upACm4g==
# =1CyJ
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 18 May 2023 12:48:24 PM PDT
# gpg: using RSA key 3635C788CE62B91FD4C59AB47B0244FB12DE4422
# gpg: Good signature from "Taylor Simpson (Rock on) <tsimpson@quicinc.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 3635 C788 CE62 B91F D4C5 9AB4 7B02 44FB 12DE 4422
* tag 'pull-hex-20230518-1' of https://github.com/quic/qemu: (44 commits)
Hexagon (linux-user/hexagon): handle breakpoints
Hexagon (gdbstub): add HVX support
Hexagon (gdbstub): fix p3:0 read and write via stub
Hexagon: add core gdbstub xml data for LLDB
gdbstub: add test for untimely stop-reply packets
gdbstub: only send stop-reply packets when allowed to
Remove test_vshuff from hvx_misc tests
Hexagon (decode): look for pkts with multiple insns at the same slot
Hexagon (iclass): update J4_hintjumpr slot constraints
Hexagon: append eflags to unknown cpu model string
Hexagon: list available CPUs with `-cpu help`
Hexagon (target/hexagon/*.py): raise exception on reg parsing error
target/hexagon: fix = vs. == mishap
Hexagon (target/hexagon) Additional instructions handled by idef-parser
Hexagon (target/hexagon) Move items to DisasContext
Hexagon (target/hexagon) Move pkt_has_store_s1 to DisasContext
Hexagon (target/hexagon) Move pred_written to DisasContext
Hexagon (target/hexagon) Move new_pred_value to DisasContext
Hexagon (target/hexagon) Move new_value to DisasContext
Hexagon (target/hexagon) Make special new_value for USR
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target')
35 files changed, 1646 insertions, 421 deletions
diff --git a/target/hexagon/README b/target/hexagon/README index ebafc78..4381117 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -4,10 +4,10 @@ is a wide vector coprocessor designed for high performance computer vision, image processing, machine learning, and other workloads. The following versions of the Hexagon core are supported - Scalar core: v67 - https://developer.qualcomm.com/downloads/qualcomm-hexagon-v67-programmer-s-reference-manual - HVX extension: v66 - https://developer.qualcomm.com/downloads/qualcomm-hexagon-v66-hvx-programmer-s-reference-manual + Scalar core: v73 + https://developer.qualcomm.com/downloads/qualcomm-hexagon-v73-programmers-reference-manual-rev-aa + HVX extension: v73 + https://developer.qualcomm.com/downloads/qualcomm-hexagon-v73-hvx-programmers-reference-manual-rev-aa We presented an overview of the project at the 2019 KVM Forum. https://kvmforum2019.sched.com/event/Tmwc/qemu-hexagon-automatic-translation-of-the-isa-manual-pseudcode-to-tiny-code-instructions-of-a-vliw-architecture-niccolo-izzo-revng-taylor-simpson-qualcomm-innovation-center @@ -87,7 +87,7 @@ tcg_funcs_generated.c.inc TCGv RsV = hex_gpr[insn->regno[1]]; TCGv RtV = hex_gpr[insn->regno[2]]; gen_helper_A2_add(RdV, cpu_env, RsV, RtV); - gen_log_reg_write(RdN, RdV); + gen_log_reg_write(ctx, RdN, RdV); } helper_funcs_generated.c.inc @@ -186,7 +186,7 @@ We also generate an analyze_<tag> function for each instruction. Currently, these functions record the writes to registers by calling ctx_log_*. During gen_start_packet, we invoke the analyze_<tag> function for each instruction in the packet, and we mark the implicit writes. After the analysis is performed, -we initialize hex_new_value for each of the predicated assignments. +we initialize the result register for each of the predicated assignments. In addition to instruction semantics, we use a generator to create the decode tree. This generation is also a two step process. 
The first step is to run @@ -304,4 +304,4 @@ Here are some handy places to set breakpoints At the start of execution of a packet for a given PC br helper_debug_start_packet if env->gpr[41] == 0xdeadbeef At the end of execution of a packet for a given PC - br helper_debug_commit_end if env->this_PC == 0xdeadbeef + br helper_debug_commit_end if this_PC == 0xdeadbeef diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index da79b41..d053d68 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env) void arch_fpop_end(CPUHexagonState *env) { + const bool pkt_need_commit = true; int flags = get_float_exception_flags(&env->fp_status); if (flags != 0) { SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 9874d16..21d457f 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,6 +52,12 @@ DEF_ATTRIB(REGWRSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(REGWRSIZE_8B, "Memory width is 8 bytes", "", "") DEF_ATTRIB(MEMLIKE, "Memory-like instruction", "", "") DEF_ATTRIB(MEMLIKE_PACKET_RULES, "follows Memory-like packet rules", "", "") +DEF_ATTRIB(RELEASE, "Releases a lock", "", "") +DEF_ATTRIB(ACQUIRE, "Acquires a lock", "", "") + +DEF_ATTRIB(RLS_INNER, "Store release inner visibility", "", "") +DEF_ATTRIB(RLS_ALL_THREAD, "Store release among all threads", "", "") +DEF_ATTRIB(RLS_SAME_THREAD, "Store release with the same thread", "", "") /* V6 Vector attributes */ DEF_ATTRIB(CVI, "Executes on the HVX extension", "", "") @@ -63,23 +69,27 @@ DEF_ATTRIB(CVI_VP_VS, "Double vector permute/shft insn executes on HVX", "", "") DEF_ATTRIB(CVI_VX, "Multiply instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VX_DV, "Double vector multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VS, "Shift instruction executes on HVX", "", "") +DEF_ATTRIB(CVI_VS_3SRC, "This shift needs to borrow a source register", "", "") DEF_ATTRIB(CVI_VS_VX, "Permute/shift and multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VA, "ALU instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VA_DV, "Double vector alu instruction executes on HVX", "", "") DEF_ATTRIB(CVI_4SLOT, "Consumes all the vector execution resources", "", "") DEF_ATTRIB(CVI_TMP, "Transient Memory Load not written to register", "", "") +DEF_ATTRIB(CVI_REMAP, "Register Renaming not written to register file", "", "") DEF_ATTRIB(CVI_GATHER, "CVI Gather operation", "", "") DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "") DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "") DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "") DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "") 
+DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "") /* Change-of-flow attributes */ DEF_ATTRIB(JUMP, "Jump-type instruction", "", "") DEF_ATTRIB(INDIRECT, "Absolute register jump", "", "") DEF_ATTRIB(CALL, "Function call instruction", "", "") DEF_ATTRIB(COF, "Change-of-flow instruction", "", "") +DEF_ATTRIB(HINTED_COF, "This instruction is a hinted change-of-flow", "", "") DEF_ATTRIB(CONDEXEC, "May be cancelled by a predicate", "", "") DEF_ATTRIB(DOTNEWVALUE, "Uses a register value generated in this pkt", "", "") DEF_ATTRIB(NEWCMPJUMP, "Compound compare and jump", "", "") @@ -102,6 +112,10 @@ DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1") DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", "UREG.P2") DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3") DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "") DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "") DEF_ATTRIB(WRITES_PRED_REG, "Writes a predicate register", "", "") DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "") @@ -140,6 +154,8 @@ DEF_ATTRIB(L2FETCH, "Instruction is l2fetch type", "", "") DEF_ATTRIB(ICINVA, "icinva", "", "") DEF_ATTRIB(DCCLEANINVA, "dccleaninva", "", "") +DEF_ATTRIB(NO_INTRINSIC, "Don't generate an intrisic", "", "") + /* Documentation Notes */ DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "") DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "") @@ -148,7 +164,11 @@ DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "") DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "") DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "") DEF_ATTRIB(NOTE_NVSLOT0, 
"Can execute only in slot 0 (ST)", "", "") +DEF_ATTRIB(NOTE_NOVP, "Cannot be paired with a HVX permute instruction", "", "") +DEF_ATTRIB(NOTE_VA_UNARY, "Combined with HVX ALU op (must be unary)", "", "") +/* V6 MMVector Notes for Documentation */ +DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "") /* Restrictions to make note of */ DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "") DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "") diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index ab40cfc..f155936 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,9 +24,32 @@ #include "hw/qdev-properties.h" #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" +#include "exec/gdbstub.h" -static void hexagon_v67_cpu_init(Object *obj) +static void hexagon_v67_cpu_init(Object *obj) { } +static void hexagon_v68_cpu_init(Object *obj) { } +static void hexagon_v69_cpu_init(Object *obj) { } +static void hexagon_v71_cpu_init(Object *obj) { } +static void hexagon_v73_cpu_init(Object *obj) { } + +static void hexagon_cpu_list_entry(gpointer data, gpointer user_data) +{ + ObjectClass *oc = data; + char *name = g_strdup(object_class_get_name(oc)); + if (g_str_has_suffix(name, HEXAGON_CPU_TYPE_SUFFIX)) { + name[strlen(name) - strlen(HEXAGON_CPU_TYPE_SUFFIX)] = '\0'; + } + qemu_printf(" %s\n", name); + g_free(name); +} + +void hexagon_cpu_list(void) { + GSList *list; + list = object_class_get_list_sorted(TYPE_HEXAGON_CPU, false); + qemu_printf("Available CPUs:\n"); + g_slist_foreach(list, hexagon_cpu_list_entry, NULL); + g_slist_free(list); } static ObjectClass 
*hexagon_cpu_class_by_name(const char *cpu_model) @@ -52,6 +75,8 @@ static Property hexagon_lldb_compat_property = static Property hexagon_lldb_stack_adjust_property = DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong); +static Property hexagon_short_circuit_property = + DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true); const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", @@ -315,6 +340,11 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) return; } + gdb_register_coprocessor(cs, hexagon_hvx_gdb_read_register, + hexagon_hvx_gdb_write_register, + NUM_VREGS + NUM_QREGS, + "hexagon-hvx.xml", 0); + qemu_init_vcpu(cs); cpu_reset(cs); @@ -328,6 +358,7 @@ static void hexagon_cpu_init(Object *obj) cpu_set_cpustate_pointers(cpu); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property); + qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property); } #include "hw/core/tcg-cpu-ops.h" @@ -358,8 +389,9 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->get_pc = hexagon_cpu_get_pc; cc->gdb_read_register = hexagon_gdb_read_register; cc->gdb_write_register = hexagon_gdb_write_register; - cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS; + cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS; cc->gdb_stop_before_watchpoint = true; + cc->gdb_core_xml_file = "hexagon-core.xml"; cc->disas_set_info = hexagon_cpu_disas_set_info; cc->tcg_ops = &hexagon_tcg_ops; } @@ -382,6 +414,10 @@ static const TypeInfo hexagon_cpu_type_infos[] = { .class_init = hexagon_cpu_class_init, }, DEFINE_CPU(TYPE_HEXAGON_CPU_V67, hexagon_v67_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V68, hexagon_v68_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V69, hexagon_v69_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V71, hexagon_v71_cpu_init), + 
DEFINE_CPU(TYPE_HEXAGON_CPU_V73, hexagon_v73_cpu_init), }; DEFINE_TYPES(hexagon_cpu_type_infos) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 81b663e..bfcb105 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -43,6 +43,13 @@ #define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU #define TYPE_HEXAGON_CPU_V67 HEXAGON_CPU_TYPE_NAME("v67") +#define TYPE_HEXAGON_CPU_V68 HEXAGON_CPU_TYPE_NAME("v68") +#define TYPE_HEXAGON_CPU_V69 HEXAGON_CPU_TYPE_NAME("v69") +#define TYPE_HEXAGON_CPU_V71 HEXAGON_CPU_TYPE_NAME("v71") +#define TYPE_HEXAGON_CPU_V73 HEXAGON_CPU_TYPE_NAME("v73") + +void hexagon_cpu_list(void); +#define cpu_list hexagon_cpu_list #define MMU_USER_IDX 0 @@ -78,28 +85,21 @@ typedef struct { typedef struct CPUArchState { target_ulong gpr[TOTAL_PER_THREAD_REGS]; target_ulong pred[NUM_PREGS]; - target_ulong branch_taken; /* For comparing with LLDB on target - see adjust_stack_ptrs function */ target_ulong last_pc_dumped; target_ulong stack_start; uint8_t slot_cancelled; - target_ulong new_value[TOTAL_PER_THREAD_REGS]; + target_ulong new_value_usr; /* * Only used when HEX_DEBUG is on, but unconditionally included * to reduce recompile time when turning HEX_DEBUG on/off. */ - target_ulong this_PC; target_ulong reg_written[TOTAL_PER_THREAD_REGS]; - target_ulong new_pred_value[NUM_PREGS]; - target_ulong pred_written; - MemLog mem_log_stores[STORES_MAX]; - target_ulong pkt_has_store_s1; - target_ulong dczero_addr; float_status fp_status; @@ -146,6 +146,7 @@ struct ArchCPU { bool lldb_compat; target_ulong lldb_stack_adjust; + bool short_circuit; }; #include "cpu_bits.h" diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 041c8de..946c55c 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -797,7 +797,26 @@ static bool decode_parsebits_is_loopend(uint32_t encoding32) return bits == 0x2; } -static void +static bool has_valid_slot_assignment(Packet *pkt) +{ + int used_slots = 0; + for (int i = 0; i < pkt->num_insns; i++) { + int slot_mask; + Insn *insn = &pkt->insn[i]; + if (decode_opcode_ends_loop(insn->opcode)) { + /* We overload slot 0 for endloop. */ + continue; + } + slot_mask = 1 << insn->slot; + if (used_slots & slot_mask) { + return false; + } + used_slots |= slot_mask; + } + return true; +} + +static bool decode_set_slot_number(Packet *pkt) { int slot; @@ -886,6 +905,8 @@ decode_set_slot_number(Packet *pkt) /* Then push it to slot0 */ pkt->insn[slot1_iidx].slot = 0; } + + return has_valid_slot_assignment(pkt); } /* @@ -961,8 +982,11 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, decode_apply_extenders(pkt); if (!disas_only) { decode_remove_extenders(pkt); + if (!decode_set_slot_number(pkt)) { + /* Invalid packet */ + return 0; + } } - decode_set_slot_number(pkt); decode_fill_newvalue_regno(pkt); if (pkt->pkt_has_hvx) { diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c index 46083da..54d37e0 100644 --- a/target/hexagon/gdbstub.c +++ b/target/hexagon/gdbstub.c @@ -25,6 +25,14 @@ int hexagon_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; + if (n == HEX_REG_P3_0_ALIASED) { + uint32_t p3_0 = 0; + for (int i = 0; i < NUM_PREGS; i++) { + p3_0 = deposit32(p3_0, i * 8, 8, env->pred[i]); + } + return gdb_get_regl(mem_buf, p3_0); + } + if (n < TOTAL_PER_THREAD_REGS) { return gdb_get_regl(mem_buf, env->gpr[n]); } @@ -37,6 +45,14 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; + if (n == 
HEX_REG_P3_0_ALIASED) { + uint32_t p3_0 = ldtul_p(mem_buf); + for (int i = 0; i < NUM_PREGS; i++) { + env->pred[i] = extract32(p3_0, i * 8, 8); + } + return sizeof(target_ulong); + } + if (n < TOTAL_PER_THREAD_REGS) { env->gpr[n] = ldtul_p(mem_buf); return sizeof(target_ulong); @@ -44,3 +60,71 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) g_assert_not_reached(); } + +static int gdb_get_vreg(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + int total = 0; + int i; + for (i = 0; i < ARRAY_SIZE(env->VRegs[n].uw); i++) { + total += gdb_get_regl(mem_buf, env->VRegs[n].uw[i]); + } + return total; +} + +static int gdb_get_qreg(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + int total = 0; + int i; + for (i = 0; i < ARRAY_SIZE(env->QRegs[n].uw); i++) { + total += gdb_get_regl(mem_buf, env->QRegs[n].uw[i]); + } + return total; +} + +int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + if (n < NUM_VREGS) { + return gdb_get_vreg(env, mem_buf, n); + } + n -= NUM_VREGS; + + if (n < NUM_QREGS) { + return gdb_get_qreg(env, mem_buf, n); + } + + g_assert_not_reached(); +} + +static int gdb_put_vreg(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + int i; + for (i = 0; i < ARRAY_SIZE(env->VRegs[n].uw); i++) { + env->VRegs[n].uw[i] = ldtul_p(mem_buf); + mem_buf += 4; + } + return MAX_VEC_SIZE_BYTES; +} + +static int gdb_put_qreg(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + int i; + for (i = 0; i < ARRAY_SIZE(env->QRegs[n].uw); i++) { + env->QRegs[n].uw[i] = ldtul_p(mem_buf); + mem_buf += 4; + } + return MAX_VEC_SIZE_BYTES / 8; +} + +int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + if (n < NUM_VREGS) { + return gdb_put_vreg(env, mem_buf, n); + } + n -= NUM_VREGS; + + if (n < NUM_QREGS) { + return gdb_put_qreg(env, mem_buf, n); + } + + g_assert_not_reached(); +} diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index 
c74443d..00868cc 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -35,47 +35,55 @@ def analyze_opn_old(f, tag, regtype, regid, regno): predicated = "true" if is_predicated(tag) else "false" if regtype == "R": if regid in {"ss", "tt"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n") elif regid in {"dd", "ee", "xx", "yy"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n") elif regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") elif regid in {"d", "e", "x", "y"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_pred_read(ctx, {regN});\n") elif regid in {"d", "e", "x"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_pred_write(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write( - f"// const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n" + f" const int {regN} = insn->regno[{regno}] " + "+ HEX_REG_SA0;\n" ) + f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n") elif regid == "dd": f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n") elif regid == "s": f.write( - f"// const int {regN} = insn->regno[{regno}] " "+ 
HEX_REG_SA0;\n" + f" const int {regN} = insn->regno[{regno}] " + "+ HEX_REG_SA0;\n" ) + f.write(f" ctx_log_reg_read(ctx, {regN});\n") elif regid == "d": f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid == "u": - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": newv = "EXT_DFL" if hex_common.is_new_result(tag): @@ -88,22 +96,25 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f" ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n" ) elif regid in {"uu", "vv"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n") elif regid in {"s", "u", "v", "w"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read(ctx, {regN});\n") elif regid in {"d", "x", "y"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"d", "e", "x"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_qreg_write(ctx, {regN});\n") elif regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_qreg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + 
hex_common.bad_register(regtype, regid) elif regtype == "G": if regid in {"dd"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") @@ -114,7 +125,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): elif regid in {"s"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "S": if regid in {"dd"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") @@ -125,30 +136,33 @@ def analyze_opn_old(f, tag, regtype, regid, regno): elif regid in {"s"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def analyze_opn_new(f, tag, regtype, regid, regno): regN = f"{regtype}{regid}N" if regtype == "N": if regid in {"s", "t"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_pred_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid == "s": - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def analyze_opn(f, tag, regtype, regid, toss, numregs, i): @@ -160,9 
+174,9 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): analyze_opn_new(f, tag, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -174,8 +188,10 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i): ## Insn *insn G_GNUC_UNUSED = ctx->insn; ## const int RdN = insn->regno[0]; ## ctx_log_reg_write(ctx, RdN, false); -## // const int RsN = insn->regno[1]; -## // const int RtN = insn->regno[2]; +## const int RsN = insn->regno[1]; +## ctx_log_reg_read(ctx, RsN); +## const int RtN = insn->regno[2]; +## ctx_log_reg_read(ctx, RtN); ## } ## def gen_analyze_func(f, tag, regs, imms): @@ -193,8 +209,11 @@ def gen_analyze_func(f, tag, regs, imms): has_generated_helper = not hex_common.skip_qemu_helper( tag ) and not hex_common.is_idef_parser_enabled(tag) - if has_generated_helper and "A_SCALAR_LOAD" in hex_common.attribdict[tag]: - f.write(" ctx->need_pkt_has_store_s1 = true;\n") + + ## Mark HVX instructions with generated helpers + if (has_generated_helper and + "A_CVI" in hex_common.attribdict[tag]): + f.write(" ctx->has_hvx_helper = true;\n") f.write("}\n\n") diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index c73d792..e80550f 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -87,9 +87,9 @@ def gen_helper_arg_opn(f, regtype, regid, i, tag): elif hex_common.is_new_val(regtype, regid, tag): gen_helper_arg_new(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_arg_imm(f, 
immlett): @@ -135,7 +135,7 @@ def gen_helper_dest_decl_opn(f, regtype, regid, i): else: gen_helper_dest_decl(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_src_var_ext(f, regtype, regid): @@ -185,7 +185,7 @@ def gen_helper_return_opn(f, regtype, regid, i): else: gen_helper_return(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -239,7 +239,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): else: gen_helper_return_type(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) i += 1 if numscalarresults == 0: @@ -262,7 +262,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): # This is the return value of the function continue else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) i += 1 ## For conditional instructions, we pass in the destination register @@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", uint32_t pkt_has_multi_cof") + if (hex_common.need_pkt_need_commit(tag)): + f.write(", uint32_t pkt_need_commit") if hex_common.need_PC(tag): if i > 0: @@ -301,7 +303,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_slot(tag): if i > 0: f.write(", ") - f.write("uint32_t slot") + f.write("uint32_t slotval") i += 1 if hex_common.need_part1(tag): if i > 0: @@ -327,7 +329,12 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.is_hvx_reg(regtype): gen_helper_src_var_ext(f, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) + + if hex_common.need_slot(tag): + if "A_LOAD" in 
hex_common.attribdict[tag]: + f.write(" bool pkt_has_store_s1 = slotval & 0x1;\n") + f.write(" uint32_t slot = slotval >> 1;\n") if "A_FPOP" in hex_common.attribdict[tag]: f.write(" arch_fpop_start(env);\n") diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index 187cd6e..3dedd76 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -52,7 +52,7 @@ def gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_single(regid): f.write(f", {def_helper_types[regtype]}") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1 if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): for immlett, bits, immshift in imms: f.write(", s32") - ## Add the arguments for the instruction pkt_has_multi_cof, slot and - ## part1 (if needed) + ## Add the arguments for the instruction pkt_has_multi_cof, + ## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed) if hex_common.need_pkt_has_multi_cof(tag): f.write(", i32") + if hex_common.need_pkt_need_commit(tag): + f.write(', i32') if hex_common.need_PC(tag): f.write(", i32") if hex_common.helper_needs_next_PC(tag): diff --git a/target/hexagon/gen_idef_parser_funcs.py 
b/target/hexagon/gen_idef_parser_funcs.py index afe68bd..29160fc 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -103,12 +103,29 @@ def main(): continue if tag.startswith("V6_"): continue - if tag.startswith("F"): + if ( tag.startswith("F") and + tag not in { + "F2_sfimm_p", + "F2_sfimm_n", + "F2_dfimm_p", + "F2_dfimm_n", + "F2_dfmpyll", + "F2_dfmpylh" + }): continue if tag.endswith("_locked"): continue if "A_COF" in hex_common.attribdict[tag]: continue + if ( tag.startswith('R6_release_') ): + continue + ## Skip instructions that are incompatible with short-circuit + ## packet register writes + if ( tag == 'S2_insert' or + tag == 'S2_insert_rp' or + tag == 'S2_asr_r_svw_trun' or + tag == 'A2_swiz' ): + continue regs = tagregs[tag] imms = tagimms[tag] @@ -130,7 +147,7 @@ def main(): elif is_single_new: arguments.append(f"{prefix}{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) for immlett, bits, immshift in imms: arguments.append(hex_common.imm_name(immlett)) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 329e7a1..d78d99d 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -501,6 +501,38 @@ do { RsV = RsV; } while (0) /* + * allocframe(#uiV) + * RxV == r29 + */ +#define fGEN_TCG_S2_allocframe(SHORTCODE) \ + gen_allocframe(ctx, RxV, uiV) + +/* sub-instruction version (no RxV, so handle it manually) */ +#define fGEN_TCG_SS2_allocframe(SHORTCODE) \ + do { \ + TCGv r29 = tcg_temp_new(); \ + tcg_gen_mov_tl(r29, hex_gpr[HEX_REG_SP]); \ + gen_allocframe(ctx, r29, uiV); \ + gen_log_reg_write(ctx, HEX_REG_SP, r29); \ + } while (0) + +/* + * Rdd32 = deallocframe(Rs32):raw + * RddV == r31:30 + * RsV == r30 + */ +#define fGEN_TCG_L2_deallocframe(SHORTCODE) \ + gen_deallocframe(ctx, RddV, RsV) + +/* sub-instruction version (no RddV/RsV, so handle it manually) */ +#define 
fGEN_TCG_SL2_deallocframe(SHORTCODE) \ + do { \ + TCGv_i64 r31_30 = tcg_temp_new_i64(); \ + gen_deallocframe(ctx, r31_30, hex_gpr[HEX_REG_FP]); \ + gen_log_reg_write_pair(ctx, HEX_REG_FP, r31_30); \ + } while (0) + +/* * dealloc_return * Assembler mapped to * r31:30 = dealloc_return(r30):raw @@ -515,7 +547,7 @@ do { \ TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \ gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \ - gen_log_reg_write_pair(HEX_REG_FP, RddV); \ + gen_log_reg_write_pair(ctx, HEX_REG_FP, RddV); \ } while (0) /* @@ -549,9 +581,9 @@ #define fGEN_TCG_SL2_return_f(SHORTCODE) \ gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0]) #define fGEN_TCG_SL2_return_tnew(SHORTCODE) \ - gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0]) + gen_cond_return_subinsn(ctx, TCG_COND_EQ, ctx->new_pred_value[0]) #define fGEN_TCG_SL2_return_fnew(SHORTCODE) \ - gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0]) + gen_cond_return_subinsn(ctx, TCG_COND_NE, ctx->new_pred_value[0]) /* * Mathematical operations with more than one definition require @@ -560,7 +592,16 @@ #define fGEN_TCG_A5_ACS(SHORTCODE) \ do { \ gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ - gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \ + tcg_constant_tl(ctx->need_commit)); \ + } while (0) + +#define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \ + do { \ + TCGv p0 = tcg_temp_new(); \ + gen_helper_cabacdecbin_pred(p0, RssV, RttV); \ + gen_helper_cabacdecbin_val(RddV, RssV, RttV); \ + gen_log_pred_write(ctx, 0, p0); \ } while (0) /* @@ -653,6 +694,8 @@ gen_call(ctx, riV) #define fGEN_TCG_J2_callr(SHORTCODE) \ gen_callr(ctx, RsV) +#define fGEN_TCG_J2_callrh(SHORTCODE) \ + gen_callr(ctx, RsV) #define fGEN_TCG_J2_callt(SHORTCODE) \ gen_cond_call(ctx, PuV, TCG_COND_EQ, riV) @@ -663,6 +706,27 @@ #define fGEN_TCG_J2_callrf(SHORTCODE) \ gen_cond_callr(ctx, TCG_COND_NE, PuV, RsV) +#define fGEN_TCG_J2_loop0r(SHORTCODE) \ + 
gen_loop0r(ctx, RsV, riV) +#define fGEN_TCG_J2_loop1r(SHORTCODE) \ + gen_loop1r(ctx, RsV, riV) +#define fGEN_TCG_J2_loop0i(SHORTCODE) \ + gen_loop0i(ctx, UiV, riV) +#define fGEN_TCG_J2_loop1i(SHORTCODE) \ + gen_loop1i(ctx, UiV, riV) +#define fGEN_TCG_J2_ploop1sr(SHORTCODE) \ + gen_ploopNsr(ctx, 1, RsV, riV) +#define fGEN_TCG_J2_ploop1si(SHORTCODE) \ + gen_ploopNsi(ctx, 1, UiV, riV) +#define fGEN_TCG_J2_ploop2sr(SHORTCODE) \ + gen_ploopNsr(ctx, 2, RsV, riV) +#define fGEN_TCG_J2_ploop2si(SHORTCODE) \ + gen_ploopNsi(ctx, 2, UiV, riV) +#define fGEN_TCG_J2_ploop3sr(SHORTCODE) \ + gen_ploopNsr(ctx, 3, RsV, riV) +#define fGEN_TCG_J2_ploop3si(SHORTCODE) \ + gen_ploopNsi(ctx, 3, UiV, riV) + #define fGEN_TCG_J2_endloop0(SHORTCODE) \ gen_endloop0(ctx) #define fGEN_TCG_J2_endloop1(SHORTCODE) \ @@ -847,10 +911,20 @@ #define fGEN_TCG_J4_tstbit0_fp1_jump_t(SHORTCODE) \ gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_NE, riV) +/* p0 = cmp.eq(r0, #7) */ +#define fGEN_TCG_SA1_cmpeqi(SHORTCODE) \ + do { \ + TCGv p0 = tcg_temp_new(); \ + gen_comparei(TCG_COND_EQ, p0, RsV, uiV); \ + gen_log_pred_write(ctx, 0, p0); \ + } while (0) + #define fGEN_TCG_J2_jump(SHORTCODE) \ gen_jump(ctx, riV) #define fGEN_TCG_J2_jumpr(SHORTCODE) \ gen_jumpr(ctx, RsV) +#define fGEN_TCG_J2_jumprh(SHORTCODE) \ + gen_jumpr(ctx, RsV) #define fGEN_TCG_J4_jumpseti(SHORTCODE) \ do { \ tcg_gen_movi_tl(RdV, UiV); \ @@ -1044,6 +1118,22 @@ gen_jump(ctx, riV); \ } while (0) +/* if (p0.new) r0 = #0 */ +#define fGEN_TCG_SA1_clrtnew(SHORTCODE) \ + do { \ + tcg_gen_movcond_tl(TCG_COND_EQ, RdV, \ + ctx->new_pred_value[0], tcg_constant_tl(0), \ + RdV, tcg_constant_tl(0)); \ + } while (0) + +/* if (!p0.new) r0 = #0 */ +#define fGEN_TCG_SA1_clrfnew(SHORTCODE) \ + do { \ + tcg_gen_movcond_tl(TCG_COND_NE, RdV, \ + ctx->new_pred_value[0], tcg_constant_tl(0), \ + RdV, tcg_constant_tl(0)); \ + } while (0) + #define fGEN_TCG_J2_pause(SHORTCODE) \ do { \ uiV = uiV; \ @@ -1067,9 +1157,9 @@ gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0]) 
#define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \ - gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0]) + gen_cond_jumpr31(ctx, TCG_COND_EQ, ctx->new_pred_value[0]) #define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \ - gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0]) + gen_cond_jumpr31(ctx, TCG_COND_NE, ctx->new_pred_value[0]) /* Count trailing zeros/ones */ #define fGEN_TCG_S2_ct0(SHORTCODE) \ @@ -1095,6 +1185,24 @@ tcg_gen_extrl_i64_i32(RdV, tmp); \ } while (0) +#define fGEN_TCG_S2_insert(SHORTCODE) \ + do { \ + int width = uiV; \ + int offset = UiV; \ + if (width != 0) { \ + if (offset + width > 32) { \ + width = 32 - offset; \ + } \ + tcg_gen_deposit_tl(RxV, RxV, RsV, offset, width); \ + } \ + } while (0) +#define fGEN_TCG_S2_insert_rp(SHORTCODE) \ + gen_insert_rp(ctx, RxV, RsV, RttV) +#define fGEN_TCG_S2_asr_r_svw_trun(SHORTCODE) \ + gen_asr_r_svw_trun(ctx, RdV, RssV, RtV) +#define fGEN_TCG_A2_swiz(SHORTCODE) \ + tcg_gen_bswap_tl(RdV, RsV) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) @@ -1236,6 +1344,24 @@ uiV = uiV; \ } while (0) +#define fGEN_TCG_L2_loadw_aq(SHORTCODE) SHORTCODE +#define fGEN_TCG_L4_loadd_aq(SHORTCODE) SHORTCODE + +/* Nothing to do for these in qemu, need to suppress compiler warnings */ +#define fGEN_TCG_R6_release_at_vi(SHORTCODE) \ + do { \ + RsV = RsV; \ + } while (0) +#define fGEN_TCG_R6_release_st_vi(SHORTCODE) \ + do { \ + RsV = RsV; \ + } while (0) + +#define fGEN_TCG_S2_storew_rl_at_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S4_stored_rl_at_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S2_storew_rl_st_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S4_stored_rl_st_vi(SHORTCODE) SHORTCODE + #define fGEN_TCG_J2_trap0(SHORTCODE) \ do { \ uiV = uiV; \ diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index fcb3384..c73467b 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -37,7 +37,7 @@ def 
genptr_decl_pair_writable(f, tag, regtype, regid, regno): elif regtype == "C": f.write(f" const int {regN} = insn->regno[{regno}] + HEX_REG_SA0;\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) f.write(f" TCGv_i64 {regtype}{regid}V = " f"get_result_gpr_pair(ctx, {regN});\n") @@ -53,7 +53,7 @@ def genptr_decl_writable(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" TCGv {regtype}{regid}V = tcg_temp_new();\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl(f, tag, regtype, regid, regno): @@ -71,7 +71,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid in {"d", "e", "x", "y"}: genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: f.write( @@ -80,7 +80,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid in {"d", "e", "x"}: genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write(f" TCGv_i64 {regtype}{regid}V = " f"tcg_temp_new_i64();\n") @@ -96,7 +96,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid == "d": genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid == "u": f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -105,7 +105,7 @@ def genptr_decl(f, tag, regtype, regid, regno): "HEX_REG_M0];\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": if regid in {"dd"}: f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -159,7 +159,7 @@ def genptr_decl(f, 
tag, regtype, regid, regno): f"{regtype}{regid}V_off);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"d", "e", "x"}: f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -180,9 +180,9 @@ def genptr_decl(f, tag, regtype, regid, regno): if not hex_common.skip_qemu_helper(tag): f.write(f" TCGv_ptr {regtype}{regid}V = " "tcg_temp_new_ptr();\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl_new(f, tag, regtype, regid, regno): @@ -190,18 +190,18 @@ def genptr_decl_new(f, tag, regtype, regid, regno): if regid in {"s", "t"}: f.write( f" TCGv {regtype}{regid}N = " - f"hex_new_value[insn->regno[{regno}]];\n" + f"get_result_gpr(ctx, insn->regno[{regno}]);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: f.write( f" TCGv {regtype}{regid}N = " - f"hex_new_pred_value[insn->regno[{regno}]];\n" + f"ctx->new_pred_value[insn->regno[{regno}]];\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid == "s": f.write( @@ -218,9 +218,9 @@ def genptr_decl_new(f, tag, regtype, regid, regno): f"tcg_constant_tl({regtype}{regid}N_num);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i): @@ -232,9 +232,9 @@ def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): genptr_decl_new(f, tag, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + 
hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def genptr_decl_imm(f, immlett): @@ -266,7 +266,7 @@ def genptr_src_read(f, tag, regtype, regid): f"hex_gpr[{regtype}{regid}N]);\n" ) elif regid not in {"s", "t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid == "x": f.write( @@ -274,7 +274,7 @@ def genptr_src_read(f, tag, regtype, regid): f"hex_pred[{regtype}{regid}N]);\n" ) elif regid not in {"s", "t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write( @@ -287,10 +287,10 @@ def genptr_src_read(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid != "u": - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": if regid in {"uu", "vv", "xx"}: f.write(f" tcg_gen_gvec_mov(MO_64, {regtype}{regid}V_off,\n") @@ -311,7 +311,7 @@ def genptr_src_read(f, tag, regtype, regid): f.write(f" vreg_src_off(ctx, {regtype}{regid}N),\n") f.write(" sizeof(MMVector), sizeof(MMVector));\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"s", "t", "u", "v"}: if not hex_common.skip_qemu_helper(tag): @@ -326,23 +326,23 @@ def genptr_src_read(f, tag, regtype, regid): ) f.write(" sizeof(MMQReg), sizeof(MMQReg));\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_src_read_new(f, regtype, regid): if regtype == "N": if regid not in {"s", "t"}: - print("Bad register parse: ", 
regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid not in {"t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid != "s": - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_src_read_opn(f, regtype, regid, tag): @@ -354,9 +354,9 @@ def genptr_src_read_opn(f, regtype, regid, tag): elif hex_common.is_new_val(regtype, regid, tag): genptr_src_read_new(f, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i): @@ -370,9 +370,9 @@ def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): f.write(f"{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_decl_imm(f, immlett): @@ -387,7 +387,8 @@ def gen_helper_call_imm(f, immlett): def genptr_dst_write_pair(f, tag, regtype, regid): - f.write(f" gen_log_reg_write_pair({regtype}{regid}N, " f"{regtype}{regid}V);\n") + f.write(f" gen_log_reg_write_pair(ctx, {regtype}{regid}N, " + f"{regtype}{regid}V);\n") def genptr_dst_write(f, tag, regtype, regid): @@ -396,10 +397,11 @@ def genptr_dst_write(f, tag, regtype, regid): genptr_dst_write_pair(f, tag, regtype, regid) elif regid in {"d", "e", "x", "y"}: f.write( - f" gen_log_reg_write({regtype}{regid}N, " f"{regtype}{regid}V);\n" + f" gen_log_reg_write(ctx, 
{regtype}{regid}N, " + f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"d", "e", "x"}: f.write( @@ -407,7 +409,7 @@ def genptr_dst_write(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "dd": f.write( @@ -420,9 +422,9 @@ def genptr_dst_write(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): @@ -438,12 +440,12 @@ def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): f"{regtype}{regid}N, {newv});\n" ) elif regid not in {"dd", "d", "x"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid not in {"d", "e", "x"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_dst_write_opn(f, regtype, regid, tag): @@ -466,7 +468,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag): else: genptr_dst_write(f, tag, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -481,7 +483,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag): ## TCGv RsV = hex_gpr[insn->regno[1]]; ## TCGv RtV = hex_gpr[insn->regno[2]]; ## <GEN> -## gen_log_reg_write(RdN, RdV); +## gen_log_reg_write(ctx, RdN, RdV); ## } ## ## where <GEN> depends on hex_common.skip_qemu_helper(tag) @@ -530,7 +532,7 @@ def gen_tcg_func(f, tag, regs, imms): elif hex_common.is_new_val(regtype, regid, tag): 
declared.append(f"{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## Handle immediates for immlett, bits, immshift in imms: @@ -548,10 +550,13 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(" TCGv pkt_has_multi_cof = ") f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n") + if hex_common.need_pkt_need_commit(tag): + f.write(" TCGv pkt_need_commit = ") + f.write("tcg_constant_tl(ctx->need_commit);\n") if hex_common.need_part1(tag): f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n") if hex_common.need_slot(tag): - f.write(" TCGv slot = tcg_constant_tl(insn->slot);\n") + f.write(" TCGv slotval = gen_slotval(ctx);\n") if hex_common.need_PC(tag): f.write(" TCGv PC = tcg_constant_tl(ctx->pkt->pc);\n") if hex_common.helper_needs_next_PC(tag): @@ -594,12 +599,14 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", pkt_has_multi_cof") + if hex_common.need_pkt_need_commit(tag): + f.write(", pkt_need_commit") if hex_common.need_PC(tag): f.write(", PC") if hex_common.helper_needs_next_PC(tag): f.write(", next_PC") if hex_common.need_slot(tag): - f.write(", slot") + f.write(", slotval") if hex_common.need_part1(tag): f.write(", part1") f.write(");\n") diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h index d4aefe8..44bae53 100644 --- a/target/hexagon/gen_tcg_hvx.h +++ b/target/hexagon/gen_tcg_hvx.h @@ -128,6 +128,41 @@ static inline void assert_vhist_tmp(DisasContext *ctx) tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ sizeof(MMVector), sizeof(MMVector)) +#define fGEN_TCG_V6_vassign_tmp(SHORTCODE) \ + tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ + sizeof(MMVector), sizeof(MMVector)) + +#define fGEN_TCG_V6_vcombine_tmp(SHORTCODE) \ + do { \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + 
sizeof(MMVector), VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + } while (0) + +/* + * Vector combine + * + * Be careful that the source and dest don't overlap + */ +#define fGEN_TCG_V6_vcombine(SHORTCODE) \ + do { \ + if (VddV_off != VuV_off) { \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + } else { \ + intptr_t tmpoff = offsetof(CPUHexagonState, vtmp); \ + tcg_gen_gvec_mov(MO_64, tmpoff, VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), tmpoff, \ + sizeof(MMVector), sizeof(MMVector)); \ + } \ + } while (0) + /* Vector conditional move */ #define fGEN_TCG_VEC_CMOV(PRED) \ do { \ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 244063b..cb2aa28 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num) #define IMMUTABLE (~0) -static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { +const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { [HEX_REG_USR] = 0xc13000c0, [HEX_REG_PC] = IMMUTABLE, [HEX_REG_GP] = 0x3f, @@ -68,58 +68,72 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, } } -static TCGv get_result_gpr(DisasContext *ctx, int rnum) +TCGv get_result_gpr(DisasContext *ctx, int rnum) { - return hex_new_value[rnum]; + if (ctx->need_commit) { + if (rnum == HEX_REG_USR) { + return hex_new_value_usr; + } else { + if (ctx->new_value[rnum] == NULL) { + ctx->new_value[rnum] = tcg_temp_new(); + tcg_gen_movi_tl(ctx->new_value[rnum], 0); + } + return ctx->new_value[rnum]; + } + } else { + return hex_gpr[rnum]; + } } static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) { TCGv_i64 result = tcg_temp_new_i64(); - 
tcg_gen_concat_i32_i64(result, hex_new_value[rnum], - hex_new_value[rnum + 1]); + tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum), + get_result_gpr(ctx, rnum + 1)); return result; } -void gen_log_reg_write(int rnum, TCGv val) +void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val) { const target_ulong reg_mask = reg_immut_masks[rnum]; gen_masked_reg_write(val, hex_gpr[rnum], reg_mask); - tcg_gen_mov_tl(hex_new_value[rnum], val); + tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val); if (HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ tcg_gen_movi_tl(hex_reg_written[rnum], 1); } } -static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) +static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) { - const target_ulong reg_mask_low = reg_immut_masks[rnum]; - const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; TCGv val32 = tcg_temp_new(); /* Low word */ tcg_gen_extrl_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low); - tcg_gen_mov_tl(hex_new_value[rnum], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); - } + gen_log_reg_write(ctx, rnum, val32); /* High word */ tcg_gen_extrh_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high); - tcg_gen_mov_tl(hex_new_value[rnum + 1], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); + gen_log_reg_write(ctx, rnum + 1, val32); +} + +TCGv get_result_pred(DisasContext *ctx, int pnum) +{ + if (ctx->need_commit) { + if (ctx->new_pred_value[pnum] == NULL) { + ctx->new_pred_value[pnum] = tcg_temp_new(); + tcg_gen_movi_tl(ctx->new_pred_value[pnum], 0); + } + return ctx->new_pred_value[pnum]; + } else { + return hex_pred[pnum]; } } void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { + TCGv pred = get_result_pred(ctx, pnum); TCGv base_val = tcg_temp_new(); 
tcg_gen_andi_tl(base_val, val, 0xff); @@ -132,12 +146,13 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) * straight assignment. Otherwise, do an and. */ if (!test_bit(pnum, ctx->pregs_written)) { - tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val); + tcg_gen_mov_tl(pred, base_val); } else { - tcg_gen_and_tl(hex_new_pred_value[pnum], - hex_new_pred_value[pnum], base_val); + tcg_gen_and_tl(pred, pred, base_val); + } + if (HEX_DEBUG) { + tcg_gen_ori_tl(ctx->pred_written, ctx->pred_written, 1 << pnum); } - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); set_bit(pnum, ctx->pregs_written); } @@ -231,7 +246,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, if (reg_num == HEX_REG_P3_0_ALIASED) { gen_write_p3_0(ctx, val); } else { - gen_log_reg_write(reg_num, val); + gen_log_reg_write(ctx, reg_num, val); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; } @@ -255,7 +270,7 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, tcg_gen_extrh_i64_i32(val32, val); tcg_gen_mov_tl(result, val32); } else { - gen_log_reg_write_pair(reg_num, val); + gen_log_reg_write_pair(ctx, reg_num, val); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; ctx->num_insns = 0; @@ -383,6 +398,14 @@ static inline void gen_store_conditional8(DisasContext *ctx, tcg_gen_movi_tl(hex_llsc_addr, ~0); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +static TCGv gen_slotval(DisasContext *ctx) +{ + int slotval = (ctx->pkt->pkt_has_store_s1 & 1) | (ctx->insn->slot << 1); + return tcg_constant_tl(slotval); +} +#endif + void gen_store32(TCGv vaddr, TCGv src, int width, uint32_t slot) { tcg_gen_mov_tl(hex_store_addr[slot], vaddr); @@ -457,9 +480,9 @@ static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr, if (ctx->pkt->pkt_has_multi_cof) { /* If there are multiple branches in a packet, ignore the second one */ tcg_gen_movcond_tl(TCG_COND_NE, hex_gpr[HEX_REG_PC], - hex_branch_taken, tcg_constant_tl(0), + 
ctx->branch_taken, tcg_constant_tl(0), hex_gpr[HEX_REG_PC], addr); - tcg_gen_movi_tl(hex_branch_taken, 1); + tcg_gen_movi_tl(ctx->branch_taken, 1); } else { tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], addr); } @@ -480,7 +503,7 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off, ctx->branch_cond = TCG_COND_ALWAYS; if (pred != NULL) { ctx->branch_cond = cond; - tcg_gen_mov_tl(hex_branch_taken, pred); + tcg_gen_mov_tl(ctx->branch_taken, pred); } ctx->branch_dest = dest; } @@ -518,6 +541,55 @@ static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2) tcg_gen_movcond_tl(cond, res, arg1, arg2, one, zero); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +static inline void gen_loop0r(DisasContext *ctx, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC0, RsV); + gen_log_reg_write(ctx, HEX_REG_SA0, tcg_constant_tl(ctx->pkt->pc + riV)); + gen_set_usr_fieldi(ctx, USR_LPCFG, 0); +} + +static void gen_loop0i(DisasContext *ctx, int count, int riV) +{ + gen_loop0r(ctx, tcg_constant_tl(count), riV); +} + +static inline void gen_loop1r(DisasContext *ctx, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC1, RsV); + gen_log_reg_write(ctx, HEX_REG_SA1, tcg_constant_tl(ctx->pkt->pc + riV)); +} + +static void gen_loop1i(DisasContext *ctx, int count, int riV) +{ + gen_loop1r(ctx, tcg_constant_tl(count), riV); +} + +static void gen_ploopNsr(DisasContext *ctx, int N, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC0, RsV); + gen_log_reg_write(ctx, HEX_REG_SA0, tcg_constant_tl(ctx->pkt->pc + riV)); + gen_set_usr_fieldi(ctx, USR_LPCFG, N); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0)); +} + +static void gen_ploopNsi(DisasContext *ctx, int N, int count, int riV) +{ + gen_ploopNsr(ctx, N, tcg_constant_tl(count), riV); +} + +static inline void gen_comparei(TCGCond cond, TCGv res, TCGv arg1, int arg2) +{ + gen_compare(cond, res, arg1, tcg_constant_tl(arg2)); +} 
+#endif + static void gen_cond_jumpr(DisasContext *ctx, TCGv dst_pc, TCGCond cond, TCGv pred) { @@ -547,7 +619,7 @@ static void gen_cmpnd_cmp_jmp(DisasContext *ctx, gen_log_pred_write(ctx, pnum, pred); } else { TCGv pred = tcg_temp_new(); - tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]); + tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]); gen_cond_jump(ctx, cond2, pred, pc_off); } } @@ -604,7 +676,7 @@ static void gen_cmpnd_tstbit0_jmp(DisasContext *ctx, gen_log_pred_write(ctx, pnum, pred); } else { TCGv pred = tcg_temp_new(); - tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]); + tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]); gen_cond_jump(ctx, cond, pred, pc_off); } } @@ -665,6 +737,18 @@ static void gen_cond_callr(DisasContext *ctx, gen_set_label(skip); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +/* frame = ((LR << 32) | FP) ^ (FRAMEKEY << 32)) */ +static TCGv_i64 gen_frame_scramble(void) +{ + TCGv_i64 frame = tcg_temp_new_i64(); + TCGv tmp = tcg_temp_new(); + tcg_gen_xor_tl(tmp, hex_gpr[HEX_REG_LR], hex_gpr[HEX_REG_FRAMEKEY]); + tcg_gen_concat_i32_i64(frame, hex_gpr[HEX_REG_FP], tmp); + return frame; +} +#endif + /* frame ^= (int64_t)FRAMEKEY << 32 */ static void gen_frame_unscramble(TCGv_i64 frame) { @@ -681,6 +765,41 @@ static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA) tcg_gen_qemu_ld_i64(frame, EA, ctx->mem_idx, MO_TEUQ); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +/* Stack overflow check */ +static void gen_framecheck(TCGv EA, int framesize) +{ + /* Not modelled in linux-user mode */ + /* Placeholder for system mode */ +#ifndef CONFIG_USER_ONLY + g_assert_not_reached(); +#endif +} + +static void gen_allocframe(DisasContext *ctx, TCGv r29, int framesize) +{ + TCGv r30 = tcg_temp_new(); + TCGv_i64 frame; + tcg_gen_addi_tl(r30, r29, -8); + frame = gen_frame_scramble(); + gen_store8(cpu_env, r30, frame, ctx->insn->slot); + gen_log_reg_write(ctx, HEX_REG_FP, r30); + gen_framecheck(r30, framesize); + tcg_gen_subi_tl(r29, r30, framesize); +} + +static 
void gen_deallocframe(DisasContext *ctx, TCGv_i64 r31_30, TCGv r30) +{ + TCGv r29 = tcg_temp_new(); + TCGv_i64 frame = tcg_temp_new_i64(); + gen_load_frame(ctx, frame, r30); + gen_frame_unscramble(frame); + tcg_gen_mov_i64(r31_30, frame); + tcg_gen_addi_tl(r29, r30, 8); + gen_log_reg_write(ctx, HEX_REG_SP, r29); +} +#endif + static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src) { /* @@ -719,7 +838,7 @@ static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred) { TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond); - gen_log_reg_write_pair(HEX_REG_FP, RddV); + gen_log_reg_write_pair(ctx, HEX_REG_FP, RddV); } static void gen_endloop0(DisasContext *ctx) @@ -730,15 +849,13 @@ static void gen_endloop0(DisasContext *ctx) /* * if (lpcfg == 1) { - * hex_new_pred_value[3] = 0xff; - * hex_pred_written |= 1 << 3; + * p3 = 0xff; * } */ TCGLabel *label1 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); { - tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0xff)); } gen_set_label(label1); @@ -807,14 +924,12 @@ static void gen_endloop01(DisasContext *ctx) /* * if (lpcfg == 1) { - * hex_new_pred_value[3] = 0xff; - * hex_pred_written |= 1 << 3; + * p3 = 0xff; * } */ tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); { - tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0xff)); } gen_set_label(label1); @@ -877,6 +992,7 @@ static void gen_cmpi_jumpnv(DisasContext *ctx, /* Shift left with saturation */ static void gen_shl_sat(DisasContext *ctx, TCGv dst, TCGv src, TCGv shift_amt) { + TCGv tmp = tcg_temp_new(); /* In case dst == src */ TCGv usr = get_result_gpr(ctx, HEX_REG_USR); TCGv sh32 = tcg_temp_new(); TCGv dst_sar = tcg_temp_new(); @@ -901,17 +1017,17 @@ static 
void gen_shl_sat(DisasContext *ctx, TCGv dst, TCGv src, TCGv shift_amt) */ tcg_gen_andi_tl(sh32, shift_amt, 31); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, sh32, shift_amt, + tcg_gen_movcond_tl(TCG_COND_EQ, tmp, sh32, shift_amt, src, tcg_constant_tl(0)); - tcg_gen_shl_tl(dst, dst, sh32); - tcg_gen_sar_tl(dst_sar, dst, sh32); + tcg_gen_shl_tl(tmp, tmp, sh32); + tcg_gen_sar_tl(dst_sar, tmp, sh32); tcg_gen_movcond_tl(TCG_COND_LT, satval, src, tcg_constant_tl(0), min, max); tcg_gen_setcond_tl(TCG_COND_NE, ovf, dst_sar, src); tcg_gen_shli_tl(ovf, ovf, reg_field_info[USR_OVF].offset); tcg_gen_or_tl(usr, usr, ovf); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, dst_sar, src, dst, satval); + tcg_gen_movcond_tl(TCG_COND_EQ, dst, dst_sar, src, tmp, satval); } static void gen_sar(TCGv dst, TCGv src, TCGv shift_amt) @@ -969,6 +1085,105 @@ static void gen_asl_r_r_sat(DisasContext *ctx, TCGv RdV, TCGv RsV, TCGv RtV) gen_set_label(done); } +static void gen_insert_rp(DisasContext *ctx, TCGv RxV, TCGv RsV, TCGv_i64 RttV) +{ + /* + * int width = fZXTN(6, 32, (fGETWORD(1, RttV))); + * int offset = fSXTN(7, 32, (fGETWORD(0, RttV))); + * size8u_t mask = ((fCONSTLL(1) << width) - 1); + * if (offset < 0) { + * RxV = 0; + * } else { + * RxV &= ~(mask << offset); + * RxV |= ((RsV & mask) << offset); + * } + */ + + TCGv width = tcg_temp_new(); + TCGv offset = tcg_temp_new(); + TCGv_i64 mask = tcg_temp_new_i64(); + TCGv_i64 result = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 offset64 = tcg_temp_new_i64(); + TCGLabel *label = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_extrh_i64_i32(width, RttV); + tcg_gen_extract_tl(width, width, 0, 6); + tcg_gen_extrl_i64_i32(offset, RttV); + tcg_gen_sextract_tl(offset, offset, 0, 7); + /* Possible values for offset are -64 .. 
63 */ + tcg_gen_brcondi_tl(TCG_COND_GE, offset, 0, label); + /* For negative offsets, zero out the result */ + tcg_gen_movi_tl(RxV, 0); + tcg_gen_br(done); + gen_set_label(label); + /* At this point, possible values of offset are 0 .. 63 */ + tcg_gen_ext_i32_i64(mask, width); + tcg_gen_shl_i64(mask, tcg_constant_i64(1), mask); + tcg_gen_subi_i64(mask, mask, 1); + tcg_gen_extu_i32_i64(result, RxV); + tcg_gen_ext_i32_i64(tmp, offset); + tcg_gen_shl_i64(tmp, mask, tmp); + tcg_gen_andc_i64(result, result, tmp); + tcg_gen_extu_i32_i64(tmp, RsV); + tcg_gen_and_i64(tmp, tmp, mask); + tcg_gen_extu_i32_i64(offset64, offset); + tcg_gen_shl_i64(tmp, tmp, offset64); + tcg_gen_or_i64(result, result, tmp); + tcg_gen_extrl_i64_i32(RxV, result); + gen_set_label(done); +} + +static void gen_asr_r_svw_trun(DisasContext *ctx, TCGv RdV, + TCGv_i64 RssV, TCGv RtV) +{ + /* + * for (int i = 0; i < 2; i++) { + * fSETHALF(i, RdV, fGETHALF(0, ((fSXTN(7, 32, RtV) > 0) ? + * (fCAST4_8s(fGETWORD(i, RssV)) >> fSXTN(7, 32, RtV)) : + * (fCAST4_8s(fGETWORD(i, RssV)) << -fSXTN(7, 32, RtV))))); + * } + */ + TCGv shift_amt32 = tcg_temp_new(); + TCGv_i64 shift_amt64 = tcg_temp_new_i64(); + TCGv_i64 tmp64 = tcg_temp_new_i64(); + TCGv tmp32 = tcg_temp_new(); + TCGLabel *label = gen_new_label(); + TCGLabel *zero = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_sextract_tl(shift_amt32, RtV, 0, 7); + /* Possible values of shift_amt32 are -64 .. 63 */ + tcg_gen_brcondi_tl(TCG_COND_LE, shift_amt32, 0, label); + /* After branch, possible values of shift_amt32 are 1 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_sar_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(label); + tcg_gen_neg_tl(shift_amt32, shift_amt32); + /*At this point, possible values of shift_amt32 are 0 .. 
64 */ + tcg_gen_brcondi_tl(TCG_COND_GT, shift_amt32, 63, zero); + /*At this point, possible values of shift_amt32 are 0 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_shl_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(zero); + /* When the shift_amt is 64, zero out the result */ + tcg_gen_movi_tl(RdV, 0); + gen_set_label(done); +} + static intptr_t vreg_src_off(DisasContext *ctx, int num) { intptr_t offset = offsetof(CPUHexagonState, VRegs[num]); @@ -1008,7 +1223,11 @@ static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num, static intptr_t get_result_qreg(DisasContext *ctx, int qnum) { - return offsetof(CPUHexagonState, future_QRegs[qnum]); + if (ctx->need_commit) { + return offsetof(CPUHexagonState, future_QRegs[qnum]); + } else { + return offsetof(CPUHexagonState, QRegs[qnum]); + } } static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, @@ -1134,22 +1353,28 @@ void gen_sat_i32(TCGv dest, TCGv source, int width) void gen_sat_i32_ovfl(TCGv ovfl, TCGv dest, TCGv source, int width) { - gen_sat_i32(dest, source, width); - tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, dest); + TCGv tmp = tcg_temp_new(); /* In case dest == source */ + gen_sat_i32(tmp, source, width); + tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_satu_i32(TCGv dest, TCGv source, int width) { + TCGv tmp = tcg_temp_new(); /* In case dest == source */ TCGv max_val = tcg_constant_tl((1 << width) - 1); TCGv zero = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_GTU, dest, source, max_val, max_val, source); - tcg_gen_movcond_tl(TCG_COND_LT, dest, source, zero, zero, dest); + tcg_gen_movcond_tl(TCG_COND_GTU, tmp, source, max_val, max_val, source); + tcg_gen_movcond_tl(TCG_COND_LT, tmp, source, zero, zero, 
tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_satu_i32_ovfl(TCGv ovfl, TCGv dest, TCGv source, int width) { - gen_satu_i32(dest, source, width); - tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, dest); + TCGv tmp = tcg_temp_new(); /* In case dest == source */ + gen_satu_i32(tmp, source, width); + tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width) @@ -1162,27 +1387,33 @@ void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width) void gen_sat_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 ovfl_64; - gen_sat_i64(dest, source, width); + gen_sat_i64(tmp, source, width); ovfl_64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, dest, source); + tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, tmp, source); + tcg_gen_mov_i64(dest, tmp); tcg_gen_trunc_i64_tl(ovfl, ovfl_64); } void gen_satu_i64(TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 max_val = tcg_constant_i64((1LL << width) - 1LL); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_movcond_i64(TCG_COND_GTU, dest, source, max_val, max_val, source); - tcg_gen_movcond_i64(TCG_COND_LT, dest, source, zero, zero, dest); + tcg_gen_movcond_i64(TCG_COND_GTU, tmp, source, max_val, max_val, source); + tcg_gen_movcond_i64(TCG_COND_LT, tmp, source, zero, zero, tmp); + tcg_gen_mov_i64(dest, tmp); } void gen_satu_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 ovfl_64; - gen_satu_i64(dest, source, width); + gen_satu_i64(tmp, source, width); ovfl_64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, dest, source); + tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, tmp, source); + tcg_gen_mov_i64(dest, tmp); tcg_gen_trunc_i64_tl(ovfl, ovfl_64); } diff --git 
a/target/hexagon/genptr.h b/target/hexagon/genptr.h index 76e497a..a4b43c2 100644 --- a/target/hexagon/genptr.h +++ b/target/hexagon/genptr.h @@ -35,7 +35,9 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot); void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot); TCGv gen_read_reg(TCGv result, int num); TCGv gen_read_preg(TCGv pred, uint8_t num); -void gen_log_reg_write(int rnum, TCGv val); +TCGv get_result_gpr(DisasContext *ctx, int rnum); +TCGv get_result_pred(DisasContext *ctx, int pnum); +void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val); void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val); void gen_set_usr_field(DisasContext *ctx, int field, TCGv val); void gen_set_usr_fieldi(DisasContext *ctx, int field, int x); @@ -58,4 +60,6 @@ void gen_set_half(int N, TCGv result, TCGv src); void gen_set_half_i64(int N, TCGv_i64 result, TCGv src); void probe_noshuf_load(TCGv va, int s, int mi); +extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS]; + #endif diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index ed7f984..fa0ebaf 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -21,7 +21,7 @@ DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) -DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) +DEF_HELPER_FLAGS_5(debug_commit_end, TCG_CALL_NO_WG, void, env, i32, int, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_3(gather_store, void, env, i32, int) DEF_HELPER_1(commit_hvx_stores, void, env) @@ -29,8 +29,10 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) -DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) 
+DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32) DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64) +DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 40f28ca..f3aac55 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -30,6 +30,9 @@ tags = [] # list of all tags overrides = {} # tags with helper overrides idef_parser_enabled = {} # tags enabled for idef-parser +def bad_register(*args): + args_str = ", ".join(map(str, args)) + raise Exception(f"Bad register parse: {args_str}") # We should do this as a hash for performance, # but to keep order let's keep it as a list. @@ -97,6 +100,12 @@ def calculate_attribs(): add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR") add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD") add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE") + add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1') + add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1') + add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3') # Recurse down macros, find attributes from sub-macros macroValues = list(macros.values()) @@ -241,9 +250,10 @@ def is_new_val(regtype, regid, tag): def need_slot(tag): if ( - ("A_CONDEXEC" in attribdict[tag] and "A_JUMP" not in attribdict[tag]) - or "A_STORE" in attribdict[tag] - or "A_LOAD" in attribdict[tag] + "A_CVI_SCATTER" not in attribdict[tag] + and "A_CVI_GATHER" not in attribdict[tag] + and ("A_STORE" in attribdict[tag] + or "A_LOAD" in attribdict[tag]) ): return 1 else: @@ -270,6 +280,9 @@ def need_pkt_has_multi_cof(tag): return "A_COF" in 
attribdict[tag] +def need_pkt_need_commit(tag): + return 'A_IMPLICIT_WRITES_USR' in attribdict[tag] + def need_condexec_reg(tag, regs): if "A_CONDEXEC" in attribdict[tag]: for regtype, regid, toss, numregs in regs: diff --git a/target/hexagon/iclass.c b/target/hexagon/iclass.c index 6091286..c3f8523 100644 --- a/target/hexagon/iclass.c +++ b/target/hexagon/iclass.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -51,8 +51,10 @@ SlotMask find_iclass_slots(Opcode opcode, int itype) return SLOTS_0; } else if ((opcode == J2_trap0) || (opcode == Y2_isync) || - (opcode == J2_pause) || (opcode == J4_hintjumpr)) { + (opcode == J2_pause)) { return SLOTS_2; + } else if (opcode == J4_hintjumpr) { + return SLOTS_23; } else if (GET_ATTRIB(opcode, A_CRSLOT23)) { return SLOTS_23; } else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) { diff --git a/target/hexagon/idef-parser/idef-parser.lex b/target/hexagon/idef-parser/idef-parser.lex index 5eb8ac5..cd5958e 100644 --- a/target/hexagon/idef-parser/idef-parser.lex +++ b/target/hexagon/idef-parser/idef-parser.lex @@ -401,12 +401,39 @@ STRING_LIT \"(\\.|[^"\\])*\" } return SIGN; } -"0x"{HEX_DIGIT}+ | -{DIGIT}+ { yylval->rvalue.type = IMMEDIATE; - yylval->rvalue.bit_width = 32; - yylval->rvalue.signedness = SIGNED; +"0x"{HEX_DIGIT}+ { uint64_t value = strtoull(yytext, NULL, 0); + yylval->rvalue.type = IMMEDIATE; yylval->rvalue.imm.type = VALUE; - yylval->rvalue.imm.value = strtoull(yytext, NULL, 0); + yylval->rvalue.imm.value = value; + if (value <= INT_MAX) { + yylval->rvalue.bit_width = sizeof(int) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value <= UINT_MAX) { + yylval->rvalue.bit_width = sizeof(unsigned int) * 8; + yylval->rvalue.signedness = 
UNSIGNED; + } else if (value <= LONG_MAX) { + yylval->rvalue.bit_width = sizeof(long) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value <= ULONG_MAX) { + yylval->rvalue.bit_width = sizeof(unsigned long) * 8; + yylval->rvalue.signedness = UNSIGNED; + } else { + g_assert_not_reached(); + } + return IMM; } +{DIGIT}+ { int64_t value = strtoll(yytext, NULL, 0); + yylval->rvalue.type = IMMEDIATE; + yylval->rvalue.imm.type = VALUE; + yylval->rvalue.imm.value = value; + if (value >= INT_MIN && value <= INT_MAX) { + yylval->rvalue.bit_width = sizeof(int) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value >= LONG_MIN && value <= LONG_MAX) { + yylval->rvalue.bit_width = sizeof(long) * 8; + yylval->rvalue.signedness = SIGNED; + } else { + g_assert_not_reached(); + } return IMM; } "0x"{HEX_DIGIT}+"ULL" | {DIGIT}+"ULL" { yylval->rvalue.type = IMMEDIATE; diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y index 5444fd4..5c98395 100644 --- a/target/hexagon/idef-parser/idef-parser.y +++ b/target/hexagon/idef-parser/idef-parser.y @@ -594,8 +594,6 @@ rvalue : FAIL | CAST rvalue { @1.last_column = @2.last_column; - /* Assign target signedness */ - $2.signedness = $1.signedness; $$ = gen_cast_op(c, &@1, &$2, $1.bit_width, $1.signedness); } | rvalue EQ rvalue @@ -685,7 +683,7 @@ rvalue : FAIL yyassert(c, &@1, $5.type == IMMEDIATE && $5.imm.type == VALUE, "SXT expects immediate values\n"); - $$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, SIGNED); + $$ = gen_extend_op(c, &@1, &$3, 64, &$7, SIGNED); } | ZXT '(' rvalue ',' IMM ',' rvalue ')' { @@ -693,7 +691,7 @@ rvalue : FAIL yyassert(c, &@1, $5.type == IMMEDIATE && $5.imm.type == VALUE, "ZXT expects immediate values\n"); - $$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, UNSIGNED); + $$ = gen_extend_op(c, &@1, &$3, 64, &$7, UNSIGNED); } | '(' rvalue ')' { diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index 
8734218..7b5ebaf 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -167,8 +167,9 @@ void reg_print(Context *c, YYLTYPE *locp, HexReg *reg) EMIT(c, "hex_gpr[%u]", reg->id); } -void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) +void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue) { + HexImm *imm = &rvalue->imm; switch (imm->type) { case I: EMIT(c, "i"); @@ -177,7 +178,21 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) EMIT(c, "%ciV", imm->id); break; case VALUE: - EMIT(c, "((int64_t) %" PRIu64 "ULL)", (int64_t) imm->value); + if (rvalue->bit_width == 32) { + if (rvalue->signedness == UNSIGNED) { + EMIT(c, "((uint32_t) 0x%" PRIx32 ")", (uint32_t) imm->value); + } else { + EMIT(c, "((int32_t) 0x%" PRIx32 ")", (int32_t) imm->value); + } + } else if (rvalue->bit_width == 64) { + if (rvalue->signedness == UNSIGNED) { + EMIT(c, "((uint64_t) 0x%" PRIx64 "ULL)", (uint64_t) imm->value); + } else { + EMIT(c, "((int64_t) 0x%" PRIx64 "LL)", (int64_t) imm->value); + } + } else { + g_assert_not_reached(); + } break; case QEMU_TMP: EMIT(c, "qemu_tmp_%" PRIu64, imm->index); @@ -213,7 +228,7 @@ void rvalue_print(Context *c, YYLTYPE *locp, void *pointer) tmp_print(c, locp, &rvalue->tmp); break; case IMMEDIATE: - imm_print(c, locp, &rvalue->imm); + imm_print(c, locp, rvalue); break; case VARID: var_print(c, locp, &rvalue->var); @@ -386,13 +401,10 @@ HexValue gen_rvalue_extend(Context *c, YYLTYPE *locp, HexValue *rvalue) if (rvalue->type == IMMEDIATE) { HexValue res = gen_imm_qemu_tmp(c, locp, 64, rvalue->signedness); - bool is_unsigned = (rvalue->signedness == UNSIGNED); - const char *sign_suffix = is_unsigned ? 
"u" : ""; gen_c_int_type(c, locp, 64, rvalue->signedness); - OUT(c, locp, " ", &res, " = "); - OUT(c, locp, "(", sign_suffix, "int64_t) "); - OUT(c, locp, "(", sign_suffix, "int32_t) "); - OUT(c, locp, rvalue, ";\n"); + OUT(c, locp, " ", &res, " = ("); + gen_c_int_type(c, locp, 64, rvalue->signedness); + OUT(c, locp, ")", rvalue, ";\n"); return res; } else { HexValue res = gen_tmp(c, locp, 64, rvalue->signedness); @@ -959,33 +971,18 @@ HexValue gen_cast_op(Context *c, unsigned target_width, HexSignedness signedness) { + HexValue res; assert_signedness(c, locp, src->signedness); if (src->bit_width == target_width) { - return *src; - } else if (src->type == IMMEDIATE) { - HexValue res = *src; - res.bit_width = target_width; - res.signedness = signedness; - return res; + res = *src; + } else if (src->bit_width < target_width) { + res = gen_rvalue_extend(c, locp, src); } else { - HexValue res = gen_tmp(c, locp, target_width, signedness); - /* Truncate */ - if (src->bit_width > target_width) { - OUT(c, locp, "tcg_gen_trunc_i64_tl(", &res, ", ", src, ");\n"); - } else { - assert_signedness(c, locp, src->signedness); - if (src->signedness == UNSIGNED) { - /* Extend unsigned */ - OUT(c, locp, "tcg_gen_extu_i32_i64(", - &res, ", ", src, ");\n"); - } else { - /* Extend signed */ - OUT(c, locp, "tcg_gen_ext_i32_i64(", - &res, ", ", src, ");\n"); - } - } - return res; + /* src->bit_width > target_width */ + res = gen_rvalue_truncate(c, locp, src); } + res.signedness = signedness; + return res; } @@ -1123,7 +1120,7 @@ HexValue gen_extend_op(Context *c, HexValue *value, HexSignedness signedness) { - unsigned bit_width = (dst_width = 64) ? 64 : 32; + unsigned bit_width = (dst_width == 64) ? 
64 : 32; HexValue value_m = *value; HexValue src_width_m = *src_width; @@ -1318,7 +1315,7 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value) value_m = rvalue_materialize(c, locp, &value_m); OUT(c, locp, - "gen_log_reg_write(", ®->reg.id, ", ", + "gen_log_reg_write(ctx, ", ®->reg.id, ", ", &value_m, ");\n"); } @@ -1854,7 +1851,7 @@ HexValue gen_rvalue_pred(Context *c, YYLTYPE *locp, HexValue *pred) *pred = gen_tmp(c, locp, 32, UNSIGNED); if (is_dotnew) { OUT(c, locp, "tcg_gen_mov_i32(", pred, - ", hex_new_pred_value["); + ", ctx->new_pred_value["); OUT(c, locp, pred_str, "]);\n"); } else { OUT(c, locp, "gen_read_preg(", pred, ", ", pred_str, ");\n"); diff --git a/target/hexagon/idef-parser/parser-helpers.h b/target/hexagon/idef-parser/parser-helpers.h index 1239d23..7c58087 100644 --- a/target/hexagon/idef-parser/parser-helpers.h +++ b/target/hexagon/idef-parser/parser-helpers.h @@ -80,7 +80,7 @@ void reg_compose(Context *c, YYLTYPE *locp, HexReg *reg, char reg_id[5]); void reg_print(Context *c, YYLTYPE *locp, HexReg *reg); -void imm_print(Context *c, YYLTYPE *locp, HexImm *imm); +void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue); void var_print(Context *c, YYLTYPE *locp, HexVar *var); diff --git a/target/hexagon/imported/branch.idef b/target/hexagon/imported/branch.idef index 88f5f48..93e2e37 100644 --- a/target/hexagon/imported/branch.idef +++ b/target/hexagon/imported/branch.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,6 +34,9 @@ Q6INSN(J2_jump,"jump #r22:2",ATTRIBS(A_JDIR), "direct unconditional jump", Q6INSN(J2_jumpr,"jumpr Rs32",ATTRIBS(A_JINDIR), "indirect unconditional jump", {fJUMPR(RsN,RsV,COF_TYPE_JUMPR);}) +Q6INSN(J2_jumprh,"jumprh Rs32",ATTRIBS(A_JINDIR, A_HINTED_COF), "indirect unconditional jump", +{fJUMPR(RsN,RsV,COF_TYPE_JUMPR);}) + #define OLDCOND_JUMP(TAG,OPER,OPER2,ATTRIB,DESCR,SEMANTICS) \ Q6INSN(TAG##t,"if (Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLD(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLD(PuV)) { SEMANTICS; }}) \ Q6INSN(TAG##f,"if (!Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLDNOT(PuV)) { SEMANTICS; }}) \ @@ -196,6 +199,8 @@ Q6INSN(J2_callrt,"if (Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional Q6INSN(J2_callrf,"if (!Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional call if false", {fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0);if (fLSBOLDNOT(PuV)) { fCALLR(RsV); }}) +Q6INSN(J2_callrh,"callrh Rs32",ATTRIBS(CINDIR_STD, A_HINTED_COF), "hinted indirect unconditional call", +{ fCALLR(RsV); }) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index d71c04c..0cd30a5 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -382,14 +382,23 @@ DEF_ENC32(L4_return_fnew_pt, ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd") DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv ---ddddd") DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd") -DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP00---- -00ddddd") +DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") +DEF_ENC32(L2_loadw_aq, ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd") +DEF_ENC32(L4_loadd_aq, ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd") +DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") +DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") +DEF_ENC32(S2_storew_rl_at_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd") +DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") -DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP01---- -00ddddd") +DEF_ENC32(S4_stored_rl_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd") +DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd") + +DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii") DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") @@ -479,8 +488,8 @@ STD_PST_ENC(rinew, "1 101","10ttt") /* x bus/cache */ /* x store/cache */ DEF_ENC32(S2_allocframe, ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii") -DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ------dd") -DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ------dd") +DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") +DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") DEF_ENC32(Y2_dczeroa, ICLASS_ST" 000 01 10sssss PP0----- --------") @@ -515,6 +524,7 @@ 
DEF_FIELD32(ICLASS_J" 110- -------- PP-!---- --------",J_PT,"Predict-taken") DEF_FIELDROW_DESC32(ICLASS_J" 0000 -------- PP------ --------","[#0] PC=(Rs), R31=return") DEF_ENC32(J2_callr, ICLASS_J" 0000 101sssss PP------ --------") +DEF_ENC32(J2_callrh, ICLASS_J" 0000 110sssss PP------ --------") DEF_FIELDROW_DESC32(ICLASS_J" 0001 -------- PP------ --------","[#1] if (Pu) PC=(Rs), R31=return") DEF_ENC32(J2_callrt, ICLASS_J" 0001 000sssss PP----uu --------") @@ -522,6 +532,7 @@ DEF_ENC32(J2_callrf, ICLASS_J" 0001 001sssss PP----uu --------") DEF_FIELDROW_DESC32(ICLASS_J" 0010 -------- PP------ --------","[#2] PC=(Rs); ") DEF_ENC32(J2_jumpr, ICLASS_J" 0010 100sssss PP------ --------") +DEF_ENC32(J2_jumprh, ICLASS_J" 0010 110sssss PP------ --------") DEF_ENC32(J4_hintjumpr, ICLASS_J" 0010 101sssss PP------ --------") DEF_FIELDROW_DESC32(ICLASS_J" 0011 -------- PP------ --------","[#3] if (Pu) PC=(Rs) ") diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 237634b..5319817 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -128,6 +128,24 @@ Q6INSN(S2_allocframe,"allocframe(Rx32,#u11:3):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEM #define A_RETURN A_RESTRICT_COF_MAX1,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOSLOT1_STORE,A_RET_TYPE,A_DEALLOCRET +/**** Load Acquire Store Release Instructions****/ + + + +Q6INSN(L2_loadw_aq,"Rd32=memw_aq(Rs32)",ATTRIBS(A_REGWRSIZE_4B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_4B,A_LOAD),"Load Acquire Word", +{ fEA_REG(RsV); fLOAD(1,4,u,EA,RdV); }) +Q6INSN(L4_loadd_aq,"Rdd32=memd_aq(Rs32)",ATTRIBS(A_REGWRSIZE_8B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_8B,A_LOAD),"Load Acquire Double integer", +{ fEA_REG(RsV); fLOAD(1,8,u,EA,RddV); }) + +Q6INSN(R6_release_at_vi,"release(Rs32):at",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); }) +Q6INSN(R6_release_st_vi,"release(Rs32):st",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); }) + +Q6INSN(S2_storew_rl_at_vi,"memw_rl(Rs32):at=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", { fEA_REG(RsV); fSTORE(1,4,EA,RtV); }) +Q6INSN(S4_stored_rl_at_vi,"memd_rl(Rs32):at=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); }) + +Q6INSN(S2_storew_rl_st_vi,"memw_rl(Rs32):st=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", 
{ fEA_REG(RsV); fSTORE(1,4,EA,RtV); }) +Q6INSN(S4_stored_rl_st_vi,"memd_rl(Rs32):st=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); }) + Q6INSN(L2_deallocframe,"Rdd32=deallocframe(Rs32):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_DEALLOCFRAME), "Deallocate stack frame", { fHIDE(size8u_t tmp;) fEA_REG(RsV); fLOAD(1,8,u,EA,tmp); diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def index 6fbbe2c..402438f 100644 --- a/target/hexagon/imported/mmvec/encode_ext.def +++ b/target/hexagon/imported/mmvec/encode_ext.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -257,6 +257,11 @@ DEF_ENC(V6_vasruhubrndsat, ICLASS_CJ" 1 000 vvv vvttt PP 0 uuuuu 111 ddd DEF_ENC(V6_vasruwuhsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 100 ddddd") // DEF_ENC(V6_vasruhubsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 101 ddddd") // +DEF_ENC(V6_vasrvuhubrndsat,"00011101000vvvvvPP0uuuuu011ddddd") +DEF_ENC(V6_vasrvuhubsat,"00011101000vvvvvPP0uuuuu010ddddd") +DEF_ENC(V6_vasrvwuhrndsat,"00011101000vvvvvPP0uuuuu001ddddd") +DEF_ENC(V6_vasrvwuhsat,"00011101000vvvvvPP0uuuuu000ddddd") + /*************************************************************** * * Group #1, Uses Q6 Rt32 @@ -716,6 +721,7 @@ DEF_ENC(V6_vaddclbw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 001 ddddd") // DEF_ENC(V6_vavguw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 010 ddddd") // DEF_ENC(V6_vavguwrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 011 ddddd") // +DEF_ENC(V6_vassign_tmp,"00011110--0---01PP0uuuuu110ddddd") DEF_ENC(V6_vavgb, ICLASS_CJ" 1 111 
000 vvvvv PP 1 uuuuu 100 ddddd") // DEF_ENC(V6_vavgbrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 101 ddddd") // DEF_ENC(V6_vnavgb, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 110 ddddd") // @@ -730,6 +736,8 @@ DEF_ENC(V6_vmaxb, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vsatuwuh, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vdealb4w, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_v6mpyvubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 0ii xxxxx") +DEF_ENC(V6_v6mpyhubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 1ii xxxxx") DEF_ENC(V6_vmpyowh_rnd, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 000 ddddd") // DEF_ENC(V6_vshuffeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 001 ddddd") // @@ -739,6 +747,11 @@ DEF_ENC(V6_vshufoh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 100 ddddd") // DEF_ENC(V6_vshufoeh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vshufoeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vcombine, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_vcombine_tmp,"00011110101vvvvvPP0uuuuu111ddddd") + +DEF_ENC(V6_v6mpyvubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd") +DEF_ENC(V6_v6mpyhubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd") + DEF_ENC(V6_vmpyieoh, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 000 ddddd") // DEF_ENC(V6_vadduwsat, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 001 ddddd") // @@ -789,6 +802,7 @@ DEF_ENC(V6_vrounduhub, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 011 ddddd") // DEF_ENC(V6_vrounduwuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 100 ddddd") // DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd") +DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd") #endif /* NO MMVEC */ diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef index 8ca5a60..ead32c2 100644 --- a/target/hexagon/imported/mmvec/ext.idef +++ 
b/target/hexagon/imported/mmvec/ext.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -62,6 +62,9 @@ EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), \ DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) +#define ITERATOR_INSN_SHIFT3_SLOT(WIDTH,TAG,SYNTAX,DESCR,CODE) \ +EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS,A_CVI_VS_3SRC,A_NOTE_SHIFT_RESOURCE,A_NOTE_NOVP,A_NOTE_VA_UNARY), \ +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) #define ITERATOR_INSN_SHIFT_SLOT_VV_LATE(WIDTH,TAG,SYNTAX,DESCR,CODE) \ EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), \ @@ -116,6 +119,10 @@ ITERATOR_INSN_MPY_SLOT_LATE(WIDTH,TAG, SYNTAX2,DESCR,CODE) EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \ DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) +#define ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(WIDTH,TAG,SYNTAX,DESCR,CODE) \ +EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \ +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) + #define ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX,SYNTAX2,DESCR,CODE) \ ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX2,DESCR,CODE) @@ -976,6 +983,22 @@ NARROWING_SHIFT(16,vasrhubrndsat,fSETBYTE,ub,h,:rnd:sat,fVSATUB,fVROUND,0x7) NARROWING_SHIFT(16,vasrhbsat,fSETBYTE,b,h,:sat,fVSATB,fVNOROUND,0x7) NARROWING_SHIFT(16,vasrhbrndsat,fSETBYTE,b,h,:rnd:sat,fVSATB,fVROUND,0x7) +#define NARROWING_VECTOR_SHIFT(ITERSIZE,TAG,DSTM,DSTTYPE,SRCTYPE,SRCTYPE2,SYNOPTS,SATFUNC,RNDFUNC,SHAMTMASK) \ +ITERATOR_INSN_SHIFT3_SLOT(ITERSIZE,TAG, \ +"Vd32." #DSTTYPE "=vasr(Vuu32." #SRCTYPE ",Vv32." 
#SRCTYPE2 ")" #SYNOPTS, \ +"Vector shift by vector right and shuffle", \ + fHIDE(int )shamt = VvV.SRCTYPE2[2*i+0] & SHAMTMASK; \ + DSTM(0,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[0].SRCTYPE[i],shamt) >> shamt)); \ + shamt = VvV.SRCTYPE2[2*i+1] & SHAMTMASK; \ + DSTM(1,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[1].SRCTYPE[i],shamt) >> shamt))) + +/* WORD TO HALF*/ +NARROWING_VECTOR_SHIFT(32,vasrvwuhsat,fSETHALF,uh,w,uh,:sat,fVSATUH,fVNOROUND,0xF) +NARROWING_VECTOR_SHIFT(32,vasrvwuhrndsat,fSETHALF,uh,w,uh,:rnd:sat,fVSATUH,fVROUND,0xF) +/* HALF TO BYTE*/ +NARROWING_VECTOR_SHIFT(16,vasrvuhubsat,fSETBYTE,ub,uh,ub,:sat,fVSATUB,fVNOROUND,0x7) +NARROWING_VECTOR_SHIFT(16,vasrvuhubrndsat,fSETBYTE,ub,uh,ub,:rnd:sat,fVSATUB,fVROUND,0x7) + NARROWING_SHIFT_NOV1(16,vasruhubsat,fSETBYTE,ub,uh,:sat,fVSATUB,fVNOROUND,0x7) NARROWING_SHIFT_NOV1(16,vasruhubrndsat,fSETBYTE,ub,uh,:rnd:sat,fVSATUB,fVROUND,0x7) @@ -1360,6 +1383,9 @@ ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(16,vmpyhvsrs,"Vd32=vmpyh(Vu32,Vv32):<<1:rnd:s +ITERATOR_INSN_MPY_SLOT(16,vmpyuhvs, "Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16", +"Vector by Vector Unsigned Halfword Multiply with 16 bit rightshift", + VdV.uh[i] = fGETUHALF(1,fMPY16UU(VuV.uh[i],VvV.uh[i]))) ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus, "Vdd32=vmpyhus(Vu32,Vv32)","Vdd32.w=vmpy(Vu32.h,Vv32.uh)", @@ -2038,6 +2064,24 @@ ITERATOR_INSN_ANY_SLOT_DOUBLE_VEC(8,vcombine,"Vdd32=vcombine(Vu32,Vv32)", /////////////////////////////////////////////////////////////////////////// +EXTINSN(V6_vcombine_tmp, "Vdd32.tmp=vcombine(Vu32,Vv32)", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_REMAP,A_CVI_TMP,A_NO_INTRINSIC), +"Vector assign tmp, Any two to Vector Pair ", +{ + fHIDE(int i;) + fVFOREACH(8, i) { + VddV.v[0].ub[i] = VvV.ub[i]; + VddV.v[1].ub[i] = VuV.ub[i]; + } +}) + +EXTINSN(V6_vassign_tmp, "Vd32.tmp=Vu32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_REMAP,A_CVI_TMP,A_NO_INTRINSIC), +"Vector assign tmp, Any two to Vector Pair ", +{ + fHIDE(int i;) + fVFOREACH(32, i) { + VdV.w[i]=VuV.w[i]; + } +}) 
/********************************************************* * GENERAL PERMUTE NETWORKS @@ -2507,6 +2551,281 @@ EXTINSN(V6_vscattermhw , "vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSIO }) +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyvubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "", + fHIDE(size2s_t c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(size2s_t c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(size2s_t c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + + fHIDE(size2s_t c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(size2s_t c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(size2s_t c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 1) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + } else if (uiV == 2) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VxxV.v[1].w[i] 
+= fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 3) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + } +) +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyhubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "", + fHIDE(size2s_t c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(size2s_t c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(size2s_t c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(size2s_t c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(size2s_t c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(size2s_t c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += 
fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 1) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + } else if (uiV == 2) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 3) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VxxV.v[0].w[i] += 
fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + } +) + + +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyvubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "", + fHIDE(short c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(short c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(short c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(short c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(short c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(short c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + + + if (uiV == 0) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 1) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + } else if (uiV == 2) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), 
c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 3) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + } +) + +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyhubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "", + fHIDE(short c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(short c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(short c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(short c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(short c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(short c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), 
c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 1) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + } else if (uiV == 2) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 3) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + } +) + EXTINSN(V6_vscattermhwq, "if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", 
ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATTER,A_CVI_VA_DV,A_CVI_VM,A_MEMLIKE), "Scatter halfwords conditional", { diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index b1bfadc..d732b6b 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -33,6 +33,8 @@ int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n); +int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n); void hexagon_debug_vreg(CPUHexagonState *env, int regnum); void hexagon_debug_qreg(CPUHexagonState *env, int regnum); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 760630d..5451b06 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -44,8 +44,17 @@ reg_field_info[FIELD].offset) #define SET_USR_FIELD(FIELD, VAL) \ - fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \ - reg_field_info[FIELD].offset, (VAL)) + do { \ + if (pkt_need_commit) { \ + fINSERT_BITS(env->new_value_usr, \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } else { \ + fINSERT_BITS(env->gpr[HEX_REG_USR], \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } \ + } while (0) #endif #ifdef QEMU_GENERATE @@ -164,14 +173,14 @@ #define MEM_STORE8(VA, DATA, SLOT) \ MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT) #else -#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA)) -#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA)) -#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, slot, VA)) -#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, slot, VA)) -#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, slot, VA)) -#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, slot, VA)) -#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, slot, VA)) -#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, slot, VA)) +#define 
MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA)) #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT) #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT) @@ -227,12 +236,8 @@ static inline void gen_cancel(uint32_t slot) #ifdef QEMU_GENERATE #define fLSBNEW(PVAL) tcg_gen_andi_tl(LSB, (PVAL), 1) -#define fLSBNEW0 tcg_gen_andi_tl(LSB, hex_new_pred_value[0], 1) -#define fLSBNEW1 tcg_gen_andi_tl(LSB, hex_new_pred_value[1], 1) #else #define fLSBNEW(PVAL) ((PVAL) & 1) -#define fLSBNEW0 (env->new_pred_value[0] & 1) -#define fLSBNEW1 (env->new_pred_value[1] & 1) #endif #ifdef QEMU_GENERATE @@ -347,10 +352,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fREAD_LR() (env->gpr[HEX_REG_LR]) -#define fWRITE_LR(A) log_reg_write(env, HEX_REG_LR, A) -#define fWRITE_FP(A) log_reg_write(env, HEX_REG_FP, A) -#define fWRITE_SP(A) log_reg_write(env, HEX_REG_SP, A) - #define fREAD_SP() (env->gpr[HEX_REG_SP]) #define fREAD_LC0 (env->gpr[HEX_REG_LC0]) #define fREAD_LC1 (env->gpr[HEX_REG_LC1]) @@ -375,24 +376,10 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC) #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR) #define fHINTJR(TARGET) { /* Not modelled in qemu */} -#define fWRITE_LOOP_REGS0(START, COUNT) \ - do { \ - log_reg_write(env, HEX_REG_LC0, COUNT); \ - log_reg_write(env, 
HEX_REG_SA0, START); \ - } while (0) -#define fWRITE_LOOP_REGS1(START, COUNT) \ - do { \ - log_reg_write(env, HEX_REG_LC1, COUNT); \ - log_reg_write(env, HEX_REG_SA1, START);\ - } while (0) #define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1) #define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL)) #define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG)) -#define fWRITE_P0(VAL) log_pred_write(env, 0, VAL) -#define fWRITE_P1(VAL) log_pred_write(env, 1, VAL) -#define fWRITE_P2(VAL) log_pred_write(env, 2, VAL) -#define fWRITE_P3(VAL) log_pred_write(env, 3, VAL) #define fPART1(WORK) if (part1) { WORK; return; } #define fCAST4u(A) ((uint32_t)(A)) #define fCAST4s(A) ((int32_t)(A)) @@ -661,7 +648,11 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) reg_field_info[FIELD].offset) #ifdef QEMU_GENERATE -#define fDCZEROA(REG) tcg_gen_mov_tl(hex_dczero_addr, (REG)) +#define fDCZEROA(REG) \ + do { \ + ctx->dczero_addr = tcg_temp_new(); \ + tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \ + } while (0) #endif #define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \ diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index 1201d77..a655634 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -346,4 +346,11 @@ #define fUARCH_NOTE_PUMP_2X() #define IV1DEAD() + +#define fGET10BIT(COE, VAL, POS) \ + do { \ + COE = (sextract32(VAL, 24 + 2 * POS, 2) << 8) | \ + extract32(VAL, POS * 8, 8); \ + } while (0); + #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 3cc71b6..12967ac 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -52,38 +52,6 @@ G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) do_raise_exception_err(env, excp, 0); } -void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val) -{ - HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")", - rnum, val, val); - if (val == env->gpr[rnum]) { - HEX_DEBUG_LOG(" NO CHANGE"); - } - HEX_DEBUG_LOG("\n"); - - env->new_value[rnum] = val; - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - env->reg_written[rnum] = 1; - } -} - -static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) -{ - HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld - " (0x" TARGET_FMT_lx ")\n", - pnum, val, val); - - /* Multiple writes to the same preg are and'ed together */ - if (env->pred_written & (1 << pnum)) { - env->new_pred_value[pnum] &= val & 0xff; - } else { - env->new_pred_value[pnum] = val & 0xff; - env->pred_written |= 1 << pnum; - } -} - void log_store32(CPUHexagonState *env, target_ulong addr, target_ulong val, int width, int slot) { @@ -235,14 +203,14 @@ static void print_store(CPUHexagonState *env, int slot) } /* This function is a handy place to set a breakpoint */ -void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) +void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC, + int pred_written, int has_st0, int has_st1) { bool reg_printed = false; bool pred_printed = false; int i; - 
HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", - env->this_PC); + HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC); HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled); for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { @@ -252,18 +220,18 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) reg_printed = true; } HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n", - i, env->new_value[i], env->new_value[i]); + i, env->gpr[i], env->gpr[i]); } } for (i = 0; i < NUM_PREGS; i++) { - if (env->pred_written & (1 << i)) { + if (pred_written & (1 << i)) { if (!pred_printed) { HEX_DEBUG_LOG("Predicates written\n"); pred_printed = true; } HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n", - i, env->new_pred_value[i]); + i, env->pred[i]); } } @@ -384,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) } int64_t HELPER(vacsh_val)(CPUHexagonState *env, - int64_t RxxV, int64_t RssV, int64_t RttV) + int64_t RxxV, int64_t RssV, int64_t RttV, + uint32_t pkt_need_commit) { for (int i = 0; i < 4; i++) { int xv = sextract64(RxxV, i * 16, 16); @@ -416,6 +385,87 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env, return PeV; } +int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV) +{ + int64_t RddV = 0; + size4u_t state; + size4u_t valMPS; + size4u_t bitpos; + size4u_t range; + size4u_t offset; + size4u_t rLPS; + size4u_t rMPS; + + state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0); + valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8); + bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0); + range = fGETWORD(0, RssV); + offset = fGETWORD(1, RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][(range >> 29) & 3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS = (range & 0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + RddV = AC_next_state_MPS_64[state]; + fINSERT_RANGE(RddV, 8, 8, 
valMPS); + fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23)); + fSETWORD(1, RddV, offset); + } + /* least probable region */ + else { + RddV = AC_next_state_LPS_64[state]; + fINSERT_RANGE(RddV, 8, 8, ((!state) ? (1 - valMPS) : (valMPS))); + fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23)); + fSETWORD(1, RddV, (offset - rMPS)); + } + return RddV; +} + +int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV) +{ + int32_t p0 = 0; + size4u_t state; + size4u_t valMPS; + size4u_t bitpos; + size4u_t range; + size4u_t offset; + size4u_t rLPS; + size4u_t rMPS; + + state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0); + valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8); + bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0); + range = fGETWORD(0, RssV); + offset = fGETWORD(1, RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][(range >> 29) & 3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS = (range & 0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + p0 = valMPS; + + } + /* least probable region */ + else { + p0 = valMPS ^ 1; + } + return p0; +} + static void probe_store(CPUHexagonState *env, int slot, int mmu_idx, bool is_predicated) { @@ -516,41 +566,45 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask) * If the load is in slot 0 and there is a store in slot1 (that * wasn't cancelled), we have to do the store first. 
*/ -static void check_noshuf(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr, int size) +static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr, int size) { - if (slot == 0 && env->pkt_has_store_s1 && + if (slot == 0 && pkt_has_store_s1 && ((env->slot_cancelled & (1 << 1)) == 0)) { HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX); HELPER(commit_store)(env, 1); } } -uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 1); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1); return cpu_ldub_data_ra(env, vaddr, ra); } -uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 2); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2); return cpu_lduw_data_ra(env, vaddr, ra); } -uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 4); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4); return cpu_ldl_data_ra(env, vaddr, ra); } -uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 8); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8); return cpu_ldq_data_ra(env, vaddr, ra); } diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h index db22b54..8f3764d 100644 --- a/target/hexagon/op_helper.h +++ b/target/hexagon/op_helper.h @@ -19,15 +19,15 @@ #define 
HEXAGON_OP_HELPER_H /* Misc functions */ -void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr); +uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); -uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); - -void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val); void log_store64(CPUHexagonState *env, target_ulong addr, int64_t val, int width, int slot); void log_store32(CPUHexagonState *env, target_ulong addr, diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index cddd7c5..b18f1a9 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -27,6 +27,7 @@ #include "insn.h" #include "decode.h" #include "translate.h" +#include "genptr.h" #include "printinsn.h" #include "analyze_funcs_generated.c.inc" @@ -40,19 +41,13 @@ static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_pred[NUM_PREGS]; -TCGv hex_this_PC; TCGv hex_slot_cancelled; -TCGv hex_branch_taken; -TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; +TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -TCGv hex_new_pred_value[NUM_PREGS]; -TCGv hex_pred_written; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; TCGv hex_store_val32[STORES_MAX]; TCGv_i64 hex_store_val64[STORES_MAX]; -TCGv hex_pkt_has_store_s1; -TCGv hex_dczero_addr; TCGv hex_llsc_addr; 
TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; @@ -69,6 +64,10 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, { intptr_t offset; + if (!ctx->need_commit) { + return offsetof(CPUHexagonState, VRegs[regnum]); + } + /* See if it is already allocated */ for (int i = 0; i < ctx->future_vregs_idx; i++) { if (ctx->future_vregs_num[i] == regnum) { @@ -154,7 +153,7 @@ static void gen_end_tb(DisasContext *ctx) if (ctx->branch_cond != TCG_COND_NEVER) { if (ctx->branch_cond != TCG_COND_ALWAYS) { TCGLabel *skip = gen_new_label(); - tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip); + tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip); gen_goto_tb(ctx, 0, ctx->branch_dest, true); gen_set_label(skip); gen_goto_tb(ctx, 1, ctx->next_PC, false); @@ -262,11 +261,6 @@ static bool need_slot_cancelled(Packet *pkt) return false; } -static bool need_pred_written(Packet *pkt) -{ - return check_for_attrib(pkt, A_WRITES_PRED_REG); -} - static bool need_next_PC(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -341,10 +335,131 @@ static void mark_implicit_pred_writes(DisasContext *ctx) mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); } +static bool pkt_raises_exception(Packet *pkt) +{ + if (check_for_attrib(pkt, A_LOAD) || + check_for_attrib(pkt, A_STORE)) { + return true; + } + return false; +} + +static bool need_commit(DisasContext *ctx) +{ + Packet *pkt = ctx->pkt; + + /* + * If the short-circuit property is set to false, we'll always do the commit + */ + if (!ctx->short_circuit) { + return true; + } + + if (pkt_raises_exception(pkt)) { + return true; + } + + /* Registers with immutability flags require new_value */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (reg_immut_masks[rnum]) { + return true; + } + } + + /* Floating point instructions are hard-coded to use new_value */ + if (check_for_attrib(pkt, A_FPOP)) { + return true; + } + + if (pkt->num_insns == 1) { + if (pkt->pkt_has_hvx) { + /* + * The 
HVX instructions with generated helpers use + * pass-by-reference, so they need the read/write overlap + * check below. + * The HVX instructions with overrides are OK. + */ + if (!ctx->has_hvx_helper) { + return false; + } + } else { + return false; + } + } + + /* Check for overlap between register reads and writes */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (test_bit(rnum, ctx->regs_read)) { + return true; + } + } + + /* Check for overlap between predicate reads and writes */ + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pnum = ctx->preg_log[i]; + if (test_bit(pnum, ctx->pregs_read)) { + return true; + } + } + + /* Check for overlap between HVX reads and writes */ + for (int i = 0; i < ctx->vreg_log_idx; i++) { + int vnum = ctx->vreg_log[i]; + if (test_bit(vnum, ctx->vregs_read)) { + return true; + } + } + if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) { + int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS); + while (i < NUM_VREGS) { + if (test_bit(i, ctx->vregs_read)) { + return true; + } + i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1); + } + } + if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) { + int i = find_first_bit(ctx->vregs_select, NUM_VREGS); + while (i < NUM_VREGS) { + if (test_bit(i, ctx->vregs_read)) { + return true; + } + i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1); + } + } + + /* Check for overlap between HVX predicate reads and writes */ + for (int i = 0; i < ctx->qreg_log_idx; i++) { + int qnum = ctx->qreg_log[i]; + if (test_bit(qnum, ctx->qregs_read)) { + return true; + } + } + + return false; +} + +static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) +{ + if (GET_ATTRIB(ctx->insn->opcode, attrib)) { + ctx_log_pred_read(ctx, pnum); + } +} + +static void mark_implicit_pred_reads(DisasContext *ctx) +{ + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1); + 
mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P2, 2); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3); +} + static void analyze_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; - ctx->need_pkt_has_store_s1 = false; + ctx->has_hvx_helper = false; for (int i = 0; i < pkt->num_insns; i++) { Insn *insn = &pkt->insn[i]; ctx->insn = insn; @@ -353,7 +468,10 @@ static void analyze_packet(DisasContext *ctx) } mark_implicit_reg_writes(ctx); mark_implicit_pred_writes(ctx); + mark_implicit_pred_reads(ctx); } + + ctx->need_commit = need_commit(ctx); } static void gen_start_packet(DisasContext *ctx) @@ -366,9 +484,11 @@ static void gen_start_packet(DisasContext *ctx) ctx->next_PC = next_PC; ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); + bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); + bitmap_zero(ctx->pregs_read, NUM_PREGS); ctx->future_vregs_idx = 0; ctx->tmp_vregs_idx = 0; ctx->vreg_log_idx = 0; @@ -377,19 +497,23 @@ static void gen_start_packet(DisasContext *ctx) bitmap_zero(ctx->vregs_select, NUM_VREGS); bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); + bitmap_zero(ctx->vregs_read, NUM_VREGS); + bitmap_zero(ctx->qregs_read, NUM_QREGS); ctx->qreg_log_idx = 0; for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; } ctx->s1_store_processed = false; ctx->pre_commit = true; + for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { + ctx->new_value[i] = NULL; + } + for (i = 0; i < NUM_PREGS; i++) { + ctx->new_pred_value[i] = NULL; + } analyze_packet(ctx); - if (ctx->need_pkt_has_store_s1) { - tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); - } - /* * pregs_written is used both in the analyze phase as well as the code * gen phase, so clear it again. 
@@ -399,35 +523,50 @@ static void gen_start_packet(DisasContext *ctx) if (HEX_DEBUG) { /* Handy place to set a breakpoint before the packet executes */ gen_helper_debug_start_packet(cpu_env); - tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); } /* Initialize the runtime state for packet semantics */ if (need_slot_cancelled(pkt)) { tcg_gen_movi_tl(hex_slot_cancelled, 0); } + ctx->branch_taken = NULL; if (pkt->pkt_has_cof) { + ctx->branch_taken = tcg_temp_new(); if (pkt->pkt_has_multi_cof) { - tcg_gen_movi_tl(hex_branch_taken, 0); + tcg_gen_movi_tl(ctx->branch_taken, 0); } if (need_next_PC(ctx)) { tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); } } - if (need_pred_written(pkt)) { - tcg_gen_movi_tl(hex_pred_written, 0); + if (HEX_DEBUG) { + ctx->pred_written = tcg_temp_new(); + tcg_gen_movi_tl(ctx->pred_written, 0); } - /* Preload the predicated registers into hex_new_value[i] */ - if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { + /* Preload the predicated registers into get_result_gpr(ctx, i) */ + if (ctx->need_commit && + !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); while (i < TOTAL_PER_THREAD_REGS) { - tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); + tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]); i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, i + 1); } } + /* + * Preload the predicated pred registers into hex_new_pred_value[pred_num] + * Only endloop instructions conditionally write to pred registers + */ + if (ctx->need_commit && pkt->pkt_has_endloop) { + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pred_num = ctx->preg_log[i]; + ctx->new_pred_value[pred_num] = tcg_temp_new(); + tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]); + } + } + /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { int i = 
find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); @@ -481,6 +620,9 @@ static void mark_store_width(DisasContext *ctx) uint8_t width = 0; if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { + if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) { + return; + } if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { width |= 1; } @@ -515,10 +657,15 @@ static void gen_reg_writes(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + return; + } + for (i = 0; i < ctx->reg_log_idx; i++) { int reg_num = ctx->reg_log[i]; - tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]); + tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num)); /* * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. @@ -532,41 +679,14 @@ static void gen_reg_writes(DisasContext *ctx) static void gen_pred_writes(DisasContext *ctx) { - int i; - - /* Early exit if the log is empty */ - if (!ctx->preg_log_idx) { + /* Early exit if not needed or the log is empty */ + if (!ctx->need_commit || !ctx->preg_log_idx) { return; } - /* - * Only endloop instructions will conditionally - * write a predicate. If there are no endloop - * instructions, we can use the non-conditional - * write of the predicates. 
- */ - if (ctx->pkt->pkt_has_endloop) { - TCGv zero = tcg_constant_tl(0); - TCGv pred_written = tcg_temp_new(); - for (i = 0; i < ctx->preg_log_idx; i++) { - int pred_num = ctx->preg_log[i]; - - tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num); - tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num], - pred_written, zero, - hex_new_pred_value[pred_num], - hex_pred[pred_num]); - } - } else { - for (i = 0; i < ctx->preg_log_idx; i++) { - int pred_num = ctx->preg_log[i]; - tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, - 1 << pred_num); - } - } + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pred_num = ctx->preg_log[i]; + tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]); } } @@ -692,7 +812,7 @@ static void process_dczeroa(DisasContext *ctx) TCGv addr = tcg_temp_new(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f); + tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f); tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); tcg_gen_addi_tl(addr, addr, 8); tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); @@ -719,6 +839,12 @@ static void gen_commit_hvx(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + g_assert(!pkt_has_hvx_store(ctx->pkt)); + return; + } + /* * for (i = 0; i < ctx->vreg_log_idx; i++) { * int rnum = ctx->vreg_log[i]; @@ -873,7 +999,8 @@ static void gen_commit_packet(DisasContext *ctx) tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); /* Handy place to set a breakpoint at the end of execution */ - gen_helper_debug_commit_end(cpu_env, has_st0, has_st1); + gen_helper_debug_commit_end(cpu_env, tcg_constant_tl(ctx->pkt->pc), + ctx->pred_written, has_st0, has_st1); } if (pkt->vhist_insn != NULL) { @@ -920,6 +1047,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, CPUState 
*cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr); uint32_t hex_flags = dcbase->tb->flags; ctx->mem_idx = MMU_USER_IDX; @@ -928,6 +1056,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); + ctx->short_circuit = hex_cpu->short_circuit; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -1028,9 +1157,7 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, } #define NAME_LEN 64 -static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -static char new_pred_value_names[NUM_PREGS][NAME_LEN]; static char store_addr_names[STORES_MAX][NAME_LEN]; static char store_width_names[STORES_MAX][NAME_LEN]; static char store_val32_names[STORES_MAX][NAME_LEN]; @@ -1050,11 +1177,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, gpr[i]), hexagon_regnames[i]); - snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); - hex_new_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_value[i]), - new_value_names[i]); - if (HEX_DEBUG) { snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", hexagon_regnames[i]); @@ -1063,29 +1185,16 @@ void hexagon_translate_init(void) reg_written_names[i]); } } + hex_new_value_usr = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, new_value_usr), "new_value_usr"); + for (i = 0; i < NUM_PREGS; i++) { hex_pred[i] = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, pred[i]), hexagon_prednames[i]); - - snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s", - hexagon_prednames[i]); - hex_new_pred_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_pred_value[i]), - new_pred_value_names[i]); } - hex_pred_written = tcg_global_mem_new(cpu_env, - 
offsetof(CPUHexagonState, pred_written), "pred_written"); - hex_this_PC = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, this_PC), "this_PC"); hex_slot_cancelled = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); - hex_branch_taken = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, branch_taken), "branch_taken"); - hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1"); - hex_dczero_addr = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, dczero_addr), "dczero_addr"); hex_llsc_addr = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); hex_llsc_val = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 4b9f21c..4dd59c6 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -38,10 +38,12 @@ typedef struct DisasContext { int reg_log[REG_WRITES_MAX]; int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); + DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); + DECLARE_BITMAP(pregs_read, NUM_PREGS); uint8_t store_width[STORES_MAX]; bool s1_store_processed; int future_vregs_idx; @@ -55,13 +57,22 @@ typedef struct DisasContext { DECLARE_BITMAP(vregs_select, NUM_VREGS); DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS); DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS); + DECLARE_BITMAP(vregs_read, NUM_VREGS); int qreg_log[NUM_QREGS]; int qreg_log_idx; + DECLARE_BITMAP(qregs_read, NUM_QREGS); bool pre_commit; + bool need_commit; TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; - bool need_pkt_has_store_s1; + bool short_circuit; + bool has_hvx_helper; + TCGv new_value[TOTAL_PER_THREAD_REGS]; + TCGv new_pred_value[NUM_PREGS]; + TCGv pred_written; + TCGv branch_taken; + TCGv 
dczero_addr; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) @@ -73,6 +84,11 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) } } +static inline void ctx_log_pred_read(DisasContext *ctx, int pnum) +{ + set_bit(pnum, ctx->pregs_read); +} + static inline void ctx_log_reg_write(DisasContext *ctx, int rnum, bool is_predicated) { @@ -99,6 +115,17 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum, ctx_log_reg_write(ctx, rnum + 1, is_predicated); } +static inline void ctx_log_reg_read(DisasContext *ctx, int rnum) +{ + set_bit(rnum, ctx->regs_read); +} + +static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum) +{ + ctx_log_reg_read(ctx, rnum); + ctx_log_reg_read(ctx, rnum + 1); +} + intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, int num, bool alloc_ok); intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, @@ -139,6 +166,17 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx, ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated); } +static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum) +{ + set_bit(rnum, ctx->vregs_read); +} + +static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum) +{ + ctx_log_vreg_read(ctx, rnum ^ 0); + ctx_log_vreg_read(ctx, rnum ^ 1); +} + static inline void ctx_log_qreg_write(DisasContext *ctx, int rnum) { @@ -146,20 +184,20 @@ static inline void ctx_log_qreg_write(DisasContext *ctx, ctx->qreg_log_idx++; } +static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum) +{ + set_bit(qnum, ctx->qregs_read); +} + extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; extern TCGv hex_pred[NUM_PREGS]; -extern TCGv hex_this_PC; extern TCGv hex_slot_cancelled; -extern TCGv hex_branch_taken; -extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; +extern TCGv hex_new_value_usr; extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -extern TCGv hex_new_pred_value[NUM_PREGS]; -extern TCGv hex_pred_written; extern TCGv 
hex_store_addr[STORES_MAX]; extern TCGv hex_store_width[STORES_MAX]; extern TCGv hex_store_val32[STORES_MAX]; extern TCGv_i64 hex_store_val64[STORES_MAX]; -extern TCGv hex_dczero_addr; extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; |