140 files changed, 6233 insertions, 3175 deletions
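The first hunks below (accel/tcg/cputlb.c) replace open-coded conditional locking of the iothread mutex with a scoped guard. Condensed from the hunks themselves, the before/after pattern is (guard semantics inferred from the code it replaces):

    /* before: manual bookkeeping around the MMIO dispatch */
    bool locked = false;
    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    /* after: the guard takes the lock only if it is not already held
     * and releases it automatically when the block is left */
    {
        QEMU_IOTHREAD_LOCK_GUARD();
        r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
    }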
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 03674d5..4948729 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1356,7 +1356,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, MemoryRegionSection *section; MemoryRegion *mr; uint64_t val; - bool locked = false; MemTxResult r; section = iotlb_to_section(cpu, full->xlat_section, full->attrs); @@ -1367,11 +1366,11 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, cpu_io_recompile(cpu, retaddr); } - if (!qemu_mutex_iothread_locked()) { - qemu_mutex_lock_iothread(); - locked = true; + { + QEMU_IOTHREAD_LOCK_GUARD(); + r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); } - r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs); + if (r != MEMTX_OK) { hwaddr physaddr = mr_offset + section->offset_within_address_space - @@ -1380,10 +1379,6 @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full, cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type, mmu_idx, full->attrs, r, retaddr); } - if (locked) { - qemu_mutex_unlock_iothread(); - } - return val; } @@ -1410,7 +1405,6 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, hwaddr mr_offset; MemoryRegionSection *section; MemoryRegion *mr; - bool locked = false; MemTxResult r; section = iotlb_to_section(cpu, full->xlat_section, full->attrs); @@ -1427,11 +1421,11 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, */ save_iotlb_data(cpu, section, mr_offset); - if (!qemu_mutex_iothread_locked()) { - qemu_mutex_lock_iothread(); - locked = true; + { + QEMU_IOTHREAD_LOCK_GUARD(); + r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); } - r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs); + if (r != MEMTX_OK) { hwaddr physaddr = mr_offset + section->offset_within_address_space - @@ -1441,9 +1435,6 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full, MMU_DATA_STORE, mmu_idx, full->attrs, r, retaddr); } - if (locked) { - qemu_mutex_unlock_iothread(); - } } static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs) diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index 80dff68..c7d6514 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -258,10 +258,13 @@ static TCGOp *rm_ops(TCGOp *op) static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op) { - *begin_op = QTAILQ_NEXT(*begin_op, link); - tcg_debug_assert(*begin_op); - op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc); - memcpy(op->args, (*begin_op)->args, sizeof(op->args)); + TCGOp *old_op = QTAILQ_NEXT(*begin_op, link); + unsigned nargs = old_op->nargs; + + *begin_op = old_op; + op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs); + memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs); + return op; } @@ -381,32 +384,23 @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, void *func, int *cb_idx) { + TCGOp *old_op; + int func_idx; + /* copy all ops until the call */ do { op = copy_op_nocheck(begin_op, op); } while (op->opc != INDEX_op_call); /* fill in the op call */ - op->param1 = (*begin_op)->param1; - op->param2 = (*begin_op)->param2; + old_op = *begin_op; + TCGOP_CALLI(op) = TCGOP_CALLI(old_op); + TCGOP_CALLO(op) = TCGOP_CALLO(old_op); tcg_debug_assert(op->life == 0); - if (*cb_idx == -1) { - int i; - /* - * Instead of working out the position of the callback in args[], just - * look for @empty_func, since it 
should be a unique pointer. - */ - for (i = 0; i < MAX_OPC_PARAM_ARGS; i++) { - if ((uintptr_t)(*begin_op)->args[i] == (uintptr_t)empty_func) { - *cb_idx = i; - break; - } - } - tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); - } - op->args[*cb_idx] = (uintptr_t)func; - op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; + func_idx = TCGOP_CALLO(op) + TCGOP_CALLI(op); + *cb_idx = func_idx; + op->args[func_idx] = (uintptr_t)func; return op; } @@ -424,11 +418,11 @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, op = copy_const_ptr(&begin_op, op, cb->userp); /* copy the ld_i32, but note that we only have to copy it once */ - begin_op = QTAILQ_NEXT(begin_op, link); - tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); if (*cb_idx == -1) { - op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); - memcpy(op->args, begin_op->args, sizeof(op->args)); + op = copy_op(&begin_op, op, INDEX_op_ld_i32); + } else { + begin_op = QTAILQ_NEXT(begin_op, link); + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); } /* call */ @@ -471,11 +465,11 @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, op = copy_const_ptr(&begin_op, op, cb->userp); /* copy the ld_i32, but note that we only have to copy it once */ - begin_op = QTAILQ_NEXT(begin_op, link); - tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); if (*cb_idx == -1) { - op = tcg_op_insert_after(tcg_ctx, op, INDEX_op_ld_i32); - memcpy(op->args, begin_op->args, sizeof(op->args)); + op = copy_op(&begin_op, op, INDEX_op_ld_i32); + } else { + begin_op = QTAILQ_NEXT(begin_op, link); + tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32); } /* extu_tl_i64 */ diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index 1b8e860..b3d6529 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -1024,43 +1024,51 @@ void tb_invalidate_phys_page(tb_page_addr_t addr) */ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) { - assert(pc != 0); -#ifdef TARGET_HAS_PRECISE_SMC - assert_memory_lock(); - { - TranslationBlock *current_tb = tcg_tb_lookup(pc); - bool current_tb_modified = false; - TranslationBlock *tb; - PageForEachNext n; + TranslationBlock *current_tb; + bool current_tb_modified; + TranslationBlock *tb; + PageForEachNext n; - addr &= TARGET_PAGE_MASK; + /* + * Without precise smc semantics, or when outside of a TB, + * we can skip to invalidate. + */ +#ifndef TARGET_HAS_PRECISE_SMC + pc = 0; +#endif + if (!pc) { + tb_invalidate_phys_page(addr); + return false; + } - PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) { - if (current_tb == tb && - (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { - /* - * If we are modifying the current TB, we must stop its - * execution. We could be more precise by checking that - * the modification is after the current PC, but it would - * require a specialized function to partially restore - * the CPU state. - */ - current_tb_modified = true; - cpu_restore_state_from_tb(current_cpu, current_tb, pc); - } - tb_phys_invalidate__locked(tb); + assert_memory_lock(); + current_tb = tcg_tb_lookup(pc); + + addr &= TARGET_PAGE_MASK; + current_tb_modified = false; + + PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) { + if (current_tb == tb && + (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { + /* + * If we are modifying the current TB, we must stop its + * execution. 
We could be more precise by checking that + * the modification is after the current PC, but it would + * require a specialized function to partially restore + * the CPU state. + */ + current_tb_modified = true; + cpu_restore_state_from_tb(current_cpu, current_tb, pc); } + tb_phys_invalidate__locked(tb); + } - if (current_tb_modified) { - /* Force execution of one insn next time. */ - CPUState *cpu = current_cpu; - cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); - return true; - } + if (current_tb_modified) { + /* Force execution of one insn next time. */ + CPUState *cpu = current_cpu; + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); + return true; } -#else - tb_invalidate_phys_page(addr); -#endif /* TARGET_HAS_PRECISE_SMC */ return false; } #else diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index a3cecda..a8eb63a 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -22,6 +22,7 @@ #include "exec/exec-all.h" #include "tcg/tcg.h" #include "qemu/bitops.h" +#include "qemu/rcu.h" #include "exec/cpu_ldst.h" #include "exec/translate-all.h" #include "exec/helper-proto.h" @@ -136,6 +137,7 @@ bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, } typedef struct PageFlagsNode { + struct rcu_head rcu; IntervalTreeNode itree; int flags; } PageFlagsNode; @@ -266,7 +268,7 @@ static bool pageflags_unset(target_ulong start, target_ulong last) } } else if (p_last <= last) { /* Range completely covers node -- remove it. */ - g_free(p); + g_free_rcu(p, rcu); } else { /* Truncate the node from the start. */ p->itree.start = last + 1; @@ -311,7 +313,7 @@ static void pageflags_create_merge(target_ulong start, target_ulong last, if (prev) { if (next) { prev->itree.last = next->itree.last; - g_free(next); + g_free_rcu(next, rcu); } else { prev->itree.last = last; } @@ -376,7 +378,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, p->flags = merge_flags; } else { interval_tree_remove(&p->itree, &pageflags_root); - g_free(p); + g_free_rcu(p, rcu); } goto done; } @@ -421,7 +423,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, p->flags = merge_flags; } else { interval_tree_remove(&p->itree, &pageflags_root); - g_free(p); + g_free_rcu(p, rcu); } if (p_last < last) { start = p_last + 1; @@ -462,7 +464,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last, p->itree.start = last + 1; interval_tree_insert(&p->itree, &pageflags_root); } else { - g_free(p); + g_free_rcu(p, rcu); goto restart; } if (set_flags) { @@ -523,6 +525,8 @@ void page_set_flags(target_ulong start, target_ulong end, int flags) int page_check_range(target_ulong start, target_ulong len, int flags) { target_ulong last; + int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ + int ret; if (len == 0) { return 0; /* trivial length */ @@ -533,42 +537,67 @@ int page_check_range(target_ulong start, target_ulong len, int flags) return -1; /* wrap around */ } + locked = have_mmap_lock(); while (true) { PageFlagsNode *p = pageflags_find(start, last); int missing; if (!p) { - return -1; /* entire region invalid */ + if (!locked) { + /* + * Lockless lookups have false negatives. + * Retry with the lock held. 
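+ * (A reader that walks the pageflags interval tree without the mmap lock can race with a concurrent update and miss a node that is in fact present; re-checking under the lock is authoritative.)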
+ */ + mmap_lock(); + locked = -1; + p = pageflags_find(start, last); + } + if (!p) { + ret = -1; /* entire region invalid */ + break; + } } if (start < p->itree.start) { - return -1; /* initial bytes invalid */ + ret = -1; /* initial bytes invalid */ + break; } missing = flags & ~p->flags; if (missing & PAGE_READ) { - return -1; /* page not readable */ + ret = -1; /* page not readable */ + break; } if (missing & PAGE_WRITE) { if (!(p->flags & PAGE_WRITE_ORG)) { - return -1; /* page not writable */ + ret = -1; /* page not writable */ + break; } /* Asking about writable, but has been protected: undo. */ if (!page_unprotect(start, 0)) { - return -1; + ret = -1; + break; } /* TODO: page_unprotect should take a range, not a single page. */ if (last - start < TARGET_PAGE_SIZE) { - return 0; /* ok */ + ret = 0; /* ok */ + break; } start += TARGET_PAGE_SIZE; continue; } if (last <= p->itree.last) { - return 0; /* ok */ + ret = 0; /* ok */ + break; } start = p->itree.last + 1; } + + /* Release the lock if acquired locally. */ + if (locked < 0) { + mmap_unlock(); + } + return ret; } void page_protect(tb_page_addr_t address) @@ -779,6 +808,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, #define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES) typedef struct TargetPageDataNode { + struct rcu_head rcu; IntervalTreeNode itree; char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned)); } TargetPageDataNode; @@ -801,11 +831,11 @@ void page_reset_target_data(target_ulong start, target_ulong end) n = next, next = next ? interval_tree_iter_next(n, start, last) : NULL) { target_ulong n_start, n_last, p_ofs, p_len; - TargetPageDataNode *t; + TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); if (n->start >= start && n->last <= last) { interval_tree_remove(n, &targetdata_root); - g_free(n); + g_free_rcu(t, rcu); continue; } @@ -819,7 +849,6 @@ void page_reset_target_data(target_ulong start, target_ulong end) n_last = MIN(last, n->last); p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; - t = container_of(n, TargetPageDataNode, itree); memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE); } } diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst index 52baa07..7957310 100644 --- a/docs/devel/atomics.rst +++ b/docs/devel/atomics.rst @@ -1,3 +1,5 @@ +.. _atomics-ref: + ========================= Atomic operations in QEMU ========================= diff --git a/docs/devel/index-tcg.rst b/docs/devel/index-tcg.rst index 7b9760b..b44ff8b 100644 --- a/docs/devel/index-tcg.rst +++ b/docs/devel/index-tcg.rst @@ -9,6 +9,7 @@ are only implementing things for HW accelerated hypervisors. :maxdepth: 2 tcg + tcg-ops decodetree multi-thread-tcg tcg-icount diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst new file mode 100644 index 0000000..9adc0c9 --- /dev/null +++ b/docs/devel/tcg-ops.rst @@ -0,0 +1,941 @@ +.. _tcg-ops-ref: + +******************************* +TCG Intermediate Representation +******************************* + +Introduction +============ + +TCG (Tiny Code Generator) began as a generic backend for a C +compiler. It was simplified to be used in QEMU. It also has its roots +in the QOP code generator written by Paul Brook. + +Definitions +=========== + +TCG receives RISC-like *TCG ops* and performs some optimizations on them, +including liveness analysis and trivial constant expression +evaluation. TCG ops are then implemented in the host CPU back end, +also known as the TCG target. 
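+
+For example, a single guest instruction flows through TCG as follows
+(a schematic illustration, not literal QEMU output):
+
+.. code-block:: none
+
+   guest instruction:   add r0, r1, r2      (emulated architecture)
+   TCG ops:             add_i32 t0, t1, t2  (intermediate representation)
+   host code:           addl %ebx, %eax     (TCG target)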
+ +The TCG *target* is the architecture for which we generate the +code. It is of course not the same as the "target" of QEMU which is +the emulated architecture. As TCG started as a generic C backend used +for cross compiling, it is assumed that the TCG target is different +from the host, although it is never the case for QEMU. + +In this document, we use *guest* to specify what architecture we are +emulating; *target* always means the TCG target, the machine on which +we are running QEMU. + +A TCG *function* corresponds to a QEMU Translated Block (TB). + +A TCG *temporary* is a variable only live in a basic block. Temporaries are allocated explicitly in each function. + +A TCG *local temporary* is a variable only live in a function. Local temporaries are allocated explicitly in each function. + +A TCG *global* is a variable which is live in all the functions +(equivalent of a C global variable). They are defined before the +functions defined. A TCG global can be a memory location (e.g. a QEMU +CPU register), a fixed host register (e.g. the QEMU CPU state pointer) +or a memory location which is stored in a register outside QEMU TBs +(not implemented yet). + +A TCG *basic block* corresponds to a list of instructions terminated +by a branch instruction. + +An operation with *undefined behavior* may result in a crash. + +An operation with *unspecified behavior* shall not crash. However, +the result may be one of several possibilities so may be considered +an *undefined result*. + +Intermediate representation +=========================== + +Introduction +------------ + +TCG instructions operate on variables which are temporaries, local +temporaries or globals. TCG instructions and variables are strongly +typed. Two types are supported: 32 bit integers and 64 bit +integers. Pointers are defined as an alias to 32 bit or 64 bit +integers depending on the TCG target word size. + +Each instruction has a fixed number of output variable operands, input +variable operands and always constant operands. + +The notable exception is the call instruction which has a variable +number of outputs and inputs. + +In the textual form, output operands usually come first, followed by +input operands, followed by constant operands. The output type is +included in the instruction name. Constants are prefixed with a '$'. + +.. code-block:: none + + add_i32 t0, t1, t2 /* (t0 <- t1 + t2) */ + + +Assumptions +----------- + +Basic blocks +^^^^^^^^^^^^ + +* Basic blocks end after branches (e.g. brcond_i32 instruction), + goto_tb and exit_tb instructions. + +* Basic blocks start after the end of a previous basic block, or at a + set_label instruction. + +After the end of a basic block, the content of temporaries is +destroyed, but local temporaries and globals are preserved. + +Floating point types +^^^^^^^^^^^^^^^^^^^^ + +* Floating point types are not supported yet + +Pointers +^^^^^^^^ + +* Depending on the TCG target, pointer size is 32 bit or 64 + bit. The type ``TCG_TYPE_PTR`` is an alias to ``TCG_TYPE_I32`` or + ``TCG_TYPE_I64``. + +Helpers +^^^^^^^ + +* Using the tcg_gen_helper_x_y it is possible to call any function + taking i32, i64 or pointer types. By default, before calling a helper, + all globals are stored at their canonical location and it is assumed + that the function can modify them. By default, the helper is allowed to + modify the CPU state or raise an exception. 
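+
+  In QEMU these modifiers are attached with the ``DEF_HELPER_FLAGS_*``
+  macros; a representative declaration (the ``_RWG_SE`` suffix combines
+  the no-read/write-globals and no-side-effects flags described next):
+
+  .. code-block:: c
+
+     DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)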
+ + This can be overridden using the following function modifiers: + + - ``TCG_CALL_NO_READ_GLOBALS`` means that the helper does not read globals, + either directly or via an exception. They will not be saved to their + canonical locations before calling the helper. + + - ``TCG_CALL_NO_WRITE_GLOBALS`` means that the helper does not modify any globals. + They will only be saved to their canonical location before calling helpers, + but they won't be reloaded afterwards. + + - ``TCG_CALL_NO_SIDE_EFFECTS`` means that the call to the function is removed if + the return value is not used. + + Note that ``TCG_CALL_NO_READ_GLOBALS`` implies ``TCG_CALL_NO_WRITE_GLOBALS``. + + On some TCG targets (e.g. x86), several calling conventions are + supported. + +Branches +^^^^^^^^ + +* Use the instruction 'br' to jump to a label. + +Code Optimizations +------------------ + +When generating instructions, you can count on at least the following +optimizations: + +- Single instructions are simplified, e.g. + + .. code-block:: none + + and_i32 t0, t0, $0xffffffff + + is suppressed. + +- A liveness analysis is done at the basic block level. The + information is used to suppress moves from a dead variable to + another one. It is also used to remove instructions which compute + dead results. The later is especially useful for condition code + optimization in QEMU. + + In the following example: + + .. code-block:: none + + add_i32 t0, t1, t2 + add_i32 t0, t0, $1 + mov_i32 t0, $1 + + only the last instruction is kept. + + +Instruction Reference +===================== + +Function call +------------- + +.. list-table:: + + * - call *<ret>* *<params>* ptr + + - | call function 'ptr' (pointer type) + | + | *<ret>* optional 32 bit or 64 bit return value + | *<params>* optional 32 bit or 64 bit parameters + +Jumps/Labels +------------ + +.. list-table:: + + * - set_label $label + + - | Define label 'label' at the current program point. + + * - br $label + + - | Jump to label. + + * - brcond_i32/i64 *t0*, *t1*, *cond*, *label* + + - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be: + | + | ``TCG_COND_EQ`` + | ``TCG_COND_NE`` + | ``TCG_COND_LT /* signed */`` + | ``TCG_COND_GE /* signed */`` + | ``TCG_COND_LE /* signed */`` + | ``TCG_COND_GT /* signed */`` + | ``TCG_COND_LTU /* unsigned */`` + | ``TCG_COND_GEU /* unsigned */`` + | ``TCG_COND_LEU /* unsigned */`` + | ``TCG_COND_GTU /* unsigned */`` + +Arithmetic +---------- + +.. list-table:: + + * - add_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* + *t2* + + * - sub_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* - *t2* + + * - neg_i32/i64 *t0*, *t1* + + - | *t0* = -*t1* (two's complement) + + * - mul_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* * *t2* + + * - div_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* / *t2* (signed) + | Undefined behavior if division by zero or overflow. + + * - divu_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* / *t2* (unsigned) + | Undefined behavior if division by zero. + + * - rem_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* % *t2* (signed) + | Undefined behavior if division by zero or overflow. + + * - remu_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* % *t2* (unsigned) + | Undefined behavior if division by zero. + + +Logical +------- + +.. 
list-table:: + + * - and_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* & *t2* + + * - or_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* | *t2* + + * - xor_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* ^ *t2* + + * - not_i32/i64 *t0*, *t1* + + - | *t0* = ~\ *t1* + + * - andc_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* & ~\ *t2* + + * - eqv_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* + + * - nand_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = ~(*t1* & *t2*) + + * - nor_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = ~(*t1* | *t2*) + + * - orc_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* | ~\ *t2* + + * - clz_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* ? clz(*t1*) : *t2* + + * - ctz_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* ? ctz(*t1*) : *t2* + + * - ctpop_i32/i64 *t0*, *t1* + + - | *t0* = number of bits set in *t1* + | + | With *ctpop* short for "count population", matching + | the function name used in ``include/qemu/host-utils.h``. + + +Shifts/Rotates +-------------- + +.. list-table:: + + * - shl_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* << *t2* + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + + * - shr_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* >> *t2* (unsigned) + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + + * - sar_i32/i64 *t0*, *t1*, *t2* + + - | *t0* = *t1* >> *t2* (signed) + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + + * - rotl_i32/i64 *t0*, *t1*, *t2* + + - | Rotation of *t2* bits to the left + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + + * - rotr_i32/i64 *t0*, *t1*, *t2* + + - | Rotation of *t2* bits to the right. + | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + + +Misc +---- + +.. list-table:: + + * - mov_i32/i64 *t0*, *t1* + + - | *t0* = *t1* + | Move *t1* to *t0* (both operands must have the same type). + + * - ext8s_i32/i64 *t0*, *t1* + + ext8u_i32/i64 *t0*, *t1* + + ext16s_i32/i64 *t0*, *t1* + + ext16u_i32/i64 *t0*, *t1* + + ext32s_i64 *t0*, *t1* + + ext32u_i64 *t0*, *t1* + + - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type) + + * - bswap16_i32/i64 *t0*, *t1*, *flags* + + - | 16 bit byte swap on the low bits of a 32/64 bit input. + | + | If *flags* & ``TCG_BSWAP_IZ``, then *t1* is known to be zero-extended from bit 15. + | If *flags* & ``TCG_BSWAP_OZ``, then *t0* will be zero-extended from bit 15. + | If *flags* & ``TCG_BSWAP_OS``, then *t0* will be sign-extended from bit 15. + | + | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value. + + * - bswap32_i64 *t0*, *t1*, *flags* + + - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, + except they apply from bit 31 instead of bit 15. + + * - bswap32_i32 *t0*, *t1*, *flags* + + bswap64_i64 *t0*, *t1*, *flags* + + - | 32/64 bit byte swap. The flags are ignored, but still present + for consistency with the other bswap opcodes. + + * - discard_i32/i64 *t0* + + - | Indicate that the value of *t0* won't be used later. It is useful to + force dead code elimination. + + * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len* + + - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*. + | + | The bitfield is described by *pos*/*len*, which are immediate values: + | + | *len* - the length of the bitfield + | *pos* - the position of the first bit, counting from the LSB + | + | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field + at bit 8. 
This operation would be equivalent to + | + | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) + + * - extract_i32/i64 *dest*, *t1*, *pos*, *len* + + sextract_i32/i64 *dest*, *t1*, *pos*, *len* + + - | Extract a bitfield from *t1*, placing the result in *dest*. + | + | The bitfield is described by *pos*/*len*, which are immediate values, + as above for deposit. For extract_*, the result will be extended + to the left with zeros; for sextract_*, the result will be extended + to the left with copies of the bitfield sign bit at *pos* + *len* - 1. + | + | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field + at bit 8. This operation would be equivalent to + | + | *dest* = (*t1* << 20) >> 28 + | + | (using an arithmetic right shift). + + * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* + + - | For N = {32,64}, extract an N-bit quantity from the concatenation + of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander + accepts 0 <= *pos* <= N as inputs. The backend code generator will + not see either 0 or N as inputs for these opcodes. + + * - extrl_i64_i32 *t0*, *t1* + + - | For 64-bit hosts only, extract the low 32-bits of input *t1* and place it + into 32-bit output *t0*. Depending on the host, this may be a simple move, + or may require additional canonicalization. + + * - extrh_i64_i32 *t0*, *t1* + + - | For 64-bit hosts only, extract the high 32-bits of input *t1* and place it + into 32-bit output *t0*. Depending on the host, this may be a simple shift, + or may require additional canonicalization. + + +Conditional moves +----------------- + +.. list-table:: + + * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond* + + - | *dest* = (*t1* *cond* *t2*) + | + | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0. + + * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond* + + - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*) + | + | Set *dest* to *v1* if (*c1* *cond* *c2*) is true, otherwise set to *v2*. + + +Type conversions +---------------- + +.. list-table:: + + * - ext_i32_i64 *t0*, *t1* + + - | Convert *t1* (32 bit) to *t0* (64 bit) and does sign extension + + * - extu_i32_i64 *t0*, *t1* + + - | Convert *t1* (32 bit) to *t0* (64 bit) and does zero extension + + * - trunc_i64_i32 *t0*, *t1* + + - | Truncate *t1* (64 bit) to *t0* (32 bit) + + * - concat_i32_i64 *t0*, *t1*, *t2* + + - | Construct *t0* (64-bit) taking the low half from *t1* (32 bit) and the high half + from *t2* (32 bit). + + * - concat32_i64 *t0*, *t1*, *t2* + + - | Construct *t0* (64-bit) taking the low half from *t1* (64 bit) and the high half + from *t2* (64 bit). + + +Load/Store +---------- + +.. list-table:: + + * - ld_i32/i64 *t0*, *t1*, *offset* + + ld8s_i32/i64 *t0*, *t1*, *offset* + + ld8u_i32/i64 *t0*, *t1*, *offset* + + ld16s_i32/i64 *t0*, *t1*, *offset* + + ld16u_i32/i64 *t0*, *t1*, *offset* + + ld32s_i64 t0, *t1*, *offset* + + ld32u_i64 t0, *t1*, *offset* + + - | *t0* = read(*t1* + *offset*) + | + | Load 8, 16, 32 or 64 bits with or without sign extension from host memory. + *offset* must be a constant. + + * - st_i32/i64 *t0*, *t1*, *offset* + + st8_i32/i64 *t0*, *t1*, *offset* + + st16_i32/i64 *t0*, *t1*, *offset* + + st32_i64 *t0*, *t1*, *offset* + + - | write(*t0*, *t1* + *offset*) + | + | Write 8, 16, 32 or 64 bits to host memory. + +All this opcodes assume that the pointed host memory doesn't correspond +to a global. In the latter case the behaviour is unpredictable. + + +Multiword arithmetic support +---------------------------- + +.. 
list-table:: + + * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + + sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + + - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are + formed from two single-word arguments, and the double-word output *t0* + is returned in two single-word outputs. + + * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + + - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full + double-word product *t0*. The latter is returned in two single-word outputs. + + * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + + - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. + + * - mulsh_i32/i64 *t0*, *t1*, *t2* + + muluh_i32/i64 *t0*, *t1*, *t2* + + - | Provide the high part of a signed or unsigned multiply, respectively. + | + | If mulu2/muls2 are not provided by the backend, the tcg-op generator + can obtain the same results by emitting a pair of opcodes, mul + muluh/mulsh. + + +Memory Barrier support +---------------------- + +.. list-table:: + + * - mb *<$arg>* + + - | Generate a target memory barrier instruction to ensure memory ordering + as being enforced by a corresponding guest memory barrier instruction. + | + | The ordering enforced by the backend may be stricter than the ordering + required by the guest. It cannot be weaker. This opcode takes a constant + argument which is required to generate the appropriate barrier + instruction. The backend should take care to emit the target barrier + instruction only when necessary i.e., for SMP guests and when MTTCG is + enabled. + | + | The guest translators should generate this opcode for all guest instructions + which have ordering side effects. + | + | Please see :ref:`atomics-ref` for more information on memory barriers. + + +64-bit guest on 32-bit host support +----------------------------------- + +The following opcodes are internal to TCG. Thus they are to be implemented by +32-bit host code generators, but are not to be emitted by guest translators. +They are emitted as needed by inline functions within ``tcg-op.h``. + +.. list-table:: + + * - brcond2_i32 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *cond*, *label* + + - | Similar to brcond, except that the 64-bit values *t0* and *t1* + are formed from two 32-bit arguments. + + * - setcond2_i32 *dest*, *t1_low*, *t1_high*, *t2_low*, *t2_high*, *cond* + + - | Similar to setcond, except that the 64-bit values *t1* and *t2* are + formed from two 32-bit arguments. The result is a 32-bit value. + + +QEMU specific operations +------------------------ + +.. list-table:: + + * - exit_tb *t0* + + - | Exit the current TB and return the value *t0* (word type). + + * - goto_tb *index* + + - | Exit the current TB and jump to the TB index *index* (constant) if the + current TB was linked to this TB. Otherwise execute the next + instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued + at most once with each slot index per TB. + + * - lookup_and_goto_ptr *tb_addr* + + - | Look up a TB address *tb_addr* and jump to it if valid. If not valid, + jump to the TCG epilogue to go back to the exec loop. + | + | This operation is optional. If the TCG backend does not implement the + goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). 
+ + * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx* + + qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx* + + qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* + + - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest + address *t1*. The _i32/_i64 size applies to the size of the input/output + register *t0* only. The address *t1* is always sized according to the guest, + and the width of the memory operation is controlled by *flags*. + | + | Both *t0* and *t1* may be split into little-endian ordered pairs of registers + if dealing with 64-bit quantities on a 32-bit host. + | + | The *memidx* selects the qemu tlb index to use (e.g. user or kernel access). + The flags are the MemOp bits, selecting the sign, width, and endianness + of the memory access. + | + | For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a + 64-bit memory access specified in *flags*. + | + | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of + the memory operation is known to be 8-bit. This allows the backend to + provide a different set of register constraints. + + +Host vector operations +---------------------- + +All of the vector ops have two parameters, ``TCGOP_VECL`` & ``TCGOP_VECE``. +The former specifies the length of the vector in log2 64-bit units; the +latter specifies the length of the element (if applicable) in log2 8-bit units. +E.g. VECL = 1 -> 64 << 1 -> v128, and VECE = 2 -> 1 << 2 -> i32. + +.. list-table:: + + * - mov_vec *v0*, *v1* + ld_vec *v0*, *t1* + st_vec *v0*, *t1* + + - | Move, load and store. + + * - dup_vec *v0*, *r1* + + - | Duplicate the low N bits of *r1* into VECL/VECE copies across *v0*. + + * - dupi_vec *v0*, *c* + + - | Similarly, for a constant. + | Smaller values will be replicated to host register size by the expanders. + + * - dup2_vec *v0*, *r1*, *r2* + + - | Duplicate *r2*:*r1* into VECL/64 copies across *v0*. This opcode is + only present for 32-bit hosts. + + * - add_vec *v0*, *v1*, *v2* + + - | *v0* = *v1* + *v2*, in elements across the vector. + + * - sub_vec *v0*, *v1*, *v2* + + - | Similarly, *v0* = *v1* - *v2*. + + * - mul_vec *v0*, *v1*, *v2* + + - | Similarly, *v0* = *v1* * *v2*. + + * - neg_vec *v0*, *v1* + + - | Similarly, *v0* = -*v1*. + + * - abs_vec *v0*, *v1* + + - | Similarly, *v0* = *v1* < 0 ? -*v1* : *v1*, in elements across the vector. + + * - smin_vec *v0*, *v1*, *v2* + + umin_vec *v0*, *v1*, *v2* + + - | Similarly, *v0* = MIN(*v1*, *v2*), for signed and unsigned element types. + + * - smax_vec *v0*, *v1*, *v2* + + umax_vec *v0*, *v1*, *v2* + + - | Similarly, *v0* = MAX(*v1*, *v2*), for signed and unsigned element types. + + * - ssadd_vec *v0*, *v1*, *v2* + + sssub_vec *v0*, *v1*, *v2* + + usadd_vec *v0*, *v1*, *v2* + + ussub_vec *v0*, *v1*, *v2* + + - | Signed and unsigned saturating addition and subtraction. + | + | If the true result is not representable within the element type, the + element is set to the minimum or maximum value for the type. + + * - and_vec *v0*, *v1*, *v2* + + or_vec *v0*, *v1*, *v2* + + xor_vec *v0*, *v1*, *v2* + + andc_vec *v0*, *v1*, *v2* + + orc_vec *v0*, *v1*, *v2* + + not_vec *v0*, *v1* + + - | Similarly, logical operations with and without complement. + | + | Note that VECE is unused. + + * - shli_vec *v0*, *v1*, *i2* + + shls_vec *v0*, *v1*, *s2* + + - | Shift all elements from v1 by a scalar *i2*/*s2*. I.e. + + .. 
code-block:: c + + for (i = 0; i < VECL/VECE; ++i) { + v0[i] = v1[i] << s2; + } + + * - shri_vec *v0*, *v1*, *i2* + + sari_vec *v0*, *v1*, *i2* + + rotli_vec *v0*, *v1*, *i2* + + shrs_vec *v0*, *v1*, *s2* + + sars_vec *v0*, *v1*, *s2* + + - | Similarly for logical and arithmetic right shift, and left rotate. + + * - shlv_vec *v0*, *v1*, *v2* + + - | Shift elements from *v1* by elements from *v2*. I.e. + + .. code-block:: c + + for (i = 0; i < VECL/VECE; ++i) { + v0[i] = v1[i] << v2[i]; + } + + * - shrv_vec *v0*, *v1*, *v2* + + sarv_vec *v0*, *v1*, *v2* + + rotlv_vec *v0*, *v1*, *v2* + + rotrv_vec *v0*, *v1*, *v2* + + - | Similarly for logical and arithmetic right shift, and rotates. + + * - cmp_vec *v0*, *v1*, *v2*, *cond* + + - | Compare vectors by element, storing -1 for true and 0 for false. + + * - bitsel_vec *v0*, *v1*, *v2*, *v3* + + - | Bitwise select, *v0* = (*v2* & *v1*) | (*v3* & ~\ *v1*), across the entire vector. + + * - cmpsel_vec *v0*, *c1*, *c2*, *v3*, *v4*, *cond* + + - | Select elements based on comparison results: + + .. code-block:: c + + for (i = 0; i < n; ++i) { + v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i]. + } + +**Note 1**: Some shortcuts are defined when the last operand is known to be +a constant (e.g. addi for add, movi for mov). + +**Note 2**: When using TCG, the opcodes must never be generated directly +as some of them may not be available as "real" opcodes. Always use the +function tcg_gen_xxx(args). + + +Backend +======= + +``tcg-target.h`` contains the target specific definitions. ``tcg-target.c.inc`` +contains the target specific code; it is #included by ``tcg/tcg.c``, rather +than being a standalone C file. + +Assumptions +----------- + +The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or +64 bit. It is expected that the pointer has the same size as the word. + +On a 32 bit target, all 64 bit operations are converted to 32 bits. A +few specific operations must be implemented to allow it (see add2_i32, +sub2_i32, brcond2_i32). + +On a 64 bit target, the values are transferred between 32 and 64-bit +registers using the following ops: + +- trunc_shr_i64_i32 +- ext_i32_i64 +- extu_i32_i64 + +They ensure that the values are correctly truncated or extended when +moved from a 32-bit to a 64-bit register or vice-versa. Note that the +trunc_shr_i64_i32 is an optional op. It is not necessary to implement +it if all the following conditions are met: + +- 64-bit registers can hold 32-bit values +- 32-bit values in a 64-bit register do not need to stay zero or + sign extended +- all 32-bit TCG ops ignore the high part of 64-bit registers + +Floating point operations are not supported in this version. A +previous incarnation of the code generator had full support of them, +but it is better to concentrate on integer operations first. + +Constraints +---------------- + +GCC like constraints are used to define the constraints of every +instruction. Memory constraints are not supported in this +version. Aliases are specified in the input operands as for GCC. + +The same register may be used for both an input and an output, even when +they are not explicitly aliased. If an op expands to multiple target +instructions then care must be taken to avoid clobbering input values. +GCC style "early clobber" outputs are supported, with '``&``'. + +A target can define specific register or constant constraints. If an +operation uses a constant input constraint which does not allow all +constants, it must also accept registers in order to have a fallback. 
+The constraint '``i``' is defined generically to accept any constant. +The constraint '``r``' is not defined generically, but is consistently +used by each backend to indicate all registers. + +The movi_i32 and movi_i64 operations must accept any constants. + +The mov_i32 and mov_i64 operations must accept any registers of the +same type. + +The ld/st/sti instructions must accept signed 32 bit constant offsets. +This can be implemented by reserving a specific register in which to +compute the address if the offset is too big. + +The ld/st instructions must accept any destination (ld) or source (st) +register. + +The sti instruction may fail if it cannot store the given constant. + +Function call assumptions +------------------------- + +- The only supported types for parameters and return value are: 32 and + 64 bit integers and pointer. +- The stack grows downwards. +- The first N parameters are passed in registers. +- The next parameters are passed on the stack by storing them as words. +- Some registers are clobbered during the call. +- The function can return 0 or 1 value in registers. On a 32 bit + target, functions must be able to return 2 values in registers for + 64 bit return type. + + +Recommended coding rules for best performance +============================================= + +- Use globals to represent the parts of the QEMU CPU state which are + often modified, e.g. the integer registers and the condition + codes. TCG will be able to use host registers to store them. + +- Avoid globals stored in fixed registers. They must be used only to + store the pointer to the CPU state and possibly to store a pointer + to a register window. + +- Use temporaries. Use local temporaries only when really needed, + e.g. when you need to use a value after a jump. Local temporaries + introduce a performance hit in the current TCG implementation: their + content is saved to memory at end of each basic block. + +- Free temporaries and local temporaries when they are no longer used + (tcg_temp_free). Since tcg_const_x() also creates a temporary, you + should free it after it is used. Freeing temporaries does not yield + a better generated code, but it reduces the memory usage of TCG and + the speed of the translation. + +- Don't hesitate to use helpers for complicated or seldom used guest + instructions. There is little performance advantage in using TCG to + implement guest instructions taking more than about twenty TCG + instructions. Note that this rule of thumb is more applicable to + helpers doing complex logic or arithmetic, where the C compiler has + scope to do a good job of optimisation; it is less relevant where + the instruction is mostly doing loads and stores, and in those cases + inline TCG may still be faster for longer sequences. + +- The hard limit on the number of TCG instructions you can generate + per guest instruction is set by ``MAX_OP_PER_INSTR`` in ``exec-all.h`` -- + you cannot exceed this without risking a buffer overrun. + +- Use the 'discard' instruction if you know that TCG won't be able to + prove that a given global is "dead" at a given program point. The + x86 guest uses it to improve the condition codes optimisation. diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst index a65fb7b..136a7a0 100644 --- a/docs/devel/tcg.rst +++ b/docs/devel/tcg.rst @@ -9,7 +9,7 @@ which make it relatively easily portable and simple while achieving good performances. QEMU's dynamic translation backend is called TCG, for "Tiny Code -Generator". 
For more information, please take a look at ``tcg/README``. +Generator". For more information, please take a look at :ref:`tcg-ops-ref`. The following sections outline some notable features and implementation details of QEMU's dynamic translator. diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c index f189712..d88d6cc 100644 --- a/hw/arm/fsl-imx6ul.c +++ b/hw/arm/fsl-imx6ul.c @@ -81,7 +81,7 @@ static void fsl_imx6ul_init(Object *obj) */ for (i = 0; i < FSL_IMX6UL_NUM_GPTS; i++) { snprintf(name, NAME_SIZE, "gpt%d", i); - object_initialize_child(obj, name, &s->gpt[i], TYPE_IMX7_GPT); + object_initialize_child(obj, name, &s->gpt[i], TYPE_IMX6UL_GPT); } /* diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c index cc6fdb9..afc7480 100644 --- a/hw/arm/fsl-imx7.c +++ b/hw/arm/fsl-imx7.c @@ -219,9 +219,19 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp) FSL_IMX7_GPT4_ADDR, }; + static const int FSL_IMX7_GPTn_IRQ[FSL_IMX7_NUM_GPTS] = { + FSL_IMX7_GPT1_IRQ, + FSL_IMX7_GPT2_IRQ, + FSL_IMX7_GPT3_IRQ, + FSL_IMX7_GPT4_IRQ, + }; + s->gpt[i].ccm = IMX_CCM(&s->ccm); sysbus_realize(SYS_BUS_DEVICE(&s->gpt[i]), &error_abort); sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpt[i]), 0, FSL_IMX7_GPTn_ADDR[i]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpt[i]), 0, + qdev_get_gpio_in(DEVICE(&s->a7mpcore), + FSL_IMX7_GPTn_IRQ[i])); } for (i = 0; i < FSL_IMX7_NUM_GPIOS; i++) { @@ -235,8 +245,37 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp) FSL_IMX7_GPIO7_ADDR, }; + static const int FSL_IMX7_GPIOn_LOW_IRQ[FSL_IMX7_NUM_GPIOS] = { + FSL_IMX7_GPIO1_LOW_IRQ, + FSL_IMX7_GPIO2_LOW_IRQ, + FSL_IMX7_GPIO3_LOW_IRQ, + FSL_IMX7_GPIO4_LOW_IRQ, + FSL_IMX7_GPIO5_LOW_IRQ, + FSL_IMX7_GPIO6_LOW_IRQ, + FSL_IMX7_GPIO7_LOW_IRQ, + }; + + static const int FSL_IMX7_GPIOn_HIGH_IRQ[FSL_IMX7_NUM_GPIOS] = { + FSL_IMX7_GPIO1_HIGH_IRQ, + FSL_IMX7_GPIO2_HIGH_IRQ, + FSL_IMX7_GPIO3_HIGH_IRQ, + FSL_IMX7_GPIO4_HIGH_IRQ, + FSL_IMX7_GPIO5_HIGH_IRQ, + FSL_IMX7_GPIO6_HIGH_IRQ, + FSL_IMX7_GPIO7_HIGH_IRQ, + }; + sysbus_realize(SYS_BUS_DEVICE(&s->gpio[i]), &error_abort); - sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, FSL_IMX7_GPIOn_ADDR[i]); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, + FSL_IMX7_GPIOn_ADDR[i]); + + sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0, + qdev_get_gpio_in(DEVICE(&s->a7mpcore), + FSL_IMX7_GPIOn_LOW_IRQ[i])); + + sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 1, + qdev_get_gpio_in(DEVICE(&s->a7mpcore), + FSL_IMX7_GPIOn_HIGH_IRQ[i])); } /* diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index b151113..c9df063 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -230,13 +230,13 @@ static void n8x0_i2c_setup(struct n800_s *s) } /* Touchscreen and keypad controller */ -static MouseTransformInfo n800_pointercal = { +static const MouseTransformInfo n800_pointercal = { .x = 800, .y = 480, .a = { 14560, -68, -3455208, -39, -9621, 35152972, 65536 }, }; -static MouseTransformInfo n810_pointercal = { +static const MouseTransformInfo n810_pointercal = { .x = 800, .y = 480, .a = { 15041, 148, -4731056, 171, -10238, 35933380, 65536 }, @@ -334,7 +334,7 @@ static void n810_key_event(void *opaque, int keycode) #define M 0 -static int n810_keys[0x80] = { +static const int n810_keys[0x80] = { [0x01] = 16, /* Q */ [0x02] = 37, /* K */ [0x03] = 24, /* O */ @@ -810,7 +810,7 @@ static void n8x0_usb_setup(struct n800_s *s) /* Setup done before the main bootloader starts by some early setup code * - used when we want to run the main bootloader in emulation. This * isn't documented. 
*/ -static uint32_t n800_pinout[104] = { +static const uint32_t n800_pinout[104] = { 0x080f00d8, 0x00d40808, 0x03080808, 0x080800d0, 0x00dc0808, 0x0b0f0f00, 0x080800b4, 0x00c00808, 0x08080808, 0x180800c4, 0x00b80000, 0x08080808, @@ -1060,7 +1060,7 @@ static void n8x0_boot_init(void *opaque) #define OMAP_TAG_CBUS 0x4e03 #define OMAP_TAG_EM_ASIC_BB5 0x4e04 -static struct omap_gpiosw_info_s { +static const struct omap_gpiosw_info_s { const char *name; int line; int type; @@ -1078,7 +1078,7 @@ static struct omap_gpiosw_info_s { "headphone", N8X0_HEADPHONE_GPIO, OMAP_GPIOSW_TYPE_CONNECTION | OMAP_GPIOSW_INVERTED, }, - { NULL } + { /* end of list */ } }, n810_gpiosw_info[] = { { "gps_reset", N810_GPS_RESET_GPIO, @@ -1099,10 +1099,10 @@ static struct omap_gpiosw_info_s { "slide", N810_SLIDE_GPIO, OMAP_GPIOSW_TYPE_COVER | OMAP_GPIOSW_INVERTED, }, - { NULL } + { /* end of list */ } }; -static struct omap_partition_info_s { +static const struct omap_partition_info_s { uint32_t offset; uint32_t size; int mask; @@ -1113,27 +1113,25 @@ static struct omap_partition_info_s { { 0x00080000, 0x00200000, 0x0, "kernel" }, { 0x00280000, 0x00200000, 0x3, "initfs" }, { 0x00480000, 0x0fb80000, 0x3, "rootfs" }, - - { 0, 0, 0, NULL } + { /* end of list */ } }, n810_part_info[] = { { 0x00000000, 0x00020000, 0x3, "bootloader" }, { 0x00020000, 0x00060000, 0x0, "config" }, { 0x00080000, 0x00220000, 0x0, "kernel" }, { 0x002a0000, 0x00400000, 0x0, "initfs" }, { 0x006a0000, 0x0f960000, 0x0, "rootfs" }, - - { 0, 0, 0, NULL } + { /* end of list */ } }; -static uint8_t n8x0_bd_addr[6] = { N8X0_BD_ADDR }; +static const uint8_t n8x0_bd_addr[6] = { N8X0_BD_ADDR }; static int n8x0_atag_setup(void *p, int model) { uint8_t *b; uint16_t *w; uint32_t *l; - struct omap_gpiosw_info_s *gpiosw; - struct omap_partition_info_s *partition; + const struct omap_gpiosw_info_s *gpiosw; + const struct omap_partition_info_s *partition; const char *tag; w = p; diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 2208385..54186f3 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -116,7 +116,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) g_hash_table_insert(bs->iotlb, key, new); } -inline void smmu_iotlb_inv_all(SMMUState *s) +void smmu_iotlb_inv_all(SMMUState *s) { trace_smmu_iotlb_inv_all(); g_hash_table_remove_all(s->iotlb); @@ -146,9 +146,8 @@ static gboolean smmu_hash_remove_by_asid_iova(gpointer key, gpointer value, ((entry->iova & ~info->mask) == info->iova); } -inline void -smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, - uint8_t tg, uint64_t num_pages, uint8_t ttl) +void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, + uint8_t tg, uint64_t num_pages, uint8_t ttl) { /* if tg is not set we use 4KB range invalidation */ uint8_t granule = tg ? 
tg * 2 + 10 : 12; @@ -174,7 +173,7 @@ smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, &info); } -inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) +void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) { trace_smmu_iotlb_inv_asid(asid); g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid); @@ -374,8 +373,8 @@ error: * * return 0 on success */ -inline int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, - SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) +int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, + SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) { if (!cfg->aa64) { /* @@ -483,7 +482,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) } /* Unmap all notifiers attached to @mr */ -inline void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) +static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) { IOMMUNotifier *n; diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index 78b5f35..b177e76 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -235,6 +235,7 @@ static void cpu_common_initfn(Object *obj) /* the default value is changed by qemu_init_vcpu() for softmmu */ cpu->nr_cores = 1; cpu->nr_threads = 1; + cpu->cflags_next_tb = -1; qemu_mutex_init(&cpu->work_mutex); QSIMPLEQ_INIT(&cpu->work_list); diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c index 14698ce..555b677 100644 --- a/hw/input/tsc2005.c +++ b/hw/input/tsc2005.c @@ -523,7 +523,7 @@ void *tsc2005_init(qemu_irq pintdav) * from the touchscreen. Assuming 12-bit precision was used during * tslib calibration. */ -void tsc2005_set_transform(void *opaque, MouseTransformInfo *info) +void tsc2005_set_transform(void *opaque, const MouseTransformInfo *info) { TSC2005State *s = (TSC2005State *) opaque; diff --git a/hw/input/tsc210x.c b/hw/input/tsc210x.c index df7313d..fdd5ff8 100644 --- a/hw/input/tsc210x.c +++ b/hw/input/tsc210x.c @@ -1176,8 +1176,7 @@ I2SCodec *tsc210x_codec(uWireSlave *chip) * from the touchscreen. Assuming 12-bit precision was used during * tslib calibration. 
*/ -void tsc210x_set_transform(uWireSlave *chip, - MouseTransformInfo *info) +void tsc210x_set_transform(uWireSlave *chip, const MouseTransformInfo *info) { TSC210xState *s = (TSC210xState *) chip->opaque; #if 0 diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index ecd2883..21441d0 100644 --- a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -72,12 +72,15 @@ config RISCV_ACLINT config RISCV_APLIC bool + select MSI_NONBROKEN config RISCV_IMSIC bool + select MSI_NONBROKEN config SIFIVE_PLIC bool + select MSI_NONBROKEN config GOLDFISH_PIC bool diff --git a/hw/intc/loongarch_pch_msi.c b/hw/intc/loongarch_pch_msi.c index b36d6d7..ecf3ed0 100644 --- a/hw/intc/loongarch_pch_msi.c +++ b/hw/intc/loongarch_pch_msi.c @@ -32,7 +32,7 @@ static void loongarch_msi_mem_write(void *opaque, hwaddr addr, */ irq_num = (val & 0xff) - s->irq_base; trace_loongarch_msi_set_irq(irq_num); - assert(irq_num < PCH_MSI_IRQ_NUM); + assert(irq_num < s->irq_num); qemu_set_irq(s->pch_msi_irq[irq_num], 1); } @@ -49,6 +49,28 @@ static void pch_msi_irq_handler(void *opaque, int irq, int level) qemu_set_irq(s->pch_msi_irq[irq], level); } +static void loongarch_pch_msi_realize(DeviceState *dev, Error **errp) +{ + LoongArchPCHMSI *s = LOONGARCH_PCH_MSI(dev); + + if (!s->irq_num || s->irq_num > PCH_MSI_IRQ_NUM) { + error_setg(errp, "Invalid 'msi_irq_num'"); + return; + } + + s->pch_msi_irq = g_new(qemu_irq, s->irq_num); + + qdev_init_gpio_out(dev, s->pch_msi_irq, s->irq_num); + qdev_init_gpio_in(dev, pch_msi_irq_handler, s->irq_num); +} + +static void loongarch_pch_msi_unrealize(DeviceState *dev) +{ + LoongArchPCHMSI *s = LOONGARCH_PCH_MSI(dev); + + g_free(s->pch_msi_irq); +} + static void loongarch_pch_msi_init(Object *obj) { LoongArchPCHMSI *s = LOONGARCH_PCH_MSI(obj); @@ -59,12 +81,11 @@ static void loongarch_pch_msi_init(Object *obj) sysbus_init_mmio(sbd, &s->msi_mmio); msi_nonbroken = true; - qdev_init_gpio_out(DEVICE(obj), s->pch_msi_irq, PCH_MSI_IRQ_NUM); - qdev_init_gpio_in(DEVICE(obj), pch_msi_irq_handler, PCH_MSI_IRQ_NUM); } static Property loongarch_msi_properties[] = { DEFINE_PROP_UINT32("msi_irq_base", LoongArchPCHMSI, irq_base, 0), + DEFINE_PROP_UINT32("msi_irq_num", LoongArchPCHMSI, irq_num, 0), DEFINE_PROP_END_OF_LIST(), }; @@ -72,6 +93,8 @@ static void loongarch_pch_msi_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = loongarch_pch_msi_realize; + dc->unrealize = loongarch_pch_msi_unrealize; device_class_set_props(dc, loongarch_msi_properties); } diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c index 3380b09..9208fc4 100644 --- a/hw/intc/loongarch_pch_pic.c +++ b/hw/intc/loongarch_pch_pic.c @@ -6,12 +6,16 @@ */ #include "qemu/osdep.h" +#include "qemu/bitops.h" #include "hw/sysbus.h" #include "hw/loongarch/virt.h" +#include "hw/pci-host/ls7a.h" #include "hw/irq.h" #include "hw/intc/loongarch_pch_pic.h" +#include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "trace.h" +#include "qapi/error.h" static void pch_pic_update_irq(LoongArchPCHPIC *s, uint64_t mask, int level) { @@ -40,7 +44,7 @@ static void pch_pic_irq_handler(void *opaque, int irq, int level) LoongArchPCHPIC *s = LOONGARCH_PCH_PIC(opaque); uint64_t mask = 1ULL << irq; - assert(irq < PCH_PIC_IRQ_NUM); + assert(irq < s->irq_num); trace_loongarch_pch_pic_irq_handler(irq, level); if (s->intedge & mask) { @@ -78,7 +82,12 @@ static uint64_t loongarch_pch_pic_low_readw(void *opaque, hwaddr addr, val = PCH_PIC_INT_ID_VAL; break; case PCH_PIC_INT_ID_HI: - val = PCH_PIC_INT_ID_NUM; + 
/* + * With 7A1000 manual + * bit 0-15 pch irqchip version + * bit 16-31 irq number supported with pch irqchip + */ + val = deposit32(PCH_PIC_INT_ID_VER, 16, 16, s->irq_num - 1); break; case PCH_PIC_INT_MASK_LO: val = (uint32_t)s->int_mask; @@ -365,6 +374,19 @@ static void loongarch_pch_pic_reset(DeviceState *d) s->int_polarity = 0x0; } +static void loongarch_pch_pic_realize(DeviceState *dev, Error **errp) +{ + LoongArchPCHPIC *s = LOONGARCH_PCH_PIC(dev); + + if (!s->irq_num || s->irq_num > VIRT_PCH_PIC_IRQ_NUM) { + error_setg(errp, "Invalid 'pic_irq_num'"); + return; + } + + qdev_init_gpio_out(dev, s->parent_irq, s->irq_num); + qdev_init_gpio_in(dev, pch_pic_irq_handler, s->irq_num); +} + static void loongarch_pch_pic_init(Object *obj) { LoongArchPCHPIC *s = LOONGARCH_PCH_PIC(obj); @@ -382,10 +404,13 @@ static void loongarch_pch_pic_init(Object *obj) sysbus_init_mmio(sbd, &s->iomem8); sysbus_init_mmio(sbd, &s->iomem32_high); - qdev_init_gpio_out(DEVICE(obj), s->parent_irq, PCH_PIC_IRQ_NUM); - qdev_init_gpio_in(DEVICE(obj), pch_pic_irq_handler, PCH_PIC_IRQ_NUM); } +static Property loongarch_pch_pic_properties[] = { + DEFINE_PROP_UINT32("pch_pic_irq_num", LoongArchPCHPIC, irq_num, 0), + DEFINE_PROP_END_OF_LIST(), +}; + static const VMStateDescription vmstate_loongarch_pch_pic = { .name = TYPE_LOONGARCH_PCH_PIC, .version_id = 1, @@ -411,8 +436,10 @@ static void loongarch_pch_pic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = loongarch_pch_pic_realize; dc->reset = loongarch_pch_pic_reset; dc->vmsd = &vmstate_loongarch_pch_pic; + device_class_set_props(dc, loongarch_pch_pic_properties); } static const TypeInfo loongarch_pch_pic_info = { diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c index c2dfacf..5522ede 100644 --- a/hw/intc/sifive_plic.c +++ b/hw/intc/sifive_plic.c @@ -42,7 +42,6 @@ static PLICMode char_to_mode(char c) switch (c) { case 'U': return PLICMode_U; case 'S': return PLICMode_S; - case 'H': return PLICMode_H; case 'M': return PLICMode_M; default: error_report("plic: invalid mode '%c'", c); @@ -78,6 +77,7 @@ static uint32_t sifive_plic_claimed(SiFivePLICState *plic, uint32_t addrid) uint32_t max_irq = 0; uint32_t max_prio = plic->target_priority[addrid]; int i, j; + int num_irq_in_word = 32; for (i = 0; i < plic->bitfield_words; i++) { uint32_t pending_enabled_not_claimed = @@ -88,7 +88,16 @@ static uint32_t sifive_plic_claimed(SiFivePLICState *plic, uint32_t addrid) continue; } - for (j = 0; j < 32; j++) { + if (i == (plic->bitfield_words - 1)) { + /* + * If plic->num_sources is not multiple of 32, num-of-irq in last + * word is not 32. Compute the num-of-irq of last word to avoid + * out-of-bound access of source_priority array. 
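+ * For example, with num_sources = 35 there are two bitfield words and the last word covers only 35 - 32 = 3 valid sources.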
+ */ + num_irq_in_word = plic->num_sources - ((plic->bitfield_words - 1) << 5); + } + + for (j = 0; j < num_irq_in_word; j++) { int irq = (i << 5) + j; uint32_t prio = plic->source_priority[irq]; int enabled = pending_enabled_not_claimed & (1 << j); @@ -131,10 +140,11 @@ static uint64_t sifive_plic_read(void *opaque, hwaddr addr, unsigned size) SiFivePLICState *plic = opaque; if (addr_between(addr, plic->priority_base, plic->num_sources << 2)) { - uint32_t irq = ((addr - plic->priority_base) >> 2) + 1; + uint32_t irq = (addr - plic->priority_base) >> 2; return plic->source_priority[irq]; - } else if (addr_between(addr, plic->pending_base, plic->num_sources >> 3)) { + } else if (addr_between(addr, plic->pending_base, + (plic->num_sources + 31) >> 3)) { uint32_t word = (addr - plic->pending_base) >> 2; return plic->pending[word]; @@ -178,7 +188,7 @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value, SiFivePLICState *plic = opaque; if (addr_between(addr, plic->priority_base, plic->num_sources << 2)) { - uint32_t irq = ((addr - plic->priority_base) >> 2) + 1; + uint32_t irq = (addr - plic->priority_base) >> 2; if (((plic->num_priorities + 1) & plic->num_priorities) == 0) { /* @@ -193,7 +203,7 @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value, sifive_plic_update(plic); } } else if (addr_between(addr, plic->pending_base, - plic->num_sources >> 3)) { + (plic->num_sources + 31) >> 3)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid pending write: 0x%" HWADDR_PRIx "", __func__, addr); @@ -281,7 +291,7 @@ static void sifive_plic_reset(DeviceState *dev) */ static void parse_hart_config(SiFivePLICState *plic) { - int addrid, hartid, modes; + int addrid, hartid, modes, m; const char *p; char c; @@ -290,11 +300,13 @@ static void parse_hart_config(SiFivePLICState *plic) p = plic->hart_config; while ((c = *p++)) { if (c == ',') { - addrid += ctpop8(modes); - modes = 0; - hartid++; + if (modes) { + addrid += ctpop8(modes); + hartid++; + modes = 0; + } } else { - int m = 1 << char_to_mode(c); + m = 1 << char_to_mode(c); if (modes == (modes | m)) { error_report("plic: duplicate mode '%c' in config: %s", c, plic->hart_config); @@ -305,8 +317,9 @@ static void parse_hart_config(SiFivePLICState *plic) } if (modes) { addrid += ctpop8(modes); + hartid++; + modes = 0; } - hartid++; plic->num_addrs = addrid; plic->num_harts = hartid; @@ -317,11 +330,16 @@ static void parse_hart_config(SiFivePLICState *plic) p = plic->hart_config; while ((c = *p++)) { if (c == ',') { - hartid++; + if (modes) { + hartid++; + modes = 0; + } } else { + m = char_to_mode(c); plic->addr_config[addrid].addrid = addrid; plic->addr_config[addrid].hartid = hartid; - plic->addr_config[addrid].mode = char_to_mode(c); + plic->addr_config[addrid].mode = m; + modes |= (1 << m); addrid++; } } @@ -346,6 +364,11 @@ static void sifive_plic_realize(DeviceState *dev, Error **errp) parse_hart_config(s); + if (!s->num_sources) { + error_setg(errp, "plic: invalid number of interrupt sources"); + return; + } + s->bitfield_words = (s->num_sources + 31) >> 5; s->num_enables = s->bitfield_words * s->num_addrs; s->source_priority = g_new0(uint32_t, s->num_sources); @@ -362,7 +385,8 @@ static void sifive_plic_realize(DeviceState *dev, Error **errp) s->m_external_irqs = g_malloc(sizeof(qemu_irq) * s->num_harts); qdev_init_gpio_out(dev, s->m_external_irqs, s->num_harts); - /* We can't allow the supervisor to control SEIP as this would allow the + /* + * We can't allow the supervisor to control SEIP as this would allow 
the * supervisor to clear a pending external interrupt which would result in * a lost interrupt when a PLIC is attached. The SEIP bit must be * hardware controlled when a PLIC is attached. @@ -370,8 +394,8 @@ static void sifive_plic_realize(DeviceState *dev, Error **errp) for (i = 0; i < s->num_harts; i++) { RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, MIP_SEIP) < 0) { - error_report("SEIP already claimed"); - exit(1); + error_setg(errp, "SEIP already claimed"); + return; } } @@ -402,8 +426,10 @@ static const VMStateDescription vmstate_sifive_plic = { static Property sifive_plic_properties[] = { DEFINE_PROP_STRING("hart-config", SiFivePLICState, hart_config), DEFINE_PROP_UINT32("hartid-base", SiFivePLICState, hartid_base, 0), - DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 0), + /* number of interrupt sources including interrupt source 0 */ + DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 1), DEFINE_PROP_UINT32("num-priorities", SiFivePLICState, num_priorities, 0), + /* interrupt priority register base starting from source 0 */ DEFINE_PROP_UINT32("priority-base", SiFivePLICState, priority_base, 0), DEFINE_PROP_UINT32("pending-base", SiFivePLICState, pending_base, 0), DEFINE_PROP_UINT32("enable-base", SiFivePLICState, enable_base, 0), @@ -476,11 +502,11 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, CPUState *cpu = qemu_get_cpu(cpu_num); if (plic->addr_config[i].mode == PLICMode_M) { - qdev_connect_gpio_out(dev, num_harts - plic->hartid_base + cpu_num, + qdev_connect_gpio_out(dev, cpu_num - hartid_base + num_harts, qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); } if (plic->addr_config[i].mode == PLICMode_S) { - qdev_connect_gpio_out(dev, cpu_num, + qdev_connect_gpio_out(dev, cpu_num - hartid_base, qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT)); } } diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index c8a495e..66be925 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -553,7 +553,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) LoongArchCPU *lacpu; CPULoongArchState *env; CPUState *cpu_state; - int cpu, pin, i; + int cpu, pin, i, start, num; ipi = qdev_new(TYPE_LOONGARCH_IPI); sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); @@ -616,6 +616,8 @@ static void loongarch_irq_init(LoongArchMachineState *lams) } pch_pic = qdev_new(TYPE_LOONGARCH_PCH_PIC); + num = VIRT_PCH_PIC_IRQ_NUM; + qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num); d = SYS_BUS_DEVICE(pch_pic); sysbus_realize_and_unref(d, &error_fatal); memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE, @@ -627,20 +629,23 @@ static void loongarch_irq_init(LoongArchMachineState *lams) VIRT_IOAPIC_REG_BASE + PCH_PIC_INT_STATUS_LO, sysbus_mmio_get_region(d, 2)); - /* Connect 64 pch_pic irqs to extioi */ - for (int i = 0; i < PCH_PIC_IRQ_NUM; i++) { + /* Connect pch_pic irqs to extioi */ + for (int i = 0; i < num; i++) { qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i)); } pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); - qdev_prop_set_uint32(pch_msi, "msi_irq_base", PCH_MSI_IRQ_START); + start = num; + num = EXTIOI_IRQS - start; + qdev_prop_set_uint32(pch_msi, "msi_irq_base", start); + qdev_prop_set_uint32(pch_msi, "msi_irq_num", num); d = SYS_BUS_DEVICE(pch_msi); sysbus_realize_and_unref(d, &error_fatal); sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW); - for (i = 0; i < PCH_MSI_IRQ_NUM; i++) { - /* Connect 192 pch_msi irqs to extioi */ + for (i = 0; i < num; i++) {
+ /* Connect pch_msi irqs to extioi */ qdev_connect_gpio_out(DEVICE(d), i, - qdev_get_gpio_in(extioi, i + PCH_MSI_IRQ_START)); + qdev_get_gpio_in(extioi, i + start)); } loongarch_devices_init(pch_pic, lams); diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c index 2db5e10..73437cd 100644 --- a/hw/mips/mips_int.c +++ b/hw/mips/mips_int.c @@ -32,17 +32,12 @@ static void cpu_mips_irq_request(void *opaque, int irq, int level) MIPSCPU *cpu = opaque; CPUMIPSState *env = &cpu->env; CPUState *cs = CPU(cpu); - bool locked = false; if (irq < 0 || irq > 7) { return; } - /* Make sure locking works even if BQL is already held by the caller */ - if (!qemu_mutex_iothread_locked()) { - locked = true; - qemu_mutex_lock_iothread(); - } + QEMU_IOTHREAD_LOCK_GUARD(); if (level) { env->CP0_Cause |= 1 << (irq + CP0Ca_IP); @@ -59,10 +54,6 @@ static void cpu_mips_irq_request(void *opaque, int irq, int level) } else { cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } - - if (locked) { - qemu_mutex_unlock_iothread(); - } } void cpu_mips_irq_init_cpu(MIPSCPU *cpu) diff --git a/hw/misc/imx6ul_ccm.c b/hw/misc/imx6ul_ccm.c index a65d031..e01bb68 100644 --- a/hw/misc/imx6ul_ccm.c +++ b/hw/misc/imx6ul_ccm.c @@ -522,12 +522,6 @@ static uint32_t imx6ul_ccm_get_clock_frequency(IMXCCMState *dev, IMXClk clock) case CLK_32k: freq = CKIL_FREQ; break; - case CLK_HIGH: - freq = CKIH_FREQ; - break; - case CLK_HIGH_DIV: - freq = CKIH_FREQ / 8; - break; default: qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: unsupported clock %d\n", TYPE_IMX6UL_CCM, __func__, clock); diff --git a/hw/misc/imx7_ccm.c b/hw/misc/imx7_ccm.c index 075159e..f135ec7 100644 --- a/hw/misc/imx7_ccm.c +++ b/hw/misc/imx7_ccm.c @@ -16,6 +16,10 @@ #include "hw/misc/imx7_ccm.h" #include "migration/vmstate.h" +#include "trace.h" + +#define CKIH_FREQ 24000000 /* 24MHz crystal input */ + static void imx7_analog_reset(DeviceState *dev) { IMX7AnalogState *s = IMX7_ANALOG(dev); @@ -219,16 +223,43 @@ static const VMStateDescription vmstate_imx7_ccm = { static uint32_t imx7_ccm_get_clock_frequency(IMXCCMState *dev, IMXClk clock) { /* - * This function is "consumed" by GPT emulation code, however on - * i.MX7 each GPT block can have their own clock root. This means - * that this functions needs somehow to know requester's identity - * and the way to pass it: be it via additional IMXClk constants - * or by adding another argument to this method needs to be - * figured out + * This function is "consumed" by GPT emulation code. Some clocks + * have fixed frequencies and we can provide the requested frequency + * easily. However, for CCM-provided clocks (like IPG), each GPT + * timer can have its own clock root. + * This means we need additional information when calling this + * function to know the requester's identity. */ - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Not implemented\n", - TYPE_IMX7_CCM, __func__); - return 0; + uint32_t freq = 0; + + switch (clock) { + case CLK_NONE: + break; + case CLK_32k: + freq = CKIL_FREQ; + break; + case CLK_HIGH: + freq = CKIH_FREQ; + break; + case CLK_IPG: + case CLK_IPG_HIGH: + /* + * For now we don't have a way to figure out the device this + * function is called for. Until then, the IPG-derived clocks + * are left unimplemented.
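The consumer-side math the fixed-rate clocks feed into is simple enough to show. A runnable sketch (plain C; my_ccm_get_freq is a hypothetical stand-in for imx_ccm_get_clock_frequency(), with rates mirroring the CKIL/CKIH constants above):

#include <stdint.h>
#include <stdio.h>

enum { MY_CLK_NONE, MY_CLK_32K, MY_CLK_HIGH };

static uint32_t my_ccm_get_freq(int clock)
{
    switch (clock) {
    case MY_CLK_32K:  return 32768;     /* CKIL */
    case MY_CLK_HIGH: return 24000000;  /* CKIH, 24MHz crystal */
    default:          return 0;         /* unimplemented/unsupported */
    }
}

int main(void)
{
    /* A GPT-style consumer: tick rate = clock root / (prescaler + 1) */
    uint32_t prescaler = 23;
    printf("%u Hz\n", my_ccm_get_freq(MY_CLK_HIGH) / (prescaler + 1));
    return 0;
}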
+ */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Clock %d Not implemented\n", + TYPE_IMX7_CCM, __func__, clock); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: unsupported clock %d\n", + TYPE_IMX7_CCM, __func__, clock); + break; + } + + trace_ccm_clock_freq(clock, freq); + + return freq; } static void imx7_ccm_class_init(ObjectClass *klass, void *data) diff --git a/hw/misc/mchp_pfsoc_ioscb.c b/hw/misc/mchp_pfsoc_ioscb.c index f4fd55a..a71d134 100644 --- a/hw/misc/mchp_pfsoc_ioscb.c +++ b/hw/misc/mchp_pfsoc_ioscb.c @@ -24,6 +24,7 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" +#include "hw/irq.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_ioscb.h" @@ -33,6 +34,10 @@ */ #define IOSCB_WHOLE_REG_SIZE 0x10000000 #define IOSCB_SUBMOD_REG_SIZE 0x1000 +#define IOSCB_CCC_REG_SIZE 0x2000000 +#define IOSCB_CTRL_REG_SIZE 0x800 +#define IOSCB_QSPIXIP_REG_SIZE 0x200 + /* * There are many sub-modules in the IOSCB module. @@ -44,7 +49,10 @@ #define IOSCB_LANE01_BASE 0x06500000 #define IOSCB_LANE23_BASE 0x06510000 #define IOSCB_CTRL_BASE 0x07020000 +#define IOSCB_QSPIXIP_BASE 0x07020100 +#define IOSCB_MAILBOX_BASE 0x07020800 #define IOSCB_CFG_BASE 0x07080000 +#define IOSCB_CCC_BASE 0x08000000 #define IOSCB_PLL_MSS_BASE 0x0E001000 #define IOSCB_CFM_MSS_BASE 0x0E002000 #define IOSCB_PLL_DDR_BASE 0x0E010000 @@ -141,6 +149,58 @@ static const MemoryRegionOps mchp_pfsoc_io_calib_ddr_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +#define SERVICES_CR 0x50 +#define SERVICES_SR 0x54 +#define SERVICES_STATUS_SHIFT 16 + +static uint64_t mchp_pfsoc_ctrl_read(void *opaque, hwaddr offset, + unsigned size) +{ + uint32_t val = 0; + + switch (offset) { + case SERVICES_SR: + /* + * Although some services have no error codes, most do. All services + * that do implement error codes begin them at 1. Treat all + * service requests as failures and return 1. + * See the "PolarFire® FPGA and PolarFire SoC FPGA System Services" + * user guide for more information on service error codes.
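From the guest's point of view, this policy is easy to state. A hypothetical bare-metal sketch (ctrl_base would point at the IOSCB ctrl region modelled here; not code from the patch):

#include <stdint.h>

#define SERVICES_SR           0x54
#define SERVICES_STATUS_SHIFT 16

/* With the model above, the status field always reads back 1,
 * so every system-service request appears to have failed. */
static int service_request_failed(const volatile uint32_t *ctrl_base)
{
    uint32_t sr = ctrl_base[SERVICES_SR / 4];
    return (sr >> SERVICES_STATUS_SHIFT) != 0;
}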
+ */ + val = 1u << SERVICES_STATUS_SHIFT; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented device read " + "(size %d, offset 0x%" HWADDR_PRIx ")\n", + __func__, size, offset); + } + + return val; +} + +static void mchp_pfsoc_ctrl_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + MchpPfSoCIoscbState *s = opaque; + + switch (offset) { + case SERVICES_CR: + qemu_irq_raise(s->irq); + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " + "(size %d, value 0x%" PRIx64 + ", offset 0x%" HWADDR_PRIx ")\n", + __func__, size, value, offset); + } +} + +static const MemoryRegionOps mchp_pfsoc_ctrl_ops = { + .read = mchp_pfsoc_ctrl_read, + .write = mchp_pfsoc_ctrl_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + static void mchp_pfsoc_ioscb_realize(DeviceState *dev, Error **errp) { MchpPfSoCIoscbState *s = MCHP_PFSOC_IOSCB(dev); @@ -160,14 +220,26 @@ static void mchp_pfsoc_ioscb_realize(DeviceState *dev, Error **errp) "mchp.pfsoc.ioscb.lane23", IOSCB_SUBMOD_REG_SIZE); memory_region_add_subregion(&s->container, IOSCB_LANE23_BASE, &s->lane23); - memory_region_init_io(&s->ctrl, OBJECT(s), &mchp_pfsoc_dummy_ops, s, - "mchp.pfsoc.ioscb.ctrl", IOSCB_SUBMOD_REG_SIZE); + memory_region_init_io(&s->ctrl, OBJECT(s), &mchp_pfsoc_ctrl_ops, s, + "mchp.pfsoc.ioscb.ctrl", IOSCB_CTRL_REG_SIZE); memory_region_add_subregion(&s->container, IOSCB_CTRL_BASE, &s->ctrl); + memory_region_init_io(&s->qspixip, OBJECT(s), &mchp_pfsoc_dummy_ops, s, + "mchp.pfsoc.ioscb.qspixip", IOSCB_QSPIXIP_REG_SIZE); + memory_region_add_subregion(&s->container, IOSCB_QSPIXIP_BASE, &s->qspixip); + + memory_region_init_io(&s->mailbox, OBJECT(s), &mchp_pfsoc_dummy_ops, s, + "mchp.pfsoc.ioscb.mailbox", IOSCB_SUBMOD_REG_SIZE); + memory_region_add_subregion(&s->container, IOSCB_MAILBOX_BASE, &s->mailbox); + memory_region_init_io(&s->cfg, OBJECT(s), &mchp_pfsoc_dummy_ops, s, "mchp.pfsoc.ioscb.cfg", IOSCB_SUBMOD_REG_SIZE); memory_region_add_subregion(&s->container, IOSCB_CFG_BASE, &s->cfg); + memory_region_init_io(&s->ccc, OBJECT(s), &mchp_pfsoc_dummy_ops, s, + "mchp.pfsoc.ioscb.ccc", IOSCB_CCC_REG_SIZE); + memory_region_add_subregion(&s->container, IOSCB_CCC_BASE, &s->ccc); + memory_region_init_io(&s->pll_mss, OBJECT(s), &mchp_pfsoc_pll_ops, s, "mchp.pfsoc.ioscb.pll_mss", IOSCB_SUBMOD_REG_SIZE); memory_region_add_subregion(&s->container, IOSCB_PLL_MSS_BASE, &s->pll_mss); @@ -216,6 +288,8 @@ static void mchp_pfsoc_ioscb_realize(DeviceState *dev, Error **errp) IOSCB_SUBMOD_REG_SIZE); memory_region_add_subregion(&s->container, IOSCB_IO_CALIB_SGMII_BASE, &s->io_calib_sgmii); + + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); } static void mchp_pfsoc_ioscb_class_init(ObjectClass *klass, void *data) diff --git a/hw/misc/mchp_pfsoc_sysreg.c b/hw/misc/mchp_pfsoc_sysreg.c index 89571ed..7876fe0 100644 --- a/hw/misc/mchp_pfsoc_sysreg.c +++ b/hw/misc/mchp_pfsoc_sysreg.c @@ -24,10 +24,12 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" +#include "hw/irq.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_sysreg.h" #define ENVM_CR 0xb8 +#define MESSAGE_INT 0x118c static uint64_t mchp_pfsoc_sysreg_read(void *opaque, hwaddr offset, unsigned size) @@ -52,10 +54,17 @@ static uint64_t mchp_pfsoc_sysreg_read(void *opaque, hwaddr offset, static void mchp_pfsoc_sysreg_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { - qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " - "(size %d, value 0x%" PRIx64 - ", offset 0x%" HWADDR_PRIx ")\n", - __func__, size, 
value, offset); + MchpPfSoCSysregState *s = opaque; + switch (offset) { + case MESSAGE_INT: + qemu_irq_lower(s->irq); + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented device write " + "(size %d, value 0x%" PRIx64 + ", offset 0x%" HWADDR_PRIx ")\n", + __func__, size, value, offset); + } } static const MemoryRegionOps mchp_pfsoc_sysreg_ops = { @@ -73,6 +82,7 @@ static void mchp_pfsoc_sysreg_realize(DeviceState *dev, Error **errp) "mchp.pfsoc.sysreg", MCHP_PFSOC_SYSREG_REG_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->sysreg); + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); } static void mchp_pfsoc_sysreg_class_init(ObjectClass *klass, void *data) diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 8c11b23..c862d96 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -1068,9 +1068,9 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, return 0; } - /* 4 bytes for the CRC. */ - size += 4; crc = cpu_to_be32(crc32(~0, buf, size)); + /* Increase size by 4, loop below reads the last 4 bytes from crc_ptr. */ + size += 4; crc_ptr = (uint8_t *) &crc; /* Huge frames are truncated. */ @@ -1164,9 +1164,9 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf, return 0; } - /* 4 bytes for the CRC. */ - size += 4; crc = cpu_to_be32(crc32(~0, buf, size)); + /* Increase size by 4, loop below reads the last 4 bytes from crc_ptr. */ + size += 4; crc_ptr = (uint8_t *) &crc; if (shift16) { diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index dc86c1c..4e816c6 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -44,13 +44,9 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) { CPUPPCState *env = &cpu->env; unsigned int old_pending; - bool locked = false; /* We may already have the BQL if coming from the reset path */ - if (!qemu_mutex_iothread_locked()) { - locked = true; - qemu_mutex_lock_iothread(); - } + QEMU_IOTHREAD_LOCK_GUARD(); old_pending = env->pending_interrupts; @@ -67,10 +63,6 @@ void ppc_set_irq(PowerPCCPU *cpu, int irq, int level) trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts, CPU(cpu)->interrupt_request); - - if (locked) { - qemu_mutex_unlock_iothread(); - } } /* PowerPC 6xx / 7xx internal IRQ controller */ diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 79ff61c..4550b3b 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -4,6 +4,8 @@ config RISCV_NUMA config IBEX bool +# RISC-V machines in alphabetical order + config MICROCHIP_PFSOC bool select CADENCE_SDHCI @@ -11,7 +13,6 @@ config MICROCHIP_PFSOC select MCHP_PFSOC_IOSCB select MCHP_PFSOC_MMUART select MCHP_PFSOC_SYSREG - select MSI_NONBROKEN select RISCV_ACLINT select SIFIVE_PDMA select SIFIVE_PLIC @@ -20,14 +21,8 @@ config MICROCHIP_PFSOC config OPENTITAN bool select IBEX - select UNIMP - -config SHAKTI_C - bool - select UNIMP - select SHAKTI_UART - select RISCV_ACLINT select SIFIVE_PLIC + select UNIMP config RISCV_VIRT bool @@ -37,7 +32,6 @@ config RISCV_VIRT imply TPM_TIS_SYSBUS select RISCV_NUMA select GOLDFISH_RTC - select MSI_NONBROKEN select PCI select PCI_EXPRESS_GENERIC_BRIDGE select PFLASH_CFI01 @@ -51,9 +45,15 @@ config RISCV_VIRT select FW_CFG_DMA select PLATFORM_BUS +config SHAKTI_C + bool + select RISCV_ACLINT + select SHAKTI_UART + select SIFIVE_PLIC + select UNIMP + config SIFIVE_E bool - select MSI_NONBROKEN select RISCV_ACLINT select SIFIVE_GPIO select SIFIVE_PLIC @@ -64,7 +64,6 @@ config SIFIVE_E config SIFIVE_U bool select CADENCE - select MSI_NONBROKEN select RISCV_ACLINT select SIFIVE_GPIO select SIFIVE_PDMA @@ -82,6 +81,5 @@ config 
SPIKE bool select RISCV_NUMA select HTIF - select MSI_NONBROKEN select RISCV_ACLINT select SIFIVE_PLIC diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c index a821263..b10321b 100644 --- a/hw/riscv/microchip_pfsoc.c +++ b/hw/riscv/microchip_pfsoc.c @@ -86,58 +86,61 @@ * describes the complete IOSCB modules memory maps */ static const MemMapEntry microchip_pfsoc_memmap[] = { - [MICROCHIP_PFSOC_RSVD0] = { 0x0, 0x100 }, - [MICROCHIP_PFSOC_DEBUG] = { 0x100, 0xf00 }, - [MICROCHIP_PFSOC_E51_DTIM] = { 0x1000000, 0x2000 }, - [MICROCHIP_PFSOC_BUSERR_UNIT0] = { 0x1700000, 0x1000 }, - [MICROCHIP_PFSOC_BUSERR_UNIT1] = { 0x1701000, 0x1000 }, - [MICROCHIP_PFSOC_BUSERR_UNIT2] = { 0x1702000, 0x1000 }, - [MICROCHIP_PFSOC_BUSERR_UNIT3] = { 0x1703000, 0x1000 }, - [MICROCHIP_PFSOC_BUSERR_UNIT4] = { 0x1704000, 0x1000 }, - [MICROCHIP_PFSOC_CLINT] = { 0x2000000, 0x10000 }, - [MICROCHIP_PFSOC_L2CC] = { 0x2010000, 0x1000 }, - [MICROCHIP_PFSOC_DMA] = { 0x3000000, 0x100000 }, - [MICROCHIP_PFSOC_L2LIM] = { 0x8000000, 0x2000000 }, - [MICROCHIP_PFSOC_PLIC] = { 0xc000000, 0x4000000 }, - [MICROCHIP_PFSOC_MMUART0] = { 0x20000000, 0x1000 }, - [MICROCHIP_PFSOC_WDOG0] = { 0x20001000, 0x1000 }, - [MICROCHIP_PFSOC_SYSREG] = { 0x20002000, 0x2000 }, - [MICROCHIP_PFSOC_AXISW] = { 0x20004000, 0x1000 }, - [MICROCHIP_PFSOC_MPUCFG] = { 0x20005000, 0x1000 }, - [MICROCHIP_PFSOC_FMETER] = { 0x20006000, 0x1000 }, - [MICROCHIP_PFSOC_DDR_SGMII_PHY] = { 0x20007000, 0x1000 }, - [MICROCHIP_PFSOC_EMMC_SD] = { 0x20008000, 0x1000 }, - [MICROCHIP_PFSOC_DDR_CFG] = { 0x20080000, 0x40000 }, - [MICROCHIP_PFSOC_MMUART1] = { 0x20100000, 0x1000 }, - [MICROCHIP_PFSOC_MMUART2] = { 0x20102000, 0x1000 }, - [MICROCHIP_PFSOC_MMUART3] = { 0x20104000, 0x1000 }, - [MICROCHIP_PFSOC_MMUART4] = { 0x20106000, 0x1000 }, - [MICROCHIP_PFSOC_WDOG1] = { 0x20101000, 0x1000 }, - [MICROCHIP_PFSOC_WDOG2] = { 0x20103000, 0x1000 }, - [MICROCHIP_PFSOC_WDOG3] = { 0x20105000, 0x1000 }, - [MICROCHIP_PFSOC_WDOG4] = { 0x20106000, 0x1000 }, - [MICROCHIP_PFSOC_SPI0] = { 0x20108000, 0x1000 }, - [MICROCHIP_PFSOC_SPI1] = { 0x20109000, 0x1000 }, - [MICROCHIP_PFSOC_I2C0] = { 0x2010a000, 0x1000 }, - [MICROCHIP_PFSOC_I2C1] = { 0x2010b000, 0x1000 }, - [MICROCHIP_PFSOC_CAN0] = { 0x2010c000, 0x1000 }, - [MICROCHIP_PFSOC_CAN1] = { 0x2010d000, 0x1000 }, - [MICROCHIP_PFSOC_GEM0] = { 0x20110000, 0x2000 }, - [MICROCHIP_PFSOC_GEM1] = { 0x20112000, 0x2000 }, - [MICROCHIP_PFSOC_GPIO0] = { 0x20120000, 0x1000 }, - [MICROCHIP_PFSOC_GPIO1] = { 0x20121000, 0x1000 }, - [MICROCHIP_PFSOC_GPIO2] = { 0x20122000, 0x1000 }, - [MICROCHIP_PFSOC_RTC] = { 0x20124000, 0x1000 }, - [MICROCHIP_PFSOC_ENVM_CFG] = { 0x20200000, 0x1000 }, - [MICROCHIP_PFSOC_ENVM_DATA] = { 0x20220000, 0x20000 }, - [MICROCHIP_PFSOC_USB] = { 0x20201000, 0x1000 }, - [MICROCHIP_PFSOC_QSPI_XIP] = { 0x21000000, 0x1000000 }, - [MICROCHIP_PFSOC_IOSCB] = { 0x30000000, 0x10000000 }, - [MICROCHIP_PFSOC_FABRIC_FIC3] = { 0x40000000, 0x20000000 }, - [MICROCHIP_PFSOC_DRAM_LO] = { 0x80000000, 0x40000000 }, - [MICROCHIP_PFSOC_DRAM_LO_ALIAS] = { 0xc0000000, 0x40000000 }, - [MICROCHIP_PFSOC_DRAM_HI] = { 0x1000000000, 0x0 }, - [MICROCHIP_PFSOC_DRAM_HI_ALIAS] = { 0x1400000000, 0x0 }, + [MICROCHIP_PFSOC_RSVD0] = { 0x0, 0x100 }, + [MICROCHIP_PFSOC_DEBUG] = { 0x100, 0xf00 }, + [MICROCHIP_PFSOC_E51_DTIM] = { 0x1000000, 0x2000 }, + [MICROCHIP_PFSOC_BUSERR_UNIT0] = { 0x1700000, 0x1000 }, + [MICROCHIP_PFSOC_BUSERR_UNIT1] = { 0x1701000, 0x1000 }, + [MICROCHIP_PFSOC_BUSERR_UNIT2] = { 0x1702000, 0x1000 }, + [MICROCHIP_PFSOC_BUSERR_UNIT3] = { 0x1703000, 
0x1000 }, + [MICROCHIP_PFSOC_BUSERR_UNIT4] = { 0x1704000, 0x1000 }, + [MICROCHIP_PFSOC_CLINT] = { 0x2000000, 0x10000 }, + [MICROCHIP_PFSOC_L2CC] = { 0x2010000, 0x1000 }, + [MICROCHIP_PFSOC_DMA] = { 0x3000000, 0x100000 }, + [MICROCHIP_PFSOC_L2LIM] = { 0x8000000, 0x2000000 }, + [MICROCHIP_PFSOC_PLIC] = { 0xc000000, 0x4000000 }, + [MICROCHIP_PFSOC_MMUART0] = { 0x20000000, 0x1000 }, + [MICROCHIP_PFSOC_WDOG0] = { 0x20001000, 0x1000 }, + [MICROCHIP_PFSOC_SYSREG] = { 0x20002000, 0x2000 }, + [MICROCHIP_PFSOC_AXISW] = { 0x20004000, 0x1000 }, + [MICROCHIP_PFSOC_MPUCFG] = { 0x20005000, 0x1000 }, + [MICROCHIP_PFSOC_FMETER] = { 0x20006000, 0x1000 }, + [MICROCHIP_PFSOC_DDR_SGMII_PHY] = { 0x20007000, 0x1000 }, + [MICROCHIP_PFSOC_EMMC_SD] = { 0x20008000, 0x1000 }, + [MICROCHIP_PFSOC_DDR_CFG] = { 0x20080000, 0x40000 }, + [MICROCHIP_PFSOC_MMUART1] = { 0x20100000, 0x1000 }, + [MICROCHIP_PFSOC_MMUART2] = { 0x20102000, 0x1000 }, + [MICROCHIP_PFSOC_MMUART3] = { 0x20104000, 0x1000 }, + [MICROCHIP_PFSOC_MMUART4] = { 0x20106000, 0x1000 }, + [MICROCHIP_PFSOC_WDOG1] = { 0x20101000, 0x1000 }, + [MICROCHIP_PFSOC_WDOG2] = { 0x20103000, 0x1000 }, + [MICROCHIP_PFSOC_WDOG3] = { 0x20105000, 0x1000 }, + [MICROCHIP_PFSOC_WDOG4] = { 0x20106000, 0x1000 }, + [MICROCHIP_PFSOC_SPI0] = { 0x20108000, 0x1000 }, + [MICROCHIP_PFSOC_SPI1] = { 0x20109000, 0x1000 }, + [MICROCHIP_PFSOC_I2C0] = { 0x2010a000, 0x1000 }, + [MICROCHIP_PFSOC_I2C1] = { 0x2010b000, 0x1000 }, + [MICROCHIP_PFSOC_CAN0] = { 0x2010c000, 0x1000 }, + [MICROCHIP_PFSOC_CAN1] = { 0x2010d000, 0x1000 }, + [MICROCHIP_PFSOC_GEM0] = { 0x20110000, 0x2000 }, + [MICROCHIP_PFSOC_GEM1] = { 0x20112000, 0x2000 }, + [MICROCHIP_PFSOC_GPIO0] = { 0x20120000, 0x1000 }, + [MICROCHIP_PFSOC_GPIO1] = { 0x20121000, 0x1000 }, + [MICROCHIP_PFSOC_GPIO2] = { 0x20122000, 0x1000 }, + [MICROCHIP_PFSOC_RTC] = { 0x20124000, 0x1000 }, + [MICROCHIP_PFSOC_ENVM_CFG] = { 0x20200000, 0x1000 }, + [MICROCHIP_PFSOC_ENVM_DATA] = { 0x20220000, 0x20000 }, + [MICROCHIP_PFSOC_USB] = { 0x20201000, 0x1000 }, + [MICROCHIP_PFSOC_QSPI_XIP] = { 0x21000000, 0x1000000 }, + [MICROCHIP_PFSOC_IOSCB] = { 0x30000000, 0x10000000 }, + [MICROCHIP_PFSOC_FABRIC_FIC0] = { 0x2000000000, 0x1000000000 }, + [MICROCHIP_PFSOC_FABRIC_FIC1] = { 0x3000000000, 0x1000000000 }, + [MICROCHIP_PFSOC_FABRIC_FIC3] = { 0x40000000, 0x20000000 }, + [MICROCHIP_PFSOC_DRAM_LO] = { 0x80000000, 0x40000000 }, + [MICROCHIP_PFSOC_DRAM_LO_ALIAS] = { 0xc0000000, 0x40000000 }, + [MICROCHIP_PFSOC_DRAM_HI] = { 0x1000000000, 0x0 }, + [MICROCHIP_PFSOC_DRAM_HI_ALIAS] = { 0x1400000000, 0x0 }, + }; static void microchip_pfsoc_soc_instance_init(Object *obj) @@ -303,6 +306,9 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) sysbus_realize(SYS_BUS_DEVICE(&s->sysreg), errp); sysbus_mmio_map(SYS_BUS_DEVICE(&s->sysreg), 0, memmap[MICROCHIP_PFSOC_SYSREG].base); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->sysreg), 0, + qdev_get_gpio_in(DEVICE(s->plic), + MICROCHIP_PFSOC_MAILBOX_IRQ)); /* AXISW */ create_unimplemented_device("microchip.pfsoc.axisw", @@ -456,11 +462,22 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) sysbus_realize(SYS_BUS_DEVICE(&s->ioscb), errp); sysbus_mmio_map(SYS_BUS_DEVICE(&s->ioscb), 0, memmap[MICROCHIP_PFSOC_IOSCB].base); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ioscb), 0, + qdev_get_gpio_in(DEVICE(s->plic), + MICROCHIP_PFSOC_MAILBOX_IRQ)); /* FPGA Fabric */ create_unimplemented_device("microchip.pfsoc.fabricfic3", memmap[MICROCHIP_PFSOC_FABRIC_FIC3].base, memmap[MICROCHIP_PFSOC_FABRIC_FIC3].size); + /* FPGA 
Fabric */ + create_unimplemented_device("microchip.pfsoc.fabricfic0", + memmap[MICROCHIP_PFSOC_FABRIC_FIC0].base, + memmap[MICROCHIP_PFSOC_FABRIC_FIC0].size); + /* FPGA Fabric */ + create_unimplemented_device("microchip.pfsoc.fabricfic1", + memmap[MICROCHIP_PFSOC_FABRIC_FIC1].base, + memmap[MICROCHIP_PFSOC_FABRIC_FIC1].size); /* QSPI Flash */ memory_region_init_rom(qspi_xip_mem, OBJECT(dev), diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index be7ff1e..85ffdac 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -28,8 +28,16 @@ #include "qemu/units.h" #include "sysemu/sysemu.h" +/* + * This version of the OpenTitan machine currently supports + * OpenTitan RTL version: + * <lowRISC/opentitan@d072ac505f82152678d6e04be95c72b728a347b8> + * + * MMIO mapping as per (specified commit): + * lowRISC/opentitan: hw/top_earlgrey/sw/autogen/top_earlgrey_memory.h + */ static const MemMapEntry ibex_memmap[] = { - [IBEX_DEV_ROM] = { 0x00008000, 0x8000 }, + [IBEX_DEV_ROM] = { 0x00008000, 0x8000 }, [IBEX_DEV_RAM] = { 0x10000000, 0x20000 }, [IBEX_DEV_FLASH] = { 0x20000000, 0x100000 }, [IBEX_DEV_UART] = { 0x40000000, 0x1000 }, @@ -38,17 +46,18 @@ static const MemMapEntry ibex_memmap[] = { [IBEX_DEV_I2C] = { 0x40080000, 0x1000 }, [IBEX_DEV_PATTGEN] = { 0x400e0000, 0x1000 }, [IBEX_DEV_TIMER] = { 0x40100000, 0x1000 }, - [IBEX_DEV_SENSOR_CTRL] = { 0x40110000, 0x1000 }, [IBEX_DEV_OTP_CTRL] = { 0x40130000, 0x4000 }, [IBEX_DEV_LC_CTRL] = { 0x40140000, 0x1000 }, - [IBEX_DEV_USBDEV] = { 0x40150000, 0x1000 }, + [IBEX_DEV_ALERT_HANDLER] = { 0x40150000, 0x1000 }, [IBEX_DEV_SPI_HOST0] = { 0x40300000, 0x1000 }, [IBEX_DEV_SPI_HOST1] = { 0x40310000, 0x1000 }, + [IBEX_DEV_USBDEV] = { 0x40320000, 0x1000 }, [IBEX_DEV_PWRMGR] = { 0x40400000, 0x1000 }, [IBEX_DEV_RSTMGR] = { 0x40410000, 0x1000 }, [IBEX_DEV_CLKMGR] = { 0x40420000, 0x1000 }, [IBEX_DEV_PINMUX] = { 0x40460000, 0x1000 }, - [IBEX_DEV_PADCTRL] = { 0x40470000, 0x1000 }, + [IBEX_DEV_AON_TIMER] = { 0x40470000, 0x1000 }, + [IBEX_DEV_SENSOR_CTRL] = { 0x40490000, 0x1000 }, [IBEX_DEV_FLASH_CTRL] = { 0x41000000, 0x1000 }, [IBEX_DEV_AES] = { 0x41100000, 0x1000 }, [IBEX_DEV_HMAC] = { 0x41110000, 0x1000 }, @@ -59,10 +68,9 @@ static const MemMapEntry ibex_memmap[] = { [IBEX_DEV_ENTROPY] = { 0x41160000, 0x1000 }, [IBEX_DEV_EDNO] = { 0x41170000, 0x1000 }, [IBEX_DEV_EDN1] = { 0x41180000, 0x1000 }, - [IBEX_DEV_ALERT_HANDLER] = { 0x411b0000, 0x1000 }, [IBEX_DEV_NMI_GEN] = { 0x411c0000, 0x1000 }, [IBEX_DEV_PERI] = { 0x411f0000, 0x10000 }, - [IBEX_DEV_PLIC] = { 0x48000000, 0x4005000 }, + [IBEX_DEV_PLIC] = { 0x48000000, 0x4005000 }, [IBEX_DEV_FLASH_VIRTUAL] = { 0x80000000, 0x80000 }, }; @@ -165,10 +173,8 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) /* PLIC */ qdev_prop_set_string(DEVICE(&s->plic), "hart-config", "M"); - qdev_prop_set_uint32(DEVICE(&s->plic), "hartid-base", 0); qdev_prop_set_uint32(DEVICE(&s->plic), "num-sources", 180); qdev_prop_set_uint32(DEVICE(&s->plic), "num-priorities", 3); - qdev_prop_set_uint32(DEVICE(&s->plic), "priority-base", 0x00); qdev_prop_set_uint32(DEVICE(&s->plic), "pending-base", 0x1000); qdev_prop_set_uint32(DEVICE(&s->plic), "enable-base", 0x2000); qdev_prop_set_uint32(DEVICE(&s->plic), "enable-stride", 32); @@ -265,8 +271,8 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) memmap[IBEX_DEV_CLKMGR].base, memmap[IBEX_DEV_CLKMGR].size); create_unimplemented_device("riscv.lowrisc.ibex.pinmux", memmap[IBEX_DEV_PINMUX].base, memmap[IBEX_DEV_PINMUX].size); - 
create_unimplemented_device("riscv.lowrisc.ibex.padctrl", - memmap[IBEX_DEV_PADCTRL].base, memmap[IBEX_DEV_PADCTRL].size); + create_unimplemented_device("riscv.lowrisc.ibex.aon_timer", + memmap[IBEX_DEV_AON_TIMER].base, memmap[IBEX_DEV_AON_TIMER].size); create_unimplemented_device("riscv.lowrisc.ibex.usbdev", memmap[IBEX_DEV_USBDEV].base, memmap[IBEX_DEV_USBDEV].size); create_unimplemented_device("riscv.lowrisc.ibex.flash_ctrl", diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index b139824..b40a476 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -287,7 +287,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cells(fdt, nodename, "reg", 0x0, memmap[SIFIVE_U_DEV_PLIC].base, 0x0, memmap[SIFIVE_U_DEV_PLIC].size); - qemu_fdt_setprop_cell(fdt, nodename, "riscv,ndev", 0x35); + qemu_fdt_setprop_cell(fdt, nodename, "riscv,ndev", + SIFIVE_U_PLIC_NUM_SOURCES - 1); qemu_fdt_setprop_cell(fdt, nodename, "phandle", plic_phandle); plic_phandle = qemu_fdt_get_phandle(fdt, nodename); g_free(cells); diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index 1e1d752..13946ac 100644 --- a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -8,7 +8,6 @@ * * 0) HTIF Console and Poweroff * 1) CLINT (Timer and IPI) - * 2) PLIC (Platform Level Interrupt Controller) * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index a5bc735..94ff2a1 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -468,7 +468,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); qemu_fdt_setprop_cells(mc->fdt, plic_name, "reg", 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); - qemu_fdt_setprop_cell(mc->fdt, plic_name, "riscv,ndev", VIRTIO_NDEV); + qemu_fdt_setprop_cell(mc->fdt, plic_name, "riscv,ndev", + VIRT_IRQCHIP_NUM_SOURCES - 1); riscv_socket_fdt_write_id(mc, mc->fdt, plic_name, socket); qemu_fdt_setprop_cell(mc->fdt, plic_name, "phandle", plic_phandles[socket]); @@ -546,8 +547,6 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, riscv_socket_count(mc) * sizeof(uint32_t) * 4); qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids", VIRT_IRQCHIP_NUM_MSIS); - qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id", - VIRT_IRQCHIP_IPI_MSI); if (riscv_socket_count(mc) > 1) { qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits", imsic_num_bits(imsic_max_hart_per_socket)); @@ -597,8 +596,6 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, riscv_socket_count(mc) * sizeof(uint32_t) * 4); qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids", VIRT_IRQCHIP_NUM_MSIS); - qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id", - VIRT_IRQCHIP_IPI_MSI); if (imsic_guest_bits) { qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,guest-index-bits", imsic_guest_bits); diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c index ec0fa44..3a86978 100644 --- a/hw/timer/imx_epit.c +++ b/hw/timer/imx_epit.c @@ -6,6 +6,7 @@ * Originally written by Hans Jiang * Updated by Peter Chubb * Updated by Jean-Christophe Dubois <jcd@tribudubois.net> + * Updated by Axel Heider * * This code is licensed under GPL version 2 or later. See * the COPYING file in the top-level directory. 
@@ -66,73 +67,54 @@ static const IMXClk imx_epit_clocks[] = { */ static void imx_epit_update_int(IMXEPITState *s) { - if (s->sr && (s->cr & CR_OCIEN) && (s->cr & CR_EN)) { + if ((s->sr & SR_OCIF) && (s->cr & CR_OCIEN) && (s->cr & CR_EN)) { qemu_irq_raise(s->irq); } else { qemu_irq_lower(s->irq); } } -/* - * Must be called from within a ptimer_transaction_begin/commit block - * for both s->timer_cmp and s->timer_reload. - */ -static void imx_epit_set_freq(IMXEPITState *s) +static uint32_t imx_epit_get_freq(IMXEPITState *s) { - uint32_t clksrc; - uint32_t prescaler; - - clksrc = extract32(s->cr, CR_CLKSRC_SHIFT, 2); - prescaler = 1 + extract32(s->cr, CR_PRESCALE_SHIFT, 12); - - s->freq = imx_ccm_get_clock_frequency(s->ccm, - imx_epit_clocks[clksrc]) / prescaler; - - DPRINTF("Setting ptimer frequency to %u\n", s->freq); - - if (s->freq) { - ptimer_set_freq(s->timer_reload, s->freq); - ptimer_set_freq(s->timer_cmp, s->freq); - } + uint32_t clksrc = extract32(s->cr, CR_CLKSRC_SHIFT, CR_CLKSRC_BITS); + uint32_t prescaler = 1 + extract32(s->cr, CR_PRESCALE_SHIFT, CR_PRESCALE_BITS); + uint32_t f_in = imx_ccm_get_clock_frequency(s->ccm, imx_epit_clocks[clksrc]); + uint32_t freq = f_in / prescaler; + DPRINTF("ptimer frequency is %u\n", freq); + return freq; } -static void imx_epit_reset(DeviceState *dev) +/* + * This is called both on hardware (device) reset and software reset. + */ +static void imx_epit_reset(IMXEPITState *s, bool is_hard_reset) { - IMXEPITState *s = IMX_EPIT(dev); - - /* - * Soft reset doesn't touch some bits; hard reset clears them - */ - s->cr &= (CR_EN|CR_ENMOD|CR_STOPEN|CR_DOZEN|CR_WAITEN|CR_DBGEN); + /* Soft reset doesn't touch some bits; hard reset clears them */ + if (is_hard_reset) { + s->cr = 0; + } else { + s->cr &= (CR_EN|CR_ENMOD|CR_STOPEN|CR_DOZEN|CR_WAITEN|CR_DBGEN); + } s->sr = 0; s->lr = EPIT_TIMER_MAX; s->cmp = 0; - s->cnt = 0; ptimer_transaction_begin(s->timer_cmp); ptimer_transaction_begin(s->timer_reload); - /* stop both timers */ + + /* + * The reset switches off the input clock, so even if the CR.EN is still + * set, the timers are no longer running. + */ + assert(imx_epit_get_freq(s) == 0); ptimer_stop(s->timer_cmp); ptimer_stop(s->timer_reload); - /* compute new frequency */ - imx_epit_set_freq(s); /* init both timers to EPIT_TIMER_MAX */ ptimer_set_limit(s->timer_cmp, EPIT_TIMER_MAX, 1); ptimer_set_limit(s->timer_reload, EPIT_TIMER_MAX, 1); - if (s->freq && (s->cr & CR_EN)) { - /* if the timer is still enabled, restart it */ - ptimer_run(s->timer_reload, 0); - } ptimer_transaction_commit(s->timer_cmp); ptimer_transaction_commit(s->timer_reload); } -static uint32_t imx_epit_update_count(IMXEPITState *s) -{ - s->cnt = ptimer_get_count(s->timer_reload); - - return s->cnt; -} - static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size) { IMXEPITState *s = IMX_EPIT(opaque); @@ -156,8 +138,7 @@ static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size) break; case 4: /* CNT */ - imx_epit_update_count(s); - reg_value = s->cnt; + reg_value = ptimer_get_count(s->timer_reload); break; default: @@ -171,144 +152,219 @@ static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size) return reg_value; } -/* Must be called from ptimer_transaction_begin/commit block for s->timer_cmp */ -static void imx_epit_reload_compare_timer(IMXEPITState *s) +/* + * Must be called from a ptimer_transaction_begin/commit block for + * s->timer_cmp, but outside of a transaction block of s->timer_reload, + * so the proper counter value is read. 
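The ordering that comment demands looks like this in practice. A sketch distilled from the write paths later in this patch (the wrapper function name is illustrative, not QEMU API):

static void imx_epit_modify_reload_then_update(IMXEPITState *s)
{
    ptimer_transaction_begin(s->timer_cmp);
    ptimer_transaction_begin(s->timer_reload);
    /* ... modify s->timer_reload (limit, count, run/stop) ... */
    ptimer_transaction_commit(s->timer_reload);  /* flush pending changes */
    imx_epit_update_compare_timer(s);            /* sees committed state */
    ptimer_transaction_commit(s->timer_cmp);
}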
+ */ +static void imx_epit_update_compare_timer(IMXEPITState *s) { - if ((s->cr & (CR_EN | CR_OCIEN)) == (CR_EN | CR_OCIEN)) { - /* if the compare feature is on and timers are running */ - uint32_t tmp = imx_epit_update_count(s); - uint64_t next; - if (tmp > s->cmp) { - /* It'll fire in this round of the timer */ - next = tmp - s->cmp; - } else { /* catch it next time around */ - next = tmp - s->cmp + ((s->cr & CR_RLD) ? EPIT_TIMER_MAX : s->lr); + uint64_t counter = 0; + bool is_oneshot = false; + /* + * The compare timer only has to run if the timer peripheral is active + * and there is an input clock. Otherwise it can be switched off. + */ + bool is_active = (s->cr & CR_EN) && imx_epit_get_freq(s); + if (is_active) { + /* + * Calculate next timeout for compare timer. Reading the reload + * counter returns proper results only if pending transactions + * on it are committed here. Otherwise stale values would be read. + */ + counter = ptimer_get_count(s->timer_reload); + uint64_t limit = ptimer_get_limit(s->timer_cmp); + /* + * The compare timer is a periodic timer if the limit is at least + * the compare value. Otherwise it may fire at most once in the + * current round. + */ + is_oneshot = (limit < s->cmp); + if (counter >= s->cmp) { + /* The compare timer fires in the current round. */ + counter -= s->cmp; + } else if (!is_oneshot) { + /* + * The compare timer fires after a reload, as it is below the + * compare value already in this round. Note that the counter + * value calculated below can be above the 32-bit limit, which + * is legal here because the compare timer is an internal + * helper ptimer only. + */ + counter += limit - s->cmp; + } else { + /* + * The compare timer won't fire in this round, and the limit is + * set to a value below the compare value. This practically means + * it will never fire, so it can be switched off. + */ + is_active = false; } - ptimer_set_count(s->timer_cmp, next); } + + /* + * Set the compare timer and let it run, or stop it. This is agnostic + * of the CR.OCIEN bit, as this bit affects interrupt generation only. The + * compare timer needs to run even if no interrupts are to be generated, + * because the SR.OCIF bit must also be updated. + * Note that the timer might already be stopped or be running with + * the correct counter values. However, finding out when an update is needed and + * when not is not trivial. It's much easier applying the setting again, + * as this does not harm either and the overhead is negligible. + */ + if (is_active) { + ptimer_set_count(s->timer_cmp, counter); + ptimer_run(s->timer_cmp, is_oneshot ? 1 : 0); + } else { + ptimer_stop(s->timer_cmp); + } + } -static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value, - unsigned size) +static void imx_epit_write_cr(IMXEPITState *s, uint32_t value) { - IMXEPITState *s = IMX_EPIT(opaque); - uint64_t oldcr; + uint32_t oldcr = s->cr; - DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(offset >> 2), - (uint32_t)value); - - switch (offset >> 2) { - case 0: /* CR */ + s->cr = value & 0x03ffffff; - oldcr = s->cr; - s->cr = value & 0x03ffffff; - if (s->cr & CR_SWR) { - /* handle the reset */ - imx_epit_reset(DEVICE(s)); - /* - * TODO: could we 'break' here? following operations appear - * to duplicate the work imx_epit_reset() already did. - */ - } + if (s->cr & CR_SWR) { + /* + * Reset clears CR.SWR again. It does not touch CR.EN, but the timers + * are still stopped because the input clock is disabled.
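The edge detection used in the CR write path just below is compact enough to deserve a spelled-out check (self-contained C; bit 0 stands in for CR_EN):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t CR_EN = 1u << 0;

    /* "switched on" = bit toggled AND set in the new value */
    uint32_t oldcr = 0x0, newcr = CR_EN;
    assert(((oldcr ^ newcr) & newcr) & CR_EN);      /* 0 -> 1 edge */

    oldcr = CR_EN;
    newcr = CR_EN;                                  /* already enabled */
    assert(!(((oldcr ^ newcr) & newcr) & CR_EN));   /* no edge */
    return 0;
}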
+ */ + imx_epit_reset(s, false); + } else { + uint32_t freq; + uint32_t toggled_cr_bits = oldcr ^ s->cr; + /* re-initialize the limits if CR.RLD has changed */ + bool set_limit = toggled_cr_bits & CR_RLD; + /* set the counter if the timer just got enabled and CR.ENMOD is set */ + bool is_switched_on = (toggled_cr_bits & s->cr) & CR_EN; + bool set_counter = is_switched_on && (s->cr & CR_ENMOD); ptimer_transaction_begin(s->timer_cmp); ptimer_transaction_begin(s->timer_reload); - - if (!(s->cr & CR_SWR)) { - imx_epit_set_freq(s); + freq = imx_epit_get_freq(s); + if (freq) { + ptimer_set_freq(s->timer_reload, freq); + ptimer_set_freq(s->timer_cmp, freq); } - if (s->freq && (s->cr & CR_EN) && !(oldcr & CR_EN)) { - if (s->cr & CR_ENMOD) { - if (s->cr & CR_RLD) { - ptimer_set_limit(s->timer_reload, s->lr, 1); - ptimer_set_limit(s->timer_cmp, s->lr, 1); - } else { - ptimer_set_limit(s->timer_reload, EPIT_TIMER_MAX, 1); - ptimer_set_limit(s->timer_cmp, EPIT_TIMER_MAX, 1); - } + if (set_limit || set_counter) { + uint64_t limit = (s->cr & CR_RLD) ? s->lr : EPIT_TIMER_MAX; + ptimer_set_limit(s->timer_reload, limit, set_counter ? 1 : 0); + if (set_limit) { + ptimer_set_limit(s->timer_cmp, limit, 0); } - - imx_epit_reload_compare_timer(s); + } + /* + * If there is an input clock and the peripheral is enabled, then + * ensure the wall clock timer is ticking. Otherwise stop the timers. + * The compare timer will be updated later. + */ + if (freq && (s->cr & CR_EN)) { ptimer_run(s->timer_reload, 0); - if (s->cr & CR_OCIEN) { - ptimer_run(s->timer_cmp, 0); - } else { - ptimer_stop(s->timer_cmp); - } - } else if (!(s->cr & CR_EN)) { - /* stop both timers */ - ptimer_stop(s->timer_reload); - ptimer_stop(s->timer_cmp); - } else if (s->cr & CR_OCIEN) { - if (!(oldcr & CR_OCIEN)) { - imx_epit_reload_compare_timer(s); - ptimer_run(s->timer_cmp, 0); - } } else { - ptimer_stop(s->timer_cmp); + ptimer_stop(s->timer_reload); } - - ptimer_transaction_commit(s->timer_cmp); + /* Commit changes to reload timer, so they can propagate. */ ptimer_transaction_commit(s->timer_reload); - break; + /* Update compare timer based on the committed reload timer value. */ + imx_epit_update_compare_timer(s); + ptimer_transaction_commit(s->timer_cmp); + } - case 1: /* SR - ACK*/ - /* writing 1 to OCIF clear the OCIF bit */ - if (value & 0x01) { - s->sr = 0; - imx_epit_update_int(s); - } - break; + /* + * The interrupt state can change due to: + * - reset clears both SR.OCIF and CR.OCIE + * - write to CR.EN or CR.OCIE + */ + imx_epit_update_int(s); +} - case 2: /* LR - set ticks */ - s->lr = value; +static void imx_epit_write_sr(IMXEPITState *s, uint32_t value) +{ + /* writing 1 to SR.OCIF clears this bit and turns the interrupt off */ + if (value & SR_OCIF) { + s->sr = 0; /* SR.OCIF is the only bit in this register anyway */ + imx_epit_update_int(s); + } +} - ptimer_transaction_begin(s->timer_cmp); - ptimer_transaction_begin(s->timer_reload); - if (s->cr & CR_RLD) { - /* Also set the limit if the LRD bit is set */ - /* If IOVW bit is set then set the timer value */ - ptimer_set_limit(s->timer_reload, s->lr, s->cr & CR_IOVW); - ptimer_set_limit(s->timer_cmp, s->lr, 0); - } else if (s->cr & CR_IOVW) { - /* If IOVW bit is set then set the timer value */ - ptimer_set_count(s->timer_reload, s->lr); - } - /* - * Commit the change to s->timer_reload, so it can propagate. Otherwise - * the timer interrupt may not fire properly.
The commit must happen - * before calling imx_epit_reload_compare_timer(), which reads - * s->timer_reload internally again. - */ - ptimer_transaction_commit(s->timer_reload); - imx_epit_reload_compare_timer(s); - ptimer_transaction_commit(s->timer_cmp); +static void imx_epit_write_lr(IMXEPITState *s, uint32_t value) +{ + s->lr = value; + + ptimer_transaction_begin(s->timer_cmp); + ptimer_transaction_begin(s->timer_reload); + if (s->cr & CR_RLD) { + /* Also set the limit if the LRD bit is set */ + /* If IOVW bit is set then set the timer value */ + ptimer_set_limit(s->timer_reload, s->lr, s->cr & CR_IOVW); + ptimer_set_limit(s->timer_cmp, s->lr, 0); + } else if (s->cr & CR_IOVW) { + /* If IOVW bit is set then set the timer value */ + ptimer_set_count(s->timer_reload, s->lr); + } + /* Commit the changes to s->timer_reload, so they can propagate. */ + ptimer_transaction_commit(s->timer_reload); + /* Update the compare timer based on the committed reload timer value. */ + imx_epit_update_compare_timer(s); + ptimer_transaction_commit(s->timer_cmp); +} + +static void imx_epit_write_cmp(IMXEPITState *s, uint32_t value) +{ + s->cmp = value; + + /* Update the compare timer based on the committed reload timer value. */ + ptimer_transaction_begin(s->timer_cmp); + imx_epit_update_compare_timer(s); + ptimer_transaction_commit(s->timer_cmp); +} + +static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + IMXEPITState *s = IMX_EPIT(opaque); + + DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(offset >> 2), + (uint32_t)value); + + switch (offset >> 2) { + case 0: /* CR */ + imx_epit_write_cr(s, (uint32_t)value); break; - case 3: /* CMP */ - s->cmp = value; + case 1: /* SR */ + imx_epit_write_sr(s, (uint32_t)value); + break; - ptimer_transaction_begin(s->timer_cmp); - imx_epit_reload_compare_timer(s); - ptimer_transaction_commit(s->timer_cmp); + case 2: /* LR */ + imx_epit_write_lr(s, (uint32_t)value); + break; + case 3: /* CMP */ + imx_epit_write_cmp(s, (uint32_t)value); break; default: qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%" HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset); - break; } } + static void imx_epit_cmp(void *opaque) { IMXEPITState *s = IMX_EPIT(opaque); - DPRINTF("sr was %d\n", s->sr); + /* The cmp ptimer can't be running when the peripheral is disabled */ + assert(s->cr & CR_EN); - s->sr = 1; + DPRINTF("sr was %d\n", s->sr); + /* Set interrupt status bit SR.OCIF and update the interrupt state */ + s->sr |= SR_OCIF; imx_epit_update_int(s); } @@ -325,15 +381,13 @@ static const MemoryRegionOps imx_epit_ops = { static const VMStateDescription vmstate_imx_timer_epit = { .name = TYPE_IMX_EPIT, - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, .fields = (VMStateField[]) { VMSTATE_UINT32(cr, IMXEPITState), VMSTATE_UINT32(sr, IMXEPITState), VMSTATE_UINT32(lr, IMXEPITState), VMSTATE_UINT32(cmp, IMXEPITState), - VMSTATE_UINT32(cnt, IMXEPITState), - VMSTATE_UINT32(freq, IMXEPITState), VMSTATE_PTIMER(timer_reload, IMXEPITState), VMSTATE_PTIMER(timer_cmp, IMXEPITState), VMSTATE_END_OF_LIST() @@ -352,17 +406,33 @@ static void imx_epit_realize(DeviceState *dev, Error **errp) 0x00001000); sysbus_init_mmio(sbd, &s->iomem); + /* + * The reload timer keeps running when the peripheral is enabled. It is a + * kind of wall clock that does not generate any interrupts. The callback + * needs to be provided, but it does nothing as the ptimer already supports + * all necessary reloading functionality. 
+ */ s->timer_reload = ptimer_init(imx_epit_reload, s, PTIMER_POLICY_LEGACY); + /* + * The compare timer is running only when the peripheral configuration is + * in a state that will generate compare interrupts. + */ s->timer_cmp = ptimer_init(imx_epit_cmp, s, PTIMER_POLICY_LEGACY); } +static void imx_epit_dev_reset(DeviceState *dev) +{ + IMXEPITState *s = IMX_EPIT(dev); + imx_epit_reset(s, true); +} + static void imx_epit_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = imx_epit_realize; - dc->reset = imx_epit_reset; + dc->reset = imx_epit_dev_reset; dc->vmsd = &vmstate_imx_timer_epit; dc->desc = "i.MX periodic timer"; } diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c index 80b8302..7222b1b 100644 --- a/hw/timer/imx_gpt.c +++ b/hw/timer/imx_gpt.c @@ -115,6 +115,17 @@ static const IMXClk imx6_gpt_clocks[] = { CLK_HIGH, /* 111 reference clock */ }; +static const IMXClk imx6ul_gpt_clocks[] = { + CLK_NONE, /* 000 No clock source */ + CLK_IPG, /* 001 ipg_clk, 532MHz*/ + CLK_IPG_HIGH, /* 010 ipg_clk_highfreq */ + CLK_EXT, /* 011 External clock */ + CLK_32k, /* 100 ipg_clk_32k */ + CLK_NONE, /* 101 not defined */ + CLK_NONE, /* 110 not defined */ + CLK_NONE, /* 111 not defined */ +}; + static const IMXClk imx7_gpt_clocks[] = { CLK_NONE, /* 000 No clock source */ CLK_IPG, /* 001 ipg_clk, 532MHz*/ @@ -539,6 +550,13 @@ static void imx6_gpt_init(Object *obj) s->clocks = imx6_gpt_clocks; } +static void imx6ul_gpt_init(Object *obj) +{ + IMXGPTState *s = IMX_GPT(obj); + + s->clocks = imx6ul_gpt_clocks; +} + static void imx7_gpt_init(Object *obj) { IMXGPTState *s = IMX_GPT(obj); @@ -566,6 +584,12 @@ static const TypeInfo imx6_gpt_info = { .instance_init = imx6_gpt_init, }; +static const TypeInfo imx6ul_gpt_info = { + .name = TYPE_IMX6UL_GPT, + .parent = TYPE_IMX25_GPT, + .instance_init = imx6ul_gpt_init, +}; + static const TypeInfo imx7_gpt_info = { .name = TYPE_IMX7_GPT, .parent = TYPE_IMX25_GPT, @@ -577,6 +601,7 @@ static void imx_gpt_register_types(void) type_register_static(&imx25_gpt_info); type_register_static(&imx31_gpt_info); type_register_static(&imx6_gpt_info); + type_register_static(&imx6ul_gpt_info); type_register_static(&imx7_gpt_info); } diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index e242fed..bc6698b 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -133,6 +133,6 @@ #define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) -/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ +/* MAX_CALL_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. 
*/ #endif /* EXEC_HELPER_HEAD_H */ diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h index 1c5fa6f..4e5e071 100644 --- a/include/hw/arm/fsl-imx7.h +++ b/include/hw/arm/fsl-imx7.h @@ -235,6 +235,26 @@ enum FslIMX7IRQs { FSL_IMX7_USB2_IRQ = 42, FSL_IMX7_USB3_IRQ = 40, + FSL_IMX7_GPT1_IRQ = 55, + FSL_IMX7_GPT2_IRQ = 54, + FSL_IMX7_GPT3_IRQ = 53, + FSL_IMX7_GPT4_IRQ = 52, + + FSL_IMX7_GPIO1_LOW_IRQ = 64, + FSL_IMX7_GPIO1_HIGH_IRQ = 65, + FSL_IMX7_GPIO2_LOW_IRQ = 66, + FSL_IMX7_GPIO2_HIGH_IRQ = 67, + FSL_IMX7_GPIO3_LOW_IRQ = 68, + FSL_IMX7_GPIO3_HIGH_IRQ = 69, + FSL_IMX7_GPIO4_LOW_IRQ = 70, + FSL_IMX7_GPIO4_HIGH_IRQ = 71, + FSL_IMX7_GPIO5_LOW_IRQ = 72, + FSL_IMX7_GPIO5_HIGH_IRQ = 73, + FSL_IMX7_GPIO6_LOW_IRQ = 74, + FSL_IMX7_GPIO6_HIGH_IRQ = 75, + FSL_IMX7_GPIO7_LOW_IRQ = 76, + FSL_IMX7_GPIO7_HIGH_IRQ = 77, + FSL_IMX7_WDOG1_IRQ = 78, FSL_IMX7_WDOG2_IRQ = 79, FSL_IMX7_WDOG3_IRQ = 10, diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h index 21e6234..c5683af 100644 --- a/include/hw/arm/smmu-common.h +++ b/include/hw/arm/smmu-common.h @@ -173,7 +173,4 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, /* Unmap the range of all the notifiers registered to any IOMMU mr */ void smmu_inv_notifiers_all(SMMUState *s); -/* Unmap the range of all the notifiers registered to @mr */ -void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr); - #endif /* HW_ARM_SMMU_COMMON_H */ diff --git a/include/hw/input/tsc2xxx.h b/include/hw/input/tsc2xxx.h index 5b76ebc..00eca17 100644 --- a/include/hw/input/tsc2xxx.h +++ b/include/hw/input/tsc2xxx.h @@ -30,12 +30,12 @@ uWireSlave *tsc2102_init(qemu_irq pint); uWireSlave *tsc2301_init(qemu_irq penirq, qemu_irq kbirq, qemu_irq dav); I2SCodec *tsc210x_codec(uWireSlave *chip); uint32_t tsc210x_txrx(void *opaque, uint32_t value, int len); -void tsc210x_set_transform(uWireSlave *chip, MouseTransformInfo *info); +void tsc210x_set_transform(uWireSlave *chip, const MouseTransformInfo *info); void tsc210x_key_event(uWireSlave *chip, int key, int down); /* tsc2005.c */ void *tsc2005_init(qemu_irq pintdav); uint32_t tsc2005_txrx(void *opaque, uint32_t value, int len); -void tsc2005_set_transform(void *opaque, MouseTransformInfo *info); +void tsc2005_set_transform(void *opaque, const MouseTransformInfo *info); #endif diff --git a/include/hw/intc/loongarch_pch_msi.h b/include/hw/intc/loongarch_pch_msi.h index 6d67560..832e69f 100644 --- a/include/hw/intc/loongarch_pch_msi.h +++ b/include/hw/intc/loongarch_pch_msi.h @@ -8,15 +8,16 @@ #define TYPE_LOONGARCH_PCH_MSI "loongarch_pch_msi" OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHMSI, LOONGARCH_PCH_MSI) -/* Msi irq start start from 64 to 255 */ -#define PCH_MSI_IRQ_START 64 +/* MSI irqs range from 32 to 255 */ +#define PCH_MSI_IRQ_START 32 #define PCH_MSI_IRQ_END 255 -#define PCH_MSI_IRQ_NUM 192 +#define PCH_MSI_IRQ_NUM 224 struct LoongArchPCHMSI { SysBusDevice parent_obj; - qemu_irq pch_msi_irq[PCH_MSI_IRQ_NUM]; + qemu_irq *pch_msi_irq; MemoryRegion msi_mmio; /* irq base passed to upper extioi intc */ unsigned int irq_base; + unsigned int irq_num; }; diff --git a/include/hw/intc/loongarch_pch_pic.h b/include/hw/intc/loongarch_pch_pic.h index 2d4aa9e..258e3b3 100644 --- a/include/hw/intc/loongarch_pch_pic.h +++ b/include/hw/intc/loongarch_pch_pic.h @@ -9,11 +9,8 @@ #define PCH_PIC_NAME(name) TYPE_LOONGARCH_PCH_PIC#name OBJECT_DECLARE_SIMPLE_TYPE(LoongArchPCHPIC, LOONGARCH_PCH_PIC) -#define PCH_PIC_IRQ_START 0 -#define PCH_PIC_IRQ_END 63 -#define PCH_PIC_IRQ_NUM 64 #define PCH_PIC_INT_ID_VAL
0x7000000UL -#define PCH_PIC_INT_ID_NUM 0x3f0001UL +#define PCH_PIC_INT_ID_VER 0x1UL #define PCH_PIC_INT_ID_LO 0x00 #define PCH_PIC_INT_ID_HI 0x04 @@ -66,4 +63,5 @@ struct LoongArchPCHPIC { MemoryRegion iomem32_low; MemoryRegion iomem32_high; MemoryRegion iomem8; + unsigned int irq_num; }; diff --git a/include/hw/intc/sifive_plic.h b/include/hw/intc/sifive_plic.h index 134cf39..d3f45ec 100644 --- a/include/hw/intc/sifive_plic.h +++ b/include/hw/intc/sifive_plic.h @@ -33,7 +33,6 @@ DECLARE_INSTANCE_CHECKER(SiFivePLICState, SIFIVE_PLIC, typedef enum PLICMode { PLICMode_U, PLICMode_S, - PLICMode_H, PLICMode_M } PLICMode; diff --git a/include/hw/misc/mchp_pfsoc_ioscb.h b/include/hw/misc/mchp_pfsoc_ioscb.h index 9235523..a110486 100644 --- a/include/hw/misc/mchp_pfsoc_ioscb.h +++ b/include/hw/misc/mchp_pfsoc_ioscb.h @@ -29,7 +29,10 @@ typedef struct MchpPfSoCIoscbState { MemoryRegion lane01; MemoryRegion lane23; MemoryRegion ctrl; + MemoryRegion qspixip; + MemoryRegion mailbox; MemoryRegion cfg; + MemoryRegion ccc; MemoryRegion pll_mss; MemoryRegion cfm_mss; MemoryRegion pll_ddr; @@ -40,6 +43,7 @@ typedef struct MchpPfSoCIoscbState { MemoryRegion cfm_sgmii; MemoryRegion bc_sgmii; MemoryRegion io_calib_sgmii; + qemu_irq irq; } MchpPfSoCIoscbState; #define TYPE_MCHP_PFSOC_IOSCB "mchp.pfsoc.ioscb" diff --git a/include/hw/misc/mchp_pfsoc_sysreg.h b/include/hw/misc/mchp_pfsoc_sysreg.h index 546ba68..3cebe40 100644 --- a/include/hw/misc/mchp_pfsoc_sysreg.h +++ b/include/hw/misc/mchp_pfsoc_sysreg.h @@ -28,6 +28,7 @@ typedef struct MchpPfSoCSysregState { SysBusDevice parent; MemoryRegion sysreg; + qemu_irq irq; } MchpPfSoCSysregState; #define TYPE_MCHP_PFSOC_SYSREG "mchp.pfsoc.sysreg" diff --git a/include/hw/pci-host/ls7a.h b/include/hw/pci-host/ls7a.h index df7fa55..8061c4b 100644 --- a/include/hw/pci-host/ls7a.h +++ b/include/hw/pci-host/ls7a.h @@ -32,9 +32,9 @@ * 0 ~ 16 irqs used for non-pci device while 16 ~ 64 irqs * used for pci device. 
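The numbers behind this repartitioning are easy to cross-check against the PCH_MSI defines above and the VIRT_PCH_PIC_IRQ_NUM define just below (self-contained C; EXTIOI_IRQS = 256 is an assumption, implied by PCH_MSI_IRQ_END being 255):

#include <assert.h>

#define EXTIOI_IRQS          256   /* assumed total extioi inputs */
#define VIRT_PCH_PIC_IRQ_NUM 32

int main(void)
{
    int start = VIRT_PCH_PIC_IRQ_NUM;   /* msi_irq_base in virt.c */
    int num = EXTIOI_IRQS - start;      /* msi_irq_num */
    assert(start == 32 && num == 224);  /* PCH_MSI_IRQ_START/_NUM */
    return 0;
}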
*/ +#define VIRT_PCH_PIC_IRQ_NUM 32 #define PCH_PIC_IRQ_OFFSET 64 #define VIRT_DEVICE_IRQS 16 -#define VIRT_PCI_IRQS 48 #define VIRT_UART_IRQ (PCH_PIC_IRQ_OFFSET + 2) #define VIRT_UART_BASE 0x1fe001e0 #define VIRT_UART_SIZE 0X100 diff --git a/include/hw/riscv/microchip_pfsoc.h b/include/hw/riscv/microchip_pfsoc.h index a757b24..e65ffeb 100644 --- a/include/hw/riscv/microchip_pfsoc.h +++ b/include/hw/riscv/microchip_pfsoc.h @@ -121,6 +121,8 @@ enum { MICROCHIP_PFSOC_USB, MICROCHIP_PFSOC_QSPI_XIP, MICROCHIP_PFSOC_IOSCB, + MICROCHIP_PFSOC_FABRIC_FIC0, + MICROCHIP_PFSOC_FABRIC_FIC1, MICROCHIP_PFSOC_FABRIC_FIC3, MICROCHIP_PFSOC_DRAM_LO, MICROCHIP_PFSOC_DRAM_LO_ALIAS, @@ -145,14 +147,15 @@ enum { MICROCHIP_PFSOC_MMUART2_IRQ = 92, MICROCHIP_PFSOC_MMUART3_IRQ = 93, MICROCHIP_PFSOC_MMUART4_IRQ = 94, + MICROCHIP_PFSOC_MAILBOX_IRQ = 96, }; #define MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT 1 #define MICROCHIP_PFSOC_COMPUTE_CPU_COUNT 4 -#define MICROCHIP_PFSOC_PLIC_NUM_SOURCES 185 +#define MICROCHIP_PFSOC_PLIC_NUM_SOURCES 187 #define MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES 7 -#define MICROCHIP_PFSOC_PLIC_PRIORITY_BASE 0x04 +#define MICROCHIP_PFSOC_PLIC_PRIORITY_BASE 0x00 #define MICROCHIP_PFSOC_PLIC_PENDING_BASE 0x1000 #define MICROCHIP_PFSOC_PLIC_ENABLE_BASE 0x2000 #define MICROCHIP_PFSOC_PLIC_ENABLE_STRIDE 0x80 diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h index 6665cd5..7659d1b 100644 --- a/include/hw/riscv/opentitan.h +++ b/include/hw/riscv/opentitan.h @@ -81,7 +81,7 @@ enum { IBEX_DEV_RSTMGR, IBEX_DEV_CLKMGR, IBEX_DEV_PINMUX, - IBEX_DEV_PADCTRL, + IBEX_DEV_AON_TIMER, IBEX_DEV_USBDEV, IBEX_DEV_FLASH_CTRL, IBEX_DEV_PLIC, @@ -109,10 +109,10 @@ enum { IBEX_UART0_RX_TIMEOUT_IRQ = 7, IBEX_UART0_RX_PARITY_ERR_IRQ = 8, IBEX_TIMER_TIMEREXPIRED0_0 = 127, - IBEX_SPI_HOST0_ERR_IRQ = 151, - IBEX_SPI_HOST0_SPI_EVENT_IRQ = 152, - IBEX_SPI_HOST1_ERR_IRQ = 153, - IBEX_SPI_HOST1_SPI_EVENT_IRQ = 154, + IBEX_SPI_HOST0_ERR_IRQ = 134, + IBEX_SPI_HOST0_SPI_EVENT_IRQ = 135, + IBEX_SPI_HOST1_ERR_IRQ = 136, + IBEX_SPI_HOST1_SPI_EVENT_IRQ = 137, }; #endif diff --git a/include/hw/riscv/shakti_c.h b/include/hw/riscv/shakti_c.h index daf0aae..539fe11 100644 --- a/include/hw/riscv/shakti_c.h +++ b/include/hw/riscv/shakti_c.h @@ -65,7 +65,7 @@ enum { #define SHAKTI_C_PLIC_NUM_SOURCES 28 /* Excluding Priority 0 */ #define SHAKTI_C_PLIC_NUM_PRIORITIES 2 -#define SHAKTI_C_PLIC_PRIORITY_BASE 0x04 +#define SHAKTI_C_PLIC_PRIORITY_BASE 0x00 #define SHAKTI_C_PLIC_PENDING_BASE 0x1000 #define SHAKTI_C_PLIC_ENABLE_BASE 0x2000 #define SHAKTI_C_PLIC_ENABLE_STRIDE 0x80 diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h index d738745..b824a79 100644 --- a/include/hw/riscv/sifive_e.h +++ b/include/hw/riscv/sifive_e.h @@ -82,9 +82,14 @@ enum { }; #define SIFIVE_E_PLIC_HART_CONFIG "M" -#define SIFIVE_E_PLIC_NUM_SOURCES 127 +/* + * Freedom E310 G002 and G003 support 52 interrupt sources while + * Freedom E310 G000 supports 51 interrupt sources. We use the value + * of G002 and G003, so it is 53 (including interrupt source 0).
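Counting source 0 is what lets every board drop the 0x04 priority-base: register index and source number now coincide. A sketch of the addressing after this change, mirroring the sifive_plic.c read/write hunks earlier in the diff:

#include <assert.h>

int main(void)
{
    unsigned priority_base = 0x00;        /* was 0x04 */
    unsigned addr = 0x04;                 /* priority register of source 1 */
    unsigned irq = (addr - priority_base) >> 2;   /* no "+ 1" correction */
    assert(irq == 1);
    return 0;
}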
+ */ +#define SIFIVE_E_PLIC_NUM_SOURCES 53 #define SIFIVE_E_PLIC_NUM_PRIORITIES 7 -#define SIFIVE_E_PLIC_PRIORITY_BASE 0x04 +#define SIFIVE_E_PLIC_PRIORITY_BASE 0x00 #define SIFIVE_E_PLIC_PENDING_BASE 0x1000 #define SIFIVE_E_PLIC_ENABLE_BASE 0x2000 #define SIFIVE_E_PLIC_ENABLE_STRIDE 0x80 diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h index 8f63a18..e680d61 100644 --- a/include/hw/riscv/sifive_u.h +++ b/include/hw/riscv/sifive_u.h @@ -158,7 +158,7 @@ enum { #define SIFIVE_U_PLIC_NUM_SOURCES 54 #define SIFIVE_U_PLIC_NUM_PRIORITIES 7 -#define SIFIVE_U_PLIC_PRIORITY_BASE 0x04 +#define SIFIVE_U_PLIC_PRIORITY_BASE 0x00 #define SIFIVE_U_PLIC_PENDING_BASE 0x1000 #define SIFIVE_U_PLIC_ENABLE_BASE 0x2000 #define SIFIVE_U_PLIC_ENABLE_STRIDE 0x80 diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index be4ab8f..3407c9e 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -87,20 +87,18 @@ enum { VIRTIO_IRQ = 1, /* 1 to 8 */ VIRTIO_COUNT = 8, PCIE_IRQ = 0x20, /* 32 to 35 */ - VIRT_PLATFORM_BUS_IRQ = 64, /* 64 to 96 */ - VIRTIO_NDEV = 96 /* Arbitrary maximum number of interrupts */ + VIRT_PLATFORM_BUS_IRQ = 64, /* 64 to 95 */ }; #define VIRT_PLATFORM_BUS_NUM_IRQS 32 -#define VIRT_IRQCHIP_IPI_MSI 1 #define VIRT_IRQCHIP_NUM_MSIS 255 -#define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV +#define VIRT_IRQCHIP_NUM_SOURCES 96 #define VIRT_IRQCHIP_NUM_PRIO_BITS 3 #define VIRT_IRQCHIP_MAX_GUESTS_BITS 3 #define VIRT_IRQCHIP_MAX_GUESTS ((1U << VIRT_IRQCHIP_MAX_GUESTS_BITS) - 1U) -#define VIRT_PLIC_PRIORITY_BASE 0x04 +#define VIRT_PLIC_PRIORITY_BASE 0x00 #define VIRT_PLIC_PENDING_BASE 0x1000 #define VIRT_PLIC_ENABLE_BASE 0x2000 #define VIRT_PLIC_ENABLE_STRIDE 0x80 diff --git a/include/hw/timer/imx_epit.h b/include/hw/timer/imx_epit.h index 2acc41e..79aff0c 100644 --- a/include/hw/timer/imx_epit.h +++ b/include/hw/timer/imx_epit.h @@ -43,7 +43,7 @@ #define CR_OCIEN (1 << 2) #define CR_RLD (1 << 3) #define CR_PRESCALE_SHIFT (4) -#define CR_PRESCALE_MASK (0xfff) +#define CR_PRESCALE_BITS (12) #define CR_SWR (1 << 16) #define CR_IOVW (1 << 17) #define CR_DBGEN (1 << 18) @@ -51,7 +51,9 @@ #define CR_DOZEN (1 << 20) #define CR_STOPEN (1 << 21) #define CR_CLKSRC_SHIFT (24) -#define CR_CLKSRC_MASK (0x3 << CR_CLKSRC_SHIFT) +#define CR_CLKSRC_BITS (2) + +#define SR_OCIF (1 << 0) #define EPIT_TIMER_MAX 0XFFFFFFFFUL @@ -72,9 +74,7 @@ struct IMXEPITState { uint32_t sr; uint32_t lr; uint32_t cmp; - uint32_t cnt; - uint32_t freq; qemu_irq irq; }; diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h index ff5c8a3..5a1230d 100644 --- a/include/hw/timer/imx_gpt.h +++ b/include/hw/timer/imx_gpt.h @@ -78,6 +78,7 @@ #define TYPE_IMX25_GPT "imx25.gpt" #define TYPE_IMX31_GPT "imx31.gpt" #define TYPE_IMX6_GPT "imx6.gpt" +#define TYPE_IMX6UL_GPT "imx6ul.gpt" #define TYPE_IMX7_GPT "imx7.gpt" #define TYPE_IMX_GPT TYPE_IMX25_GPT diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 3c9a9a9..c25f390 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -343,6 +343,35 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line); */ void qemu_mutex_unlock_iothread(void); +/** + * QEMU_IOTHREAD_LOCK_GUARD + * + * Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread. 
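With the macro assembled below, each call site collapses to one line, as the conversions earlier in this diff (io_readx, ppc_set_irq, cpu_mips_irq_request) show. A hedged usage sketch (MyDeviceState is hypothetical):

typedef struct MyDeviceState MyDeviceState;  /* hypothetical device */

/* The guard takes the BQL only if this thread does not already hold
 * it; the g_autoptr cleanup releases it at end of scope iff it was
 * taken here. */
static void my_device_set_irq(MyDeviceState *s, int level)
{
    QEMU_IOTHREAD_LOCK_GUARD();
    /* ... update interrupt state under the BQL ... */
}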
+ */ +typedef struct IOThreadLockAuto IOThreadLockAuto; + +static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file, + int line) +{ + if (qemu_mutex_iothread_locked()) { + return NULL; + } + qemu_mutex_lock_iothread_impl(file, line); + /* Anything non-NULL causes the cleanup function to be called */ + return (IOThreadLockAuto *)(uintptr_t)1; +} + +static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l) +{ + qemu_mutex_unlock_iothread(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock) + +#define QEMU_IOTHREAD_LOCK_GUARD() \ + g_autoptr(IOThreadLockAuto) _iothread_lock_auto __attribute__((unused)) \ + = qemu_iothread_auto_lock(__FILE__, __LINE__) + /* * qemu_cond_wait_iothread: Wait on condition for the main loop mutex * diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 209e168..79b1cf7 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -667,35 +667,12 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); } #else /* TCG_TARGET_REG_BITS == 32 */ -static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); -} +void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); +void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset); -static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); -} - -static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, - tcg_target_long offset) -{ - tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); -} - -static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); -} - -static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), - TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); -} +void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_discard_i64(TCGv_i64 arg); void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg); @@ -841,7 +818,7 @@ static inline void tcg_gen_plugin_cb_start(unsigned from, unsigned type, static inline void tcg_gen_plugin_cb_end(void) { - tcg_emit_op(INDEX_op_plugin_cb_end); + tcg_emit_op(INDEX_op_plugin_cb_end, 0); } #if TARGET_LONG_BITS == 32 diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index d84bae6..b949d75 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -38,19 +38,7 @@ /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 -#if HOST_LONG_BITS == 32 -#define MAX_OPC_PARAM_PER_ARG 2 -#else -#define MAX_OPC_PARAM_PER_ARG 1 -#endif -#define MAX_OPC_PARAM_IARGS 7 -#define MAX_OPC_PARAM_OARGS 1 -#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) - -/* A Call op needs up to 4 + 2N parameters on 32-bit archs, - * and up to 4 + N parameters on 64-bit archs - * (N = number of input arguments + output arguments). 
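The QEMU_IOTHREAD_LOCK_GUARD introduced above replaces the open-coded lock/unlock-if-not-held pattern; a hedged sketch of a call site, with an invented helper name for illustration:

    /* Hypothetical caller, for illustration only. */
    static uint64_t mydev_io_read(CPUState *cpu, hwaddr addr)
    {
        uint64_t val;
        {
            QEMU_IOTHREAD_LOCK_GUARD();   /* locks only if not already held */
            val = dispatch_locked_read(cpu, addr);   /* invented helper */
        }
        /* qemu_iothread_auto_unlock() has run here, iff the guard locked */
        return val;
    }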
*/ -#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) +#define MAX_CALL_IARGS 7 #define CPU_TEMP_BUF_NLONGS 128 #define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) @@ -294,7 +282,8 @@ typedef enum TCGType { TCG_TYPE_V128, TCG_TYPE_V256, - TCG_TYPE_COUNT, /* number of different types */ + /* Number of different types (integer not enum) */ +#define TCG_TYPE_COUNT (TCG_TYPE_V256 + 1) /* An alias for the size of the host register. */ #if TCG_TARGET_REG_BITS == 32 @@ -319,6 +308,22 @@ typedef enum TCGType { } TCGType; /** + * tcg_type_size + * @t: type + * + * Return the size of the type in bytes. + */ +static inline int tcg_type_size(TCGType t) +{ + unsigned i = t; + if (i >= TCG_TYPE_V64) { + tcg_debug_assert(i < TCG_TYPE_COUNT); + i -= TCG_TYPE_V64 - 1; + } + return 4 << i; +} + +/** * get_alignment_bits * @memop: MemOp value * @@ -408,9 +413,6 @@ typedef TCGv_ptr TCGv_env; #define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE) #define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE) -/* Used to align parameters. See the comment before tcgv_i32_temp. */ -#define TCG_CALL_DUMMY_ARG ((TCGArg)0) - /* * Flags for the bswap opcodes. * If IZ, the input is zero-extended, otherwise unknown. @@ -456,6 +458,7 @@ typedef struct TCGTemp { unsigned int mem_coherent:1; unsigned int mem_allocated:1; unsigned int temp_allocated:1; + unsigned int temp_subindex:1; int64_t val; struct TCGTemp *mem_base; @@ -475,34 +478,34 @@ typedef struct TCGTempSet { unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; } TCGTempSet; -/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, - this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. - There are never more than 2 outputs, which means that we can store all - dead + sync data within 16 bits. */ -#define DEAD_ARG 4 -#define SYNC_ARG 1 -typedef uint16_t TCGLifeData; +/* + * With 1 128-bit output, a 32-bit host requires 4 output parameters, + * which leaves a maximum of 28 other slots. Which is enough for 7 + * 128-bit operands. + */ +#define DEAD_ARG (1 << 4) +#define SYNC_ARG (1 << 0) +typedef uint32_t TCGLifeData; -/* The layout here is designed to avoid a bitfield crossing of - a 32-bit boundary, which would cause GCC to add extra padding. */ typedef struct TCGOp { - TCGOpcode opc : 8; /* 8 */ + TCGOpcode opc : 8; + unsigned nargs : 8; /* Parameters for this opcode. See below. */ - unsigned param1 : 4; /* 12 */ - unsigned param2 : 4; /* 16 */ + unsigned param1 : 8; + unsigned param2 : 8; /* Lifetime data of the operands. */ - unsigned life : 16; /* 32 */ + TCGLifeData life; /* Next and previous opcodes. */ QTAILQ_ENTRY(TCGOp) link; - /* Arguments for the opcode. */ - TCGArg args[MAX_OPC_PARAM]; - /* Register preferences for the output(s). */ TCGRegSet output_pref[2]; + + /* Arguments for the opcode. */ + TCGArg args[]; } TCGOp; #define TCGOP_CALLI(X) (X)->param1 @@ -514,6 +517,11 @@ typedef struct TCGOp { /* Make sure operands fit in the bitfields above. */ QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); +static inline TCGRegSet output_pref(const TCGOp *op, unsigned i) +{ + return i < ARRAY_SIZE(op->output_pref) ? 
op->output_pref[i] : 0; +} + typedef struct TCGProfile { int64_t cpu_exec_time; int64_t tb_count1; @@ -737,18 +745,6 @@ static inline TCGv_vec temp_tcgv_vec(TCGTemp *t) return (TCGv_vec)temp_tcgv_i32(t); } -#if TCG_TARGET_REG_BITS == 32 -static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) -{ - return temp_tcgv_i32(tcgv_i64_temp(t)); -} - -static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) -{ - return temp_tcgv_i32(tcgv_i64_temp(t) + 1); -} -#endif - static inline TCGArg tcg_get_insn_param(TCGOp *op, int arg) { return op->args[arg]; @@ -951,6 +947,8 @@ typedef struct TCGArgConstraint { unsigned ct : 16; unsigned alias_index : 4; unsigned sort_index : 4; + unsigned pair_index : 4; + unsigned pair : 2; /* 0: none, 1: first, 2: second, 3: second alias */ bool oalias : 1; bool ialias : 1; bool newreg : 1; @@ -1006,10 +1004,12 @@ bool tcg_op_supported(TCGOpcode op); void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args); -TCGOp *tcg_emit_op(TCGOpcode opc); +TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs); void tcg_op_remove(TCGContext *s, TCGOp *op); -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, + TCGOpcode opc, unsigned nargs); +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, + TCGOpcode opc, unsigned nargs); /** * tcg_remove_ops_after: diff --git a/linux-user/hexagon/signal.c b/linux-user/hexagon/signal.c index ad4e382..60fa7e1 100644 --- a/linux-user/hexagon/signal.c +++ b/linux-user/hexagon/signal.c @@ -39,15 +39,12 @@ struct target_sigcontext { target_ulong m0; target_ulong m1; target_ulong usr; - target_ulong p3_0; target_ulong gp; target_ulong ugp; target_ulong pc; target_ulong cause; target_ulong badva; - target_ulong pad1; - target_ulong pad2; - target_ulong pad3; + target_ulong pred[NUM_PREGS]; }; struct target_ucontext { @@ -118,10 +115,14 @@ static void setup_sigcontext(struct target_sigcontext *sc, CPUHexagonState *env) __put_user(env->gpr[HEX_REG_M0], &sc->m0); __put_user(env->gpr[HEX_REG_M1], &sc->m1); __put_user(env->gpr[HEX_REG_USR], &sc->usr); - __put_user(env->gpr[HEX_REG_P3_0], &sc->p3_0); __put_user(env->gpr[HEX_REG_GP], &sc->gp); __put_user(env->gpr[HEX_REG_UGP], &sc->ugp); __put_user(env->gpr[HEX_REG_PC], &sc->pc); + + int i; + for (i = 0; i < NUM_PREGS; i++) { + __put_user(env->pred[i], &(sc->pred[i])); + } } static void setup_ucontext(struct target_ucontext *uc, @@ -230,10 +231,14 @@ static void restore_sigcontext(CPUHexagonState *env, __get_user(env->gpr[HEX_REG_M0], &sc->m0); __get_user(env->gpr[HEX_REG_M1], &sc->m1); __get_user(env->gpr[HEX_REG_USR], &sc->usr); - __get_user(env->gpr[HEX_REG_P3_0], &sc->p3_0); __get_user(env->gpr[HEX_REG_GP], &sc->gp); __get_user(env->gpr[HEX_REG_UGP], &sc->ugp); __get_user(env->gpr[HEX_REG_PC], &sc->pc); + + int i; + for (i = 0; i < NUM_PREGS; i++) { + __get_user(env->pred[i], &(sc->pred[i])); + } } static void restore_ucontext(CPUHexagonState *env, struct target_ucontext *uc) diff --git a/meson.build b/meson.build index 52e24ea..175517e 100644 --- a/meson.build +++ b/meson.build @@ -470,6 +470,7 @@ if get_option('tcg').allowed() endif if get_option('tcg_interpreter') tcg_arch = 'tci' + config_host += { 'CONFIG_TCG_INTERPRETER': 'y' } elif host_arch == 'x86_64' tcg_arch = 'i386' elif host_arch == 'ppc64' @@ -2551,9 +2552,6 @@ foreach target : target_dirs if sym == 'CONFIG_TCG' or target in accelerator_targets.get(sym, []) config_target += { sym: 'y' } config_all += { sym: 'y' } 
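With the flexible args[] member and the new nargs field in TCGOp above, every op is sized at emission time, which is why tcg_emit_op() and the insert helpers grew an nargs parameter. A rough sketch of what an allocator for the new layout looks like; details assumed, not taken verbatim from this series:

    /* Rough sketch, assuming tcg_malloc() as the backing allocator. */
    static TCGOp *tcg_op_alloc_sketch(TCGOpcode opc, unsigned nargs)
    {
        TCGOp *op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

        memset(op, 0, sizeof(TCGOp));  /* args[] is filled in by the caller */
        op->opc = opc;
        op->nargs = nargs;
        return op;
    }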
- if sym == 'CONFIG_TCG' and tcg_arch == 'tci' - config_target += { 'CONFIG_TCG_INTERPRETER': 'y' } - endif if target in modular_tcg config_target += { 'CONFIG_TCG_MODULAR': 'y' } else diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index aa6e30e..0f71e92 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -10,6 +10,9 @@ meson_options_help() { printf "%s\n" ' affects only QEMU, not tools like qemu-img)' printf "%s\n" ' --datadir=VALUE Data file directory [share]' printf "%s\n" ' --disable-coroutine-pool coroutine freelist (better performance)' + printf "%s\n" ' --disable-hexagon-idef-parser' + printf "%s\n" ' use idef-parser to automatically generate TCG' + printf "%s\n" ' code for the Hexagon frontend' printf "%s\n" ' --disable-install-blobs install provided firmware blobs' printf "%s\n" ' --docdir=VALUE Base directory for documentation installation' printf "%s\n" ' (can be empty) [share/doc]' @@ -40,7 +43,8 @@ meson_options_help() { printf "%s\n" ' --enable-trace-backends=CHOICES' printf "%s\n" ' Set available tracing backends [log] (choices:' printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' - printf "%s\n" ' --firmwarepath=VALUES search PATH for firmware files [share/qemu-firmware]' + printf "%s\n" ' --firmwarepath=VALUES search PATH for firmware files [share/qemu-' + printf "%s\n" ' firmware]' printf "%s\n" ' --iasl=VALUE Path to ACPI disassembler' printf "%s\n" ' --includedir=VALUE Header file directory [include]' printf "%s\n" ' --interp-prefix=VALUE where to find shared libraries etc., use %M for' @@ -93,7 +97,7 @@ meson_options_help() { printf "%s\n" ' glusterfs Glusterfs block device driver' printf "%s\n" ' gnutls GNUTLS cryptography support' printf "%s\n" ' gtk GTK+ user interface' - printf "%s\n" ' gtk-clipboard clipboard support for GTK (EXPERIMENTAL, MAY HANG)' + printf "%s\n" ' gtk-clipboard clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)' printf "%s\n" ' guest-agent Build QEMU Guest Agent' printf "%s\n" ' guest-agent-msi Build MSI package for the QEMU Guest Agent' printf "%s\n" ' hax HAX acceleration support' @@ -156,6 +160,8 @@ meson_options_help() { printf "%s\n" ' usb-redir libusbredir support' printf "%s\n" ' vde vde network backend support' printf "%s\n" ' vdi vdi image format support' + printf "%s\n" ' vduse-blk-export' + printf "%s\n" ' VDUSE block export support' printf "%s\n" ' vfio-user-server' printf "%s\n" ' vfio-user server support' printf "%s\n" ' vhost-crypto vhost-user crypto backend support' @@ -164,8 +170,6 @@ meson_options_help() { printf "%s\n" ' vhost-user vhost-user backend support' printf "%s\n" ' vhost-user-blk-server' printf "%s\n" ' build vhost-user-blk server' - printf "%s\n" ' vduse-blk-export' - printf "%s\n" ' VDUSE block export support' printf "%s\n" ' vhost-vdpa vhost-vdpa kernel backend support' printf "%s\n" ' virglrenderer virgl rendering support' printf "%s\n" ' virtfs virtio-9p support' @@ -283,6 +287,8 @@ _meson_option_parse() { --disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;; --enable-hax) printf "%s" -Dhax=enabled ;; --disable-hax) printf "%s" -Dhax=disabled ;; + --enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;; + --disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;; --enable-hvf) printf "%s" -Dhvf=enabled ;; --disable-hvf) printf "%s" -Dhvf=disabled ;; --iasl=*) quote_sh "-Diasl=$2" ;; @@ -429,6 +435,8 @@ _meson_option_parse() { --disable-vde) printf "%s" -Dvde=disabled ;; --enable-vdi) printf "%s" 
-Dvdi=enabled ;; --disable-vdi) printf "%s" -Dvdi=disabled ;; + --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;; + --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;; --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;; --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;; --enable-vhost-crypto) printf "%s" -Dvhost_crypto=enabled ;; @@ -441,8 +449,6 @@ _meson_option_parse() { --disable-vhost-user) printf "%s" -Dvhost_user=disabled ;; --enable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=enabled ;; --disable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=disabled ;; - --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;; - --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;; --enable-vhost-vdpa) printf "%s" -Dvhost_vdpa=enabled ;; --disable-vhost-vdpa) printf "%s" -Dvhost_vdpa=disabled ;; --enable-virglrenderer) printf "%s" -Dvirglrenderer=enabled ;; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 2fa022f..5f63316 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -26,7 +26,6 @@ #include "target/arm/idau.h" #include "qemu/module.h" #include "qapi/error.h" -#include "qapi/visitor.h" #include "cpu.h" #ifdef CONFIG_TCG #include "hw/core/tcg-cpu-ops.h" @@ -309,6 +308,10 @@ static void arm_cpu_reset_hold(Object *obj) env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, CPACR, CP11, 3); #endif + if (arm_feature(env, ARM_FEATURE_V8)) { + env->cp15.rvbar = cpu->rvbar_prop; + env->regs[15] = cpu->rvbar_prop; + } } #if defined(CONFIG_USER_ONLY) @@ -487,6 +490,14 @@ static void arm_cpu_reset_hold(Object *obj) sizeof(*env->pmsav7.dracr) * cpu->pmsav7_dregion); } } + + if (cpu->pmsav8r_hdregion > 0) { + memset(env->pmsav8.hprbar, 0, + sizeof(*env->pmsav8.hprbar) * cpu->pmsav8r_hdregion); + memset(env->pmsav8.hprlar, 0, + sizeof(*env->pmsav8.hprlar) * cpu->pmsav8r_hdregion); + } + env->pmsav7.rnr[M_REG_NS] = 0; env->pmsav7.rnr[M_REG_S] = 0; env->pmsav8.mair0[M_REG_NS] = 0; @@ -1345,7 +1356,7 @@ void arm_cpu_post_init(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property); } - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + if (arm_feature(&cpu->env, ARM_FEATURE_V8)) { object_property_add_uint64_ptr(obj, "rvbar", &cpu->rvbar_prop, OBJ_PROP_FLAG_READWRITE); @@ -1998,11 +2009,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) /* MPU can be configured out of a PMSA CPU either by setting has-mpu * to false or by setting pmsav7-dregion to 0. 
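For the has-mpu/pmsav7-dregion unification in the next hunk, both knobs are existing qdev/QOM properties on ARM CPUs; a sketch of the two equivalent ways a board can configure the MPU away:

    /* Either setting removes the MPU; after realize the other view is
     * forced consistent (has_mpu false, both region counts zeroed).
     */
    object_property_set_bool(OBJECT(cpu), "has-mpu", false, &error_abort);
    /* ...or... */
    qdev_prop_set_uint32(DEVICE(cpu), "pmsav7-dregion", 0);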
*/ - if (!cpu->has_mpu) { - cpu->pmsav7_dregion = 0; - } - if (cpu->pmsav7_dregion == 0) { + if (!cpu->has_mpu || cpu->pmsav7_dregion == 0) { cpu->has_mpu = false; + cpu->pmsav7_dregion = 0; + cpu->pmsav8r_hdregion = 0; } if (arm_feature(env, ARM_FEATURE_PMSA) && @@ -2029,6 +2039,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) env->pmsav7.dracr = g_new0(uint32_t, nr); } } + + if (cpu->pmsav8r_hdregion > 0xff) { + error_setg(errp, "PMSAv8 MPU EL2 #regions invalid %" PRIu32, + cpu->pmsav8r_hdregion); + return; + } + + if (cpu->pmsav8r_hdregion) { + env->pmsav8.hprbar = g_new0(uint32_t, + cpu->pmsav8r_hdregion); + env->pmsav8.hprlar = g_new0(uint32_t, + cpu->pmsav8r_hdregion); + } } if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 2b4bd20..bf2bce04 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -309,6 +309,7 @@ typedef struct CPUArchState { }; uint64_t sctlr_el[4]; }; + uint64_t vsctlr; /* Virtualization System control register. */ uint64_t cpacr_el1; /* Architectural feature access control register */ uint64_t cptr_el[4]; /* ARMv8 feature trap registers */ uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */ @@ -745,8 +746,11 @@ typedef struct CPUArchState { */ uint32_t *rbar[M_REG_NUM_BANKS]; uint32_t *rlar[M_REG_NUM_BANKS]; + uint32_t *hprbar; + uint32_t *hprlar; uint32_t mair0[M_REG_NUM_BANKS]; uint32_t mair1[M_REG_NUM_BANKS]; + uint32_t hprselr; } pmsav8; /* v8M SAU */ @@ -906,6 +910,8 @@ struct ArchCPU { bool has_mpu; /* PMSAv7 MPU number of supported regions */ uint32_t pmsav7_dregion; + /* PMSAv8 MPU number of supported hyp regions */ + uint32_t pmsav8r_hdregion; /* v8M SAU number of supported regions */ uint32_t sau_sregion; diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 2cf2ca4..0e02196 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -21,13 +21,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" -#ifdef CONFIG_TCG -#include "hw/core/tcg-cpu-ops.h" -#endif /* CONFIG_TCG */ #include "qemu/module.h" -#if !defined(CONFIG_USER_ONLY) -#include "hw/loader.h" -#endif #include "sysemu/kvm.h" #include "sysemu/hvf.h" #include "kvm_arm.h" diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c index 568cbcf..ccde508 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -854,6 +854,47 @@ static void cortex_r5_initfn(Object *obj) define_arm_cp_regs(cpu, cortexr5_cp_reginfo); } +static void cortex_r52_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_PMSA); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + cpu->midr = 0x411fd133; /* r1p3 */ + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034023; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x8144c004; + cpu->reset_sctlr = 0x30c50838; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x10111001; + cpu->isar.id_dfr0 = 0x03010006; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00211040; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01200000; + cpu->isar.id_mmfr3 = 0xf0102211; + cpu->isar.id_mmfr4 = 0x00000010; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232142; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x00010001; + cpu->isar.dbgdidr = 
0x77168000; + cpu->clidr = (1 << 27) | (1 << 24) | 0x3; + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ + + cpu->pmsav7_dregion = 16; + cpu->pmsav8r_hdregion = 16; +} + static void cortex_r5f_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -1163,6 +1204,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { .class_init = arm_v7m_class_init }, { .name = "cortex-r5", .initfn = cortex_r5_initfn }, { .name = "cortex-r5f", .initfn = cortex_r5f_initfn }, + { .name = "cortex-r52", .initfn = cortex_r52_initfn }, { .name = "ti925t", .initfn = ti925t_initfn }, { .name = "sa1100", .initfn = sa1100_initfn }, { .name = "sa1110", .initfn = sa1110_initfn }, diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index c217392..2f6ddc0 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -437,6 +437,9 @@ static uint32_t arm_debug_exception_fsr(CPUARMState *env) if (target_el == 2 || arm_el_is_aa64(env, target_el)) { using_lpae = true; + } else if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + using_lpae = true; } else { if (arm_feature(env, ARM_FEATURE_LPAE) && (env->cp15.tcr_el[target_el] & TTBCR_EAE)) { diff --git a/target/arm/helper.c b/target/arm/helper.c index bac2ea6..cee3804 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -7,13 +7,11 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" #include "qemu/log.h" #include "trace.h" #include "cpu.h" #include "internals.h" #include "exec/helper-proto.h" -#include "qemu/host-utils.h" #include "qemu/main-loop.h" #include "qemu/timer.h" #include "qemu/bitops.h" @@ -22,17 +20,12 @@ #include "exec/exec-all.h" #include <zlib.h> /* For crc32 */ #include "hw/irq.h" -#include "semihosting/semihost.h" -#include "sysemu/cpus.h" #include "sysemu/cpu-timers.h" #include "sysemu/kvm.h" -#include "qemu/range.h" #include "qapi/qapi-commands-machine-target.h" #include "qapi/error.h" #include "qemu/guest-random.h" #ifdef CONFIG_TCG -#include "arm_ldst.h" -#include "exec/cpu_ldst.h" #include "semihosting/common-semi.h" #endif #include "cpregs.h" @@ -83,7 +76,8 @@ uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t v) { - /* Raw write of a coprocessor register (as needed for migration, etc). + /* + * Raw write of a coprocessor register (as needed for migration, etc). * Note that constant registers are treated as write-ignored; the * caller should check for success by whether a readback gives the * value written. @@ -101,7 +95,8 @@ static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, static bool raw_accessors_invalid(const ARMCPRegInfo *ri) { - /* Return true if the regdef would cause an assertion if you called + /* + * Return true if the regdef would cause an assertion if you called * read_raw_cp_reg() or write_raw_cp_reg() on it (ie if it is a * program bug for it not to have the NO_RAW flag). 
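The Cortex-R52 model added above becomes selectable like any other TCG CPU (e.g. -cpu cortex-r52 on boards that allow CPU selection); a minimal instantiation sketch, board wiring elided:

    /* Sketch: create the new model by its canonical QOM type name. */
    Object *cpuobj = object_new(ARM_CPU_TYPE_NAME("cortex-r52"));
    qdev_realize(DEVICE(cpuobj), NULL, &error_fatal);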
* NB that returning false here doesn't necessarily mean that calling @@ -184,7 +179,8 @@ bool write_list_to_cpustate(ARMCPU *cpu) if (ri->type & ARM_CP_NO_RAW) { continue; } - /* Write value and confirm it reads back as written + /* + * Write value and confirm it reads back as written * (to catch read-only registers and partially read-only * registers where the incoming migration value doesn't match) */ @@ -202,7 +198,7 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque) uint32_t regidx = (uintptr_t)key; const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); - if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { + if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx); /* The value array need not be initialized at this point */ cpu->cpreg_array_len++; @@ -216,7 +212,7 @@ static void count_cpreg(gpointer key, gpointer opaque) ri = g_hash_table_lookup(cpu->cp_regs, key); - if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { + if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_array_len++; } } @@ -237,7 +233,8 @@ static gint cpreg_key_compare(gconstpointer a, gconstpointer b) void init_cpreg_list(ARMCPU *cpu) { - /* Initialise the cpreg_tuples[] array based on the cp_regs hash. + /* + * Initialise the cpreg_tuples[] array based on the cp_regs hash. * Note that we require cpreg_tuples[] to be sorted by key ID. */ GList *keys; @@ -279,7 +276,8 @@ static CPAccessResult access_el3_aa32ns(CPUARMState *env, return CP_ACCESS_OK; } -/* Some secure-only AArch32 registers trap to EL3 if used from +/* + * Some secure-only AArch32 registers trap to EL3 if used from * Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts). * Note that an access from Secure EL1 can only happen if EL3 is AArch64. * We assume that the .access field is set to PL1_RW. @@ -301,7 +299,8 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env, return CP_ACCESS_TRAP_UNCATEGORIZED; } -/* Check for traps to performance monitor registers, which are controlled +/* + * Check for traps to performance monitor registers, which are controlled * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3. */ static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, @@ -399,7 +398,8 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) ARMCPU *cpu = env_archcpu(env); if (raw_read(env, ri) != value) { - /* Unlike real hardware the qemu TLB uses virtual addresses, + /* + * Unlike real hardware the qemu TLB uses virtual addresses, * not modified virtual addresses, so this causes a TLB flush. */ tlb_flush(CPU(cpu)); @@ -414,7 +414,8 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_PMSA) && !extended_addresses_enabled(env)) { - /* For VMSA (when not using the LPAE long descriptor page table + /* + * For VMSA (when not using the LPAE long descriptor page table * format) this register includes the ASID, so do a TLB flush. * For PMSA it is purely a process ID and no action is needed. */ @@ -606,7 +607,8 @@ static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, } static const ARMCPRegInfo cp_reginfo[] = { - /* Define the secure and non-secure FCSE identifier CP registers + /* + * Define the secure and non-secure FCSE identifier CP registers * separately because there is no secure bank in V8 (no _EL3). This allows * the secure register to be properly reset and migrated. 
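The separately banked FCSEIDR definitions above are consumed through the generic banked-register helpers; a sketch (assuming the usual _s/_ns field naming) of how a reader picks the right bank:

    /* Sketch: A32_BANKED_* selects the _s or _ns field for the
     * current (or requested) security state.
     */
    uint32_t fcse = A32_BANKED_CURRENT_REG_GET(env, fcseidr);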
There is also no * v8 EL1 version of the register so the non-secure instance stands alone. @@ -621,7 +623,8 @@ static const ARMCPRegInfo cp_reginfo[] = { .access = PL1_RW, .secure = ARM_CP_SECSTATE_S, .fieldoffset = offsetof(CPUARMState, cp15.fcseidr_s), .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, }, - /* Define the secure and non-secure context identifier CP registers + /* + * Define the secure and non-secure context identifier CP registers * separately because there is no secure bank in V8 (no _EL3). This allows * the secure register to be properly reset and migrated. In the * non-secure case, the 32-bit register will have reset and migration @@ -642,7 +645,8 @@ static const ARMCPRegInfo cp_reginfo[] = { }; static const ARMCPRegInfo not_v8_cp_reginfo[] = { - /* NB: Some of these registers exist in v8 but with more precise + /* + * NB: Some of these registers exist in v8 but with more precise * definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]). */ /* MMU Domain access control / MPU write buffer control */ @@ -652,7 +656,8 @@ static const ARMCPRegInfo not_v8_cp_reginfo[] = { .writefn = dacr_write, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s), offsetoflow32(CPUARMState, cp15.dacr_ns) } }, - /* ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs. + /* + * ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs. * For v6 and v5, these mappings are overly broad. */ { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 0, @@ -670,7 +675,8 @@ static const ARMCPRegInfo not_v8_cp_reginfo[] = { }; static const ARMCPRegInfo not_v6_cp_reginfo[] = { - /* Not all pre-v6 cores implemented this WFI, so this is slightly + /* + * Not all pre-v6 cores implemented this WFI, so this is slightly * over-broad. */ { .name = "WFI_v5", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = 2, @@ -678,12 +684,14 @@ static const ARMCPRegInfo not_v6_cp_reginfo[] = { }; static const ARMCPRegInfo not_v7_cp_reginfo[] = { - /* Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which + /* + * Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which * is UNPREDICTABLE; we choose to NOP as most implementations do). */ { .name = "WFI_v6", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, .access = PL1_W, .type = ARM_CP_WFI }, - /* L1 cache lockdown. Not architectural in v6 and earlier but in practice + /* + * L1 cache lockdown. Not architectural in v6 and earlier but in practice * implemented in 926, 946, 1026, 1136, 1176 and 11MPCore. StrongARM and * OMAPCP will override this space. */ @@ -697,14 +705,16 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { { .name = "DUMMY", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = CP_ANY, .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, - /* We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR; + /* + * We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR; * implementing it as RAZ means the "debug architecture version" bits * will read as a reserved value, which should cause Linux to not try * to use the debug hardware. */ { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* MMU TLB control. Note that the wildcarding means we cover not just + /* + * MMU TLB control. Note that the wildcarding means we cover not just * the unified TLB ops but also the dside/iside/inner-shareable variants. 
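Worked example for the wildcarded TLB ops that follow: one pattern with .crm = CP_ANY matches several classic CP15 encodings at once (summarized from memory of the ARMv7 ARM, listed for orientation rather than exhaustively):

    /*
     * e.g. { .cp = 15, .crn = 8, .crm = CP_ANY, .opc2 = 0 } covers:
     *   c8, c7, 0 - TLBIALL   (unified TLB)
     *   c8, c5, 0 - ITLBIALL  (instruction side)
     *   c8, c6, 0 - DTLBIALL  (data side)
     *   c8, c3, 0 - TLBIALLIS (inner shareable, MP extensions)
     */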
*/ { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY, @@ -732,7 +742,8 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* In ARMv8 most bits of CPACR_EL1 are RES0. */ if (!arm_feature(env, ARM_FEATURE_V8)) { - /* ARMv7 defines bits for unimplemented coprocessors as RAZ/WI. + /* + * ARMv7 defines bits for unimplemented coprocessors as RAZ/WI. * ASEDIS [31] and D32DIS [30] are both UNK/SBZP without VFP. * TRCDIS [28] is RAZ/WI since we do not implement a trace macrocell. */ @@ -748,7 +759,8 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, value |= R_CPACR_ASEDIS_MASK; } - /* VFPv3 and upwards with NEON implement 32 double precision + /* + * VFPv3 and upwards with NEON implement 32 double precision * registers (D0-D31). */ if (!cpu_isar_feature(aa32_simd_r32, env_archcpu(env))) { @@ -790,7 +802,8 @@ static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri) static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri) { - /* Call cpacr_write() so that we reset with the correct RAO bits set + /* + * Call cpacr_write() so that we reset with the correct RAO bits set * for our CPU features. */ cpacr_write(env, ri, 0); @@ -831,7 +844,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { { .name = "MVA_prefetch", .cp = 15, .crn = 7, .crm = 13, .opc1 = 0, .opc2 = 1, .access = PL1_W, .type = ARM_CP_NOP }, - /* We need to break the TB after ISB to execute self-modifying code + /* + * We need to break the TB after ISB to execute self-modifying code * correctly and also to take any pending interrupts immediately. * So use arm_cp_write_ignore() function instead of ARM_CP_NOP flag. */ @@ -846,7 +860,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ifar_s), offsetof(CPUARMState, cp15.ifar_ns) }, .resetvalue = 0, }, - /* Watchpoint Fault Address Register : should actually only be present + /* + * Watchpoint Fault Address Register : should actually only be present * for 1136, 1176, 11MPCore. */ { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, @@ -1051,7 +1066,8 @@ static bool event_supported(uint16_t number) static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* Performance monitor registers user accessibility is controlled + /* + * Performance monitor registers user accessibility is controlled * by PMUSERENR. MDCR_EL2.TPM and MDCR_EL3.TPM allow configurable * trapping to EL2 or EL3 for other accesses. */ @@ -1139,7 +1155,8 @@ static CPAccessResult pmreg_access_ccntr(CPUARMState *env, (MDCR_HPME | MDCR_HPMD | MDCR_HPMN | MDCR_HCCD | MDCR_HLP) #define MDCR_EL3_PMU_ENABLE_BITS (MDCR_SPME | MDCR_SCCD) -/* Returns true if the counter (pass 31 for PMCCNTR) should count events using +/* + * Returns true if the counter (pass 31 for PMCCNTR) should count events using * the current EL, security state, and register configuration. */ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) @@ -1503,7 +1520,8 @@ static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* The value of PMSELR.SEL affects the behavior of PMXEVTYPER and + /* + * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and * PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the * meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are * accessed. 
@@ -1614,7 +1632,8 @@ static void pmevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK; pmevcntr_op_finish(env, counter); } - /* Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when + /* + * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when * PMSELR value is equal to or greater than the number of implemented * counters, but not equal to 0x1f. We opt to behave as a RAZ/WI. */ @@ -1715,8 +1734,10 @@ static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri, } return ret; } else { - /* We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR - * are CONSTRAINED UNPREDICTABLE. */ + /* + * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR + * are CONSTRAINED UNPREDICTABLE. + */ return 0; } } @@ -1791,7 +1812,8 @@ static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Note that even though the AArch64 view of this register has bits + /* + * Note that even though the AArch64 view of this register has bits * [10:0] all RES0 we can only mask the bottom 5, to comply with the * architectural requirements for bits which are RES0 only in some * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7 @@ -1854,7 +1876,8 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) if (!arm_feature(env, ARM_FEATURE_EL2)) { valid_mask &= ~SCR_HCE; - /* On ARMv7, SMD (or SCD as it is called in v7) is only + /* + * On ARMv7, SMD (or SCD as it is called in v7) is only * supported if EL2 exists. The bit is UNK/SBZP when * EL2 is unavailable. In QEMU ARMv7, we force it to always zero * when EL2 is unavailable. @@ -1911,7 +1934,8 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - /* Acquire the CSSELR index from the bank corresponding to the CCSIDR + /* + * Acquire the CSSELR index from the bank corresponding to the CCSIDR * bank */ uint32_t index = A32_BANKED_REG_GET(env, csselr, @@ -1986,7 +2010,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */ { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, .access = PL1_W, .type = ARM_CP_NOP }, - /* Performance monitors are implementation defined in v7, + /* + * Performance monitors are implementation defined in v7, * but with an ARM recommended set of registers, which we * follow. * @@ -2140,7 +2165,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .writefn = csselr_write, .resetvalue = 0, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.csselr_s), offsetof(CPUARMState, cp15.csselr_ns) } }, - /* Auxiliary ID register: this actually has an IMPDEF value but for now + /* + * Auxiliary ID register: this actually has an IMPDEF value but for now * just RAZ for all cores: */ { .name = "AIDR", .state = ARM_CP_STATE_BOTH, @@ -2148,7 +2174,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid1, .resetvalue = 0 }, - /* Auxiliary fault status registers: these also are IMPDEF, and we + /* + * Auxiliary fault status registers: these also are IMPDEF, and we * choose to RAZ/WI for all cores. 
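The clamping that the gt_recalc_timer() comment above describes looks roughly like this (close to what the function actually does; the period helper is the one used elsewhere in this file):

    /* Sketch: keep the expiry within a signed 64-bit QEMUTimer. */
    if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) {
        timer_mod_ns(cpu->gt_timer[timeridx], INT64_MAX);
    } else {
        timer_mod(cpu->gt_timer[timeridx], nexttick);
    }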
*/ { .name = "AFSR0_EL1", .state = ARM_CP_STATE_BOTH, @@ -2159,7 +2186,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* MAIR can just read-as-written because we don't implement caches + /* + * MAIR can just read-as-written because we don't implement caches * and so don't need to care about memory attributes. */ { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64, @@ -2171,10 +2199,12 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .opc0 = 3, .opc1 = 6, .crn = 10, .crm = 2, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[3]), .resetvalue = 0 }, - /* For non-long-descriptor page tables these are PRRR and NMRR; + /* + * For non-long-descriptor page tables these are PRRR and NMRR; * regardless they still act as reads-as-written for QEMU. */ - /* MAIR0/1 are defined separately from their 64-bit counterpart which + /* + * MAIR0/1 are defined separately from their 64-bit counterpart which * allows them to assign the correct fieldoffset based on the endianness * handled in the field definitions. */ @@ -2313,11 +2343,11 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { .resetfn = arm_cp_reset_ignore }, { .name = "TPIDRRO_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 3, .crn = 13, .crm = 0, - .access = PL0_R|PL1_W, + .access = PL0_R | PL1_W, .fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el[0]), .resetvalue = 0}, { .name = "TPIDRURO", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 3, - .access = PL0_R|PL1_W, + .access = PL0_R | PL1_W, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidruro_s), offsetoflow32(CPUARMState, cp15.tpidruro_ns) }, .resetfn = arm_cp_reset_ignore }, @@ -2337,7 +2367,8 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero. + /* + * CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero. * Writable only at the highest implemented exception level. */ int el = arm_current_el(env); @@ -2496,7 +2527,8 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* The AArch64 register view of the secure physical timer is + /* + * The AArch64 register view of the secure physical timer is * always accessible from EL3, and configurably accessible from * Secure EL1. */ @@ -2531,7 +2563,8 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) ARMGenericTimer *gt = &cpu->env.cp15.c14_timer[timeridx]; if (gt->ctl & 1) { - /* Timer enabled: calculate and set current ISTATUS, irq, and + /* + * Timer enabled: calculate and set current ISTATUS, irq, and * reset timer to when ISTATUS next has to change */ uint64_t offset = timeridx == GTIMER_VIRT ? @@ -2554,7 +2587,8 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) /* Next transition is when we hit cval */ nexttick = gt->cval + offset; } - /* Note that the desired next expiry time might be beyond the + /* + * Note that the desired next expiry time might be beyond the * signed-64-bit range of a QEMUTimer -- in this case we just * set the timer for as far in the future as possible. When the * timer expires we will reset the timer for any remaining period. 
@@ -2671,7 +2705,8 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Enable toggled */ gt_recalc_timer(cpu, timeridx); } else if ((oldval ^ value) & 2) { - /* IMASK toggled: don't need to recalculate, + /* + * IMASK toggled: don't need to recalculate, * just set the interrupt line based on ISTATUS */ int irqstate = (oldval & 4) && !(value & 2); @@ -2982,7 +3017,8 @@ static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) } static const ARMCPRegInfo generic_timer_cp_reginfo[] = { - /* Note that CNTFRQ is purely reads-as-written for the benefit + /* + * Note that CNTFRQ is purely reads-as-written for the benefit * of software; writing it doesn't actually change the timer frequency. * Our reset value matches the fixed frequency we implement the timer at. */ @@ -3145,7 +3181,8 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .readfn = gt_virt_redir_cval_read, .raw_readfn = raw_read, .writefn = gt_virt_redir_cval_write, .raw_writefn = raw_write, }, - /* Secure timer -- this is actually restricted to only EL3 + /* + * Secure timer -- this is actually restricted to only EL3 * and configurably Secure-EL1 via the accessfn. */ { .name = "CNTPS_TVAL_EL1", .state = ARM_CP_STATE_AA64, @@ -3184,7 +3221,8 @@ static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri, #else -/* In user-mode most of the generic timer registers are inaccessible +/* + * In user-mode most of the generic timer registers are inaccessible * however modern kernels (4.12+) allow access to cntvct_el0 */ @@ -3192,7 +3230,8 @@ static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - /* Currently we have no support for QEMUTimer in linux-user so we + /* + * Currently we have no support for QEMUTimer in linux-user so we * can't call gt_get_countervalue(env), instead we directly * call the lower level functions. */ @@ -3233,7 +3272,8 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (ri->opc2 & 4) { - /* The ATS12NSO* operations must trap to EL3 or EL2 if executed in + /* + * The ATS12NSO* operations must trap to EL3 or EL2 if executed in * Secure EL1 (which can only happen if EL3 is AArch64). * They are simply UNDEF if executed from NS EL1. * They function normally from EL2 or EL3. @@ -3394,7 +3434,8 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, } } } else { - /* fsr is a DFSR/IFSR value for the short descriptor + /* + * fsr is a DFSR/IFSR value for the short descriptor * translation table format (with WnR always clear). * Convert it to a 32-bit PAR. */ @@ -3682,8 +3723,225 @@ static void pmsav7_rgnr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } +static void prbar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.rbar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]] = value; +} + +static uint64_t prbar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.rbar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]]; +} + +static void prlar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! 
*/ + env->pmsav8.rlar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]] = value; +} + +static uint64_t prlar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.rlar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]]; +} + +static void prselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + /* + * Ignore writes that would select an unimplemented region. + * This is architecturally UNPREDICTABLE. + */ + if (value >= cpu->pmsav7_dregion) { + return; + } + + env->pmsav7.rnr[M_REG_NS] = value; +} + +static void hprbar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.hprbar[env->pmsav8.hprselr] = value; +} + +static uint64_t hprbar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.hprbar[env->pmsav8.hprselr]; +} + +static void hprlar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.hprlar[env->pmsav8.hprselr] = value; +} + +static uint64_t hprlar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.hprlar[env->pmsav8.hprselr]; +} + +static void hprenr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint32_t n; + uint32_t bit; + ARMCPU *cpu = env_archcpu(env); + + /* Ignore writes to unimplemented regions */ + int rmax = MIN(cpu->pmsav8r_hdregion, 32); + value &= MAKE_64BIT_MASK(0, rmax); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + + /* Register alias is only valid for the first 32 indexes */ + for (n = 0; n < rmax; ++n) { + bit = extract32(value, n, 1); + env->pmsav8.hprlar[n] = deposit32( + env->pmsav8.hprlar[n], 0, 1, bit); + } +} + +static uint64_t hprenr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint32_t n; + uint32_t result = 0x0; + ARMCPU *cpu = env_archcpu(env); + + /* Register alias is only valid for the first 32 indexes */ + for (n = 0; n < MIN(cpu->pmsav8r_hdregion, 32); ++n) { + if (env->pmsav8.hprlar[n] & 0x1) { + result |= (0x1 << n); + } + } + return result; +} + +static void hprselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + /* + * Ignore writes that would select an unimplemented region. + * This is architecturally UNPREDICTABLE. + */ + if (value >= cpu->pmsav8r_hdregion) { + return; + } + + env->pmsav8.hprselr = value; +} + +static void pmsav8r_regn_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint8_t index = (extract32(ri->opc0, 0, 1) << 4) | + (extract32(ri->crm, 0, 3) << 1) | extract32(ri->opc2, 2, 1); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge!
*/ + + if (ri->opc1 & 4) { + if (index >= cpu->pmsav8r_hdregion) { + return; + } + if (ri->opc2 & 0x1) { + env->pmsav8.hprlar[index] = value; + } else { + env->pmsav8.hprbar[index] = value; + } + } else { + if (index >= cpu->pmsav7_dregion) { + return; + } + if (ri->opc2 & 0x1) { + env->pmsav8.rlar[M_REG_NS][index] = value; + } else { + env->pmsav8.rbar[M_REG_NS][index] = value; + } + } +} + +static uint64_t pmsav8r_regn_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = env_archcpu(env); + uint8_t index = (extract32(ri->opc0, 0, 1) << 4) | + (extract32(ri->crm, 0, 3) << 1) | extract32(ri->opc2, 2, 1); + + if (ri->opc1 & 4) { + if (index >= cpu->pmsav8r_hdregion) { + return 0x0; + } + if (ri->opc2 & 0x1) { + return env->pmsav8.hprlar[index]; + } else { + return env->pmsav8.hprbar[index]; + } + } else { + if (index >= cpu->pmsav7_dregion) { + return 0x0; + } + if (ri->opc2 & 0x1) { + return env->pmsav8.rlar[M_REG_NS][index]; + } else { + return env->pmsav8.rbar[M_REG_NS][index]; + } + } +} + +static const ARMCPRegInfo pmsav8r_cp_reginfo[] = { + { .name = "PRBAR", + .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .accessfn = access_tvm_trvm, + .readfn = prbar_read, .writefn = prbar_write }, + { .name = "PRLAR", + .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .accessfn = access_tvm_trvm, + .readfn = prlar_read, .writefn = prlar_write }, + { .name = "PRSELR", .resetvalue = 0, + .cp = 15, .opc1 = 0, .crn = 6, .crm = 2, .opc2 = 1, + .access = PL1_RW, .accessfn = access_tvm_trvm, + .writefn = prselr_write, + .fieldoffset = offsetof(CPUARMState, pmsav7.rnr[M_REG_NS]) }, + { .name = "HPRBAR", .resetvalue = 0, + .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_NO_RAW, + .readfn = hprbar_read, .writefn = hprbar_write }, + { .name = "HPRLAR", + .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_NO_RAW, + .readfn = hprlar_read, .writefn = hprlar_write }, + { .name = "HPRSELR", .resetvalue = 0, + .cp = 15, .opc1 = 4, .crn = 6, .crm = 2, .opc2 = 1, + .access = PL2_RW, + .writefn = hprselr_write, + .fieldoffset = offsetof(CPUARMState, pmsav8.hprselr) }, + { .name = "HPRENR", + .cp = 15, .opc1 = 4, .crn = 6, .crm = 1, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_NO_RAW, + .readfn = hprenr_read, .writefn = hprenr_write }, +}; + static const ARMCPRegInfo pmsav7_cp_reginfo[] = { - /* Reset for all these registers is handled in arm_cpu_reset(), + /* + * Reset for all these registers is handled in arm_cpu_reset(), * because the PMSAv7 is also used by M-profile CPUs, which do * not register cpregs but still need the state to be reset. */ @@ -3784,7 +4042,8 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, } if (arm_feature(env, ARM_FEATURE_LPAE)) { - /* With LPAE the TTBCR could result in a change of ASID + /* + * With LPAE the TTBCR could result in a change of ASID * via the TTBCR.A1 bit, so do a TLB flush. */ tlb_flush(CPU(cpu)); @@ -3901,7 +4160,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = { offsetoflow32(CPUARMState, cp15.tcr_el[1])} }, }; -/* Note that unlike TTBCR, writing to TTBCR2 does not require flushing +/* + * Note that unlike TTBCR, writing to TTBCR2 does not require flushing * qemu tlbs nor adjusting cached masks. 
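The HPRENR alias implemented above is easiest to check with a concrete value; a hypothetical call sequence (the functions are static to this file, so this is illustration only):

    /* Assumes pmsav8r_hdregion >= 3. */
    hprenr_write(env, ri, 0x5);               /* enable regions 0 and 2 */
    assert((env->pmsav8.hprlar[0] & 1) == 1);
    assert((env->pmsav8.hprlar[1] & 1) == 0); /* region 1 stays disabled */
    assert(hprenr_read(env, ri) == 0x5);      /* EN bits read back as bitmap */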
*/ static const ARMCPRegInfo ttbcr2_reginfo = { @@ -3939,7 +4199,8 @@ static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* On OMAP there are registers indicating the max/min index of dcache lines + /* + * On OMAP there are registers indicating the max/min index of dcache lines * containing a dirty line; cache flush operations have to reset these. */ env->cp15.c15_i_max = 0x000; @@ -3971,7 +4232,8 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { .crm = 8, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NO_RAW, .readfn = arm_cp_read_zero, .writefn = omap_wfi_write, }, - /* TODO: Peripheral port remap register: + /* + * TODO: Peripheral port remap register: * On OMAP2 mcr p15, 0, rn, c15, c2, 4 sets up the interrupt controller * base address at $rn & ~0xfff and map size of 0x200 << ($rn & 0xfff), * when MMU is off. @@ -4000,7 +4262,8 @@ static const ARMCPRegInfo xscale_cp_reginfo[] = { .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr), .resetvalue = 0, }, - /* XScale specific cache-lockdown: since we have no cache we NOP these + /* + * XScale specific cache-lockdown: since we have no cache we NOP these * and hope the guest does not really rely on cache behaviour. */ { .name = "XSCALE_LOCK_ICACHE_LINE", @@ -4018,7 +4281,8 @@ static const ARMCPRegInfo xscale_cp_reginfo[] = { }; static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { - /* RAZ/WI the whole crn=15 space, when we don't have a more specific + /* + * RAZ/WI the whole crn=15 space, when we don't have a more specific * implementation of this implementation-defined space. * Ideally this should eventually disappear in favour of actually * implementing the correct behaviour for all cores. @@ -4044,21 +4308,22 @@ static const ARMCPRegInfo cache_block_ops_cp_reginfo[] = { .resetvalue = 0 }, /* The cache ops themselves: these all NOP for QEMU */ { .name = "IICR", .cp = 15, .crm = 5, .opc1 = 0, - .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "IDCR", .cp = 15, .crm = 6, .opc1 = 0, - .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "CDCR", .cp = 15, .crm = 12, .opc1 = 0, - .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "PIR", .cp = 15, .crm = 12, .opc1 = 1, - .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "PDR", .cp = 15, .crm = 12, .opc1 = 2, - .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "CIDCR", .cp = 15, .crm = 14, .opc1 = 0, - .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, }; static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = { - /* The cache test-and-clean instructions always return (1 << 30) + /* + * The cache test-and-clean instructions always return (1 << 30) * to indicate that there are no dirty cache lines. 
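Why (1 << 30) in the test-and-clean registers above: when the MRC destination is r15, result bits [31:28] land in NZCV, so bit 30 sets the Z flag and the classic v5 clean loop exits on its first iteration (recalled from the ARM926 manuals, shown for orientation):

    /*
     * Guest-side idiom this keeps working:
     *   tc: mrc p15, 0, r15, c7, c10, 3   ; test and clean DCache
     *       bne tc                        ; Z=1 here, falls straight through
     */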
*/ { .name = "TC_DCACHE", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 3, @@ -4094,7 +4359,8 @@ static uint64_t mpidr_read_val(CPUARMState *env) if (arm_feature(env, ARM_FEATURE_V7MP)) { mpidr |= (1U << 31); - /* Cores which are uniprocessor (non-coherent) + /* + * Cores which are uniprocessor (non-coherent) * but still implement the MP extensions set * bit 30. (For instance, Cortex-R5). */ @@ -4306,7 +4572,8 @@ static CPAccessResult access_tocu(CPUARMState *env, const ARMCPRegInfo *ri, return do_cacheop_pou_access(env, HCR_TOCU | HCR_TPU); } -/* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions +/* + * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions * Page D4-1736 (DDI0487A.b) */ @@ -4439,7 +4706,8 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Invalidate by VA, EL2 + /* + * Invalidate by VA, EL2 * Currently handles both VAE2 and VALE2, since we don't support * flush-last-level-only. */ @@ -4453,7 +4721,8 @@ static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Invalidate by VA, EL3 + /* + * Invalidate by VA, EL3 * Currently handles both VAE3 and VALE3, since we don't support * flush-last-level-only. */ @@ -4478,7 +4747,8 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Invalidate by VA, EL1&0 (AArch64 version). + /* + * Invalidate by VA, EL1&0 (AArch64 version). * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, * since we don't support flush-for-specific-ASID-only or * flush-last-level-only. @@ -4799,7 +5069,8 @@ static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (!(env->pstate & PSTATE_SP)) { - /* Access to SP_EL0 is undefined if it's being used as + /* + * Access to SP_EL0 is undefined if it's being used as * the stack pointer. */ return CP_ACCESS_TRAP_UNCATEGORIZED; @@ -4839,7 +5110,8 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, } if (raw_read(env, ri) == value) { - /* Skip the TLB flush if nothing actually changed; Linux likes + /* + * Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. */ return; @@ -4907,7 +5179,8 @@ static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, } static const ARMCPRegInfo v8_cp_reginfo[] = { - /* Minimal set of EL0-visible registers. This will need to be expanded + /* + * Minimal set of EL0-visible registers. This will need to be expanded * significantly for system emulation of AArch64 CPUs. */ { .name = "NZCV", .state = ARM_CP_STATE_AA64, @@ -5190,7 +5463,8 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) }, - /* We rely on the access checks not allowing the guest to write to the + /* + * We rely on the access checks not allowing the guest to write to the * state field when SPSel indicates that it's being used as the stack * pointer. 
*/ @@ -5268,7 +5542,8 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (arm_feature(env, ARM_FEATURE_EL3)) { valid_mask &= ~HCR_HCD; } else if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) { - /* Architecturally HCR.TSC is RES0 if EL3 is not implemented. + /* + * Architecturally HCR.TSC is RES0 if EL3 is not implemented. * However, if we're using the SMC PSCI conduit then QEMU is * effectively acting like EL3 firmware and so the guest at * EL2 should retain the ability to prevent EL1 from being @@ -5698,7 +5973,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_vae2is_write }, #ifndef CONFIG_USER_ONLY - /* Unlike the other EL2-related AT operations, these must + /* + * Unlike the other EL2-related AT operations, these must * UNDEF from EL3 if EL2 is not implemented, which is why we * define them here rather than with the rest of the AT ops. */ @@ -5712,7 +5988,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .access = PL2_W, .accessfn = at_s1e2_access, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = ats_write64 }, - /* The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE + /* + * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE * if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3 * with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose * to behave as if SCR.NS was 1. @@ -5725,7 +6002,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, { .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0, - /* ARMv7 requires bit 0 and 1 to reset to 1. ARMv8 defines the + /* + * ARMv7 requires bit 0 and 1 to reset to 1. ARMv8 defines the * reset values as IMPDEF. We choose to reset to 3 to comply with * both ARMv7 and ARMv8. */ @@ -5808,7 +6086,8 @@ static const ARMCPRegInfo el2_sec_cp_reginfo[] = { static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* The NSACR is RW at EL3, and RO for NS EL1 and NS EL2. + /* + * The NSACR is RW at EL3, and RO for NS EL1 and NS EL2. * At Secure EL1 it traps to EL3 or EL2. */ if (arm_current_el(env) == 3) { @@ -6612,7 +6891,8 @@ static void define_pmu_regs(ARMCPU *cpu) } } -/* We don't know until after realize whether there's a GICv3 +/* + * We don't know until after realize whether there's a GICv3 * attached, and that is what registers the gicv3 sysregs. * So we have to fill in the GIC fields in ID_PFR/ID_PFR1_EL1/ID_AA64PFR0_EL1 * at runtime. @@ -6641,7 +6921,8 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) } #endif -/* Shared logic between LORID and the rest of the LOR* registers. +/* + * Shared logic between LORID and the rest of the LOR* registers. * Secure state exclusion has already been dealt with. */ static CPAccessResult access_lor_ns(CPUARMState *env, @@ -7468,7 +7749,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, cp_reginfo); if (!arm_feature(env, ARM_FEATURE_V8)) { - /* Must go early as it is full of wildcards that may be + /* + * Must go early as it is full of wildcards that may be * overridden by later definitions. 
*/ define_arm_cp_regs(cpu, not_v8_cp_reginfo); @@ -7482,7 +7764,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, .resetvalue = cpu->isar.id_pfr0 }, - /* ID_PFR1 is not a plain ARM_CP_CONST because we don't know + /* + * ID_PFR1 is not a plain ARM_CP_CONST because we don't know * the value of the GIC field until after we define these regs. */ { .name = "ID_PFR1", .state = ARM_CP_STATE_BOTH, @@ -7864,31 +8147,89 @@ void register_cp_regs_for_features(ARMCPU *cpu) #ifdef CONFIG_USER_ONLY static const ARMCPRegUserSpaceInfo v8_user_idregs[] = { { .name = "ID_AA64PFR0_EL1", - .exported_bits = 0x000f000f00ff0000, - .fixed_bits = 0x0000000000000011 }, + .exported_bits = R_ID_AA64PFR0_FP_MASK | + R_ID_AA64PFR0_ADVSIMD_MASK | + R_ID_AA64PFR0_SVE_MASK | + R_ID_AA64PFR0_DIT_MASK, + .fixed_bits = (0x1u << R_ID_AA64PFR0_EL0_SHIFT) | + (0x1u << R_ID_AA64PFR0_EL1_SHIFT) }, { .name = "ID_AA64PFR1_EL1", - .exported_bits = 0x00000000000000f0 }, + .exported_bits = R_ID_AA64PFR1_BT_MASK | + R_ID_AA64PFR1_SSBS_MASK | + R_ID_AA64PFR1_MTE_MASK | + R_ID_AA64PFR1_SME_MASK }, { .name = "ID_AA64PFR*_EL1_RESERVED", - .is_glob = true }, - { .name = "ID_AA64ZFR0_EL1" }, + .is_glob = true }, + { .name = "ID_AA64ZFR0_EL1", + .exported_bits = R_ID_AA64ZFR0_SVEVER_MASK | + R_ID_AA64ZFR0_AES_MASK | + R_ID_AA64ZFR0_BITPERM_MASK | + R_ID_AA64ZFR0_BFLOAT16_MASK | + R_ID_AA64ZFR0_SHA3_MASK | + R_ID_AA64ZFR0_SM4_MASK | + R_ID_AA64ZFR0_I8MM_MASK | + R_ID_AA64ZFR0_F32MM_MASK | + R_ID_AA64ZFR0_F64MM_MASK }, + { .name = "ID_AA64SMFR0_EL1", + .exported_bits = R_ID_AA64SMFR0_F32F32_MASK | + R_ID_AA64SMFR0_B16F32_MASK | + R_ID_AA64SMFR0_F16F32_MASK | + R_ID_AA64SMFR0_I8I32_MASK | + R_ID_AA64SMFR0_F64F64_MASK | + R_ID_AA64SMFR0_I16I64_MASK | + R_ID_AA64SMFR0_FA64_MASK }, { .name = "ID_AA64MMFR0_EL1", - .fixed_bits = 0x00000000ff000000 }, - { .name = "ID_AA64MMFR1_EL1" }, + .exported_bits = R_ID_AA64MMFR0_ECV_MASK, + .fixed_bits = (0xfu << R_ID_AA64MMFR0_TGRAN64_SHIFT) | + (0xfu << R_ID_AA64MMFR0_TGRAN4_SHIFT) }, + { .name = "ID_AA64MMFR1_EL1", + .exported_bits = R_ID_AA64MMFR1_AFP_MASK }, + { .name = "ID_AA64MMFR2_EL1", + .exported_bits = R_ID_AA64MMFR2_AT_MASK }, { .name = "ID_AA64MMFR*_EL1_RESERVED", - .is_glob = true }, + .is_glob = true }, { .name = "ID_AA64DFR0_EL1", - .fixed_bits = 0x0000000000000006 }, - { .name = "ID_AA64DFR1_EL1" }, + .fixed_bits = (0x6u << R_ID_AA64DFR0_DEBUGVER_SHIFT) }, + { .name = "ID_AA64DFR1_EL1" }, { .name = "ID_AA64DFR*_EL1_RESERVED", - .is_glob = true }, + .is_glob = true }, { .name = "ID_AA64AFR*", - .is_glob = true }, + .is_glob = true }, { .name = "ID_AA64ISAR0_EL1", - .exported_bits = 0x00fffffff0fffff0 }, + .exported_bits = R_ID_AA64ISAR0_AES_MASK | + R_ID_AA64ISAR0_SHA1_MASK | + R_ID_AA64ISAR0_SHA2_MASK | + R_ID_AA64ISAR0_CRC32_MASK | + R_ID_AA64ISAR0_ATOMIC_MASK | + R_ID_AA64ISAR0_RDM_MASK | + R_ID_AA64ISAR0_SHA3_MASK | + R_ID_AA64ISAR0_SM3_MASK | + R_ID_AA64ISAR0_SM4_MASK | + R_ID_AA64ISAR0_DP_MASK | + R_ID_AA64ISAR0_FHM_MASK | + R_ID_AA64ISAR0_TS_MASK | + R_ID_AA64ISAR0_RNDR_MASK }, { .name = "ID_AA64ISAR1_EL1", - .exported_bits = 0x000000f0ffffffff }, + .exported_bits = R_ID_AA64ISAR1_DPB_MASK | + R_ID_AA64ISAR1_APA_MASK | + R_ID_AA64ISAR1_API_MASK | + R_ID_AA64ISAR1_JSCVT_MASK | + R_ID_AA64ISAR1_FCMA_MASK | + R_ID_AA64ISAR1_LRCPC_MASK | + R_ID_AA64ISAR1_GPA_MASK | + R_ID_AA64ISAR1_GPI_MASK | + R_ID_AA64ISAR1_FRINTTS_MASK | + R_ID_AA64ISAR1_SB_MASK | + R_ID_AA64ISAR1_BF16_MASK | + R_ID_AA64ISAR1_DGH_MASK | + 
R_ID_AA64ISAR1_I8MM_MASK }, + { .name = "ID_AA64ISAR2_EL1", + .exported_bits = R_ID_AA64ISAR2_WFXT_MASK | + R_ID_AA64ISAR2_RPRES_MASK | + R_ID_AA64ISAR2_GPA3_MASK | + R_ID_AA64ISAR2_APA3_MASK }, { .name = "ID_AA64ISAR*_EL1_RESERVED", - .is_glob = true }, + .is_glob = true }, }; modify_arm_cp_regs(v8_idregs, v8_user_idregs); #endif @@ -7896,7 +8237,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (!arm_feature(env, ARM_FEATURE_EL3) && !arm_feature(env, ARM_FEATURE_EL2)) { ARMCPRegInfo rvbar = { - .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64, + .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, .access = PL1_R, .fieldoffset = offsetof(CPUARMState, cp15.rvbar), @@ -7987,13 +8328,20 @@ void register_cp_regs_for_features(ARMCPU *cpu) } /* RVBAR_EL2 is only implemented if EL2 is the highest EL */ if (!arm_feature(env, ARM_FEATURE_EL3)) { - ARMCPRegInfo rvbar = { - .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1, - .access = PL2_R, - .fieldoffset = offsetof(CPUARMState, cp15.rvbar), + ARMCPRegInfo rvbar[] = { + { + .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL2_R, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar), + }, + { .name = "RVBAR", .type = ARM_CP_ALIAS, + .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL2_R, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar), + }, }; - define_one_arm_cp_reg(cpu, &rvbar); + define_arm_cp_regs(cpu, rvbar); } } @@ -8016,7 +8364,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, el3_regs); } - /* The behaviour of NSACR is sufficiently various that we don't + /* + * The behaviour of NSACR is sufficiently various that we don't * try to describe it in a single reginfo: * if EL3 is 64 bit, then trap to EL3 from S EL1, * reads as constant 0xc00 from NS EL1 and NS EL2 @@ -8108,13 +8457,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa32_jazelle, cpu)) { define_arm_cp_regs(cpu, jazelle_regs); } - /* Slightly awkwardly, the OMAP and StrongARM cores need all of + /* + * Slightly awkwardly, the OMAP and StrongARM cores need all of * cp15 crn=0 to be writes-ignored, whereas for other cores they should * be read-only (ie write causes UNDEF exception). */ { ARMCPRegInfo id_pre_v8_midr_cp_reginfo[] = { - /* Pre-v8 MIDR space. + /* + * Pre-v8 MIDR space. * Note that the MIDR isn't a simple constant register because * of the TI925 behaviour where writes to another register can * cause the MIDR value to change. 
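/*
 * For reference, a minimal standalone sketch of what the exported_bits /
 * fixed_bits pairs above express. When these ID registers are exposed to
 * user-only emulation, fields outside exported_bits read as zero and the
 * fields in fixed_bits read as the given constant; the actual application
 * happens via modify_arm_cp_regs(). Function and parameter names here are
 * illustrative, not QEMU API.
 */
#include <stdint.h>

static uint64_t mask_idreg_for_userspace(uint64_t value,
                                         uint64_t exported_bits,
                                         uint64_t fixed_bits)
{
    value &= exported_bits;   /* hide everything not explicitly exported */
    value |= fixed_bits;      /* force fields that must read as constant */
    return value;
}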
@@ -8153,10 +8504,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_NO_RAW, .resetvalue = cpu->midr, .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid), .readfn = midr_read }, - /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */ - { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, - .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4, - .access = PL1_R, .resetvalue = cpu->midr }, + /* crn = 0 op1 = 0 crm = 0 op2 = 7 : AArch32 aliases of MIDR */ { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 7, .access = PL1_R, .resetvalue = cpu->midr }, @@ -8166,6 +8514,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .accessfn = access_aa64_tid1, .type = ARM_CP_CONST, .resetvalue = cpu->revidr }, }; + ARMCPRegInfo id_v8_midr_alias_cp_reginfo = { + .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4, + .access = PL1_R, .resetvalue = cpu->midr + }; ARMCPRegInfo id_cp_reginfo[] = { /* These are common to v8 and pre-v8 */ { .name = "CTR", @@ -8198,6 +8551,13 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->pmsav7_dregion << 8 }; + /* HMPUIR is specific to PMSA V8 */ + ARMCPRegInfo id_hmpuir_reginfo = { + .name = "HMPUIR", + .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL2_R, .type = ARM_CP_CONST, + .resetvalue = cpu->pmsav8r_hdregion + }; static const ARMCPRegInfo crn0_wi_reginfo = { .name = "CRN0_WI", .cp = 15, .crn = 0, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W, @@ -8206,15 +8566,20 @@ void register_cp_regs_for_features(ARMCPU *cpu) #ifdef CONFIG_USER_ONLY static const ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = { { .name = "MIDR_EL1", - .exported_bits = 0x00000000ffffffff }, - { .name = "REVIDR_EL1" }, + .exported_bits = R_MIDR_EL1_REVISION_MASK | + R_MIDR_EL1_PARTNUM_MASK | + R_MIDR_EL1_ARCHITECTURE_MASK | + R_MIDR_EL1_VARIANT_MASK | + R_MIDR_EL1_IMPLEMENTER_MASK }, + { .name = "REVIDR_EL1" }, }; modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo); #endif if (arm_feature(env, ARM_FEATURE_OMAPCP) || arm_feature(env, ARM_FEATURE_STRONGARM)) { size_t i; - /* Register the blanket "writes ignored" value first to cover the + /* + * Register the blanket "writes ignored" value first to cover the * whole space. Then update the specific ID registers to allow write * access, so that they ignore writes rather than causing them to * UNDEF. 
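/*
 * Worked check for the MIDR_EL1 conversion above: the five symbolic field
 * masks OR together to exactly the 0x00000000ffffffff literal they replace,
 * given the architected MIDR layout (REVISION [3:0], PARTNUM [15:4],
 * ARCHITECTURE [19:16], VARIANT [23:20], IMPLEMENTER [31:24]). The macro
 * names below are illustrative stand-ins for the R_MIDR_EL1_*_MASK
 * constants.
 */
#include <assert.h>
#include <stdint.h>

#define MIDR_REVISION_MASK      0x0000000fULL
#define MIDR_PARTNUM_MASK       0x0000fff0ULL
#define MIDR_ARCHITECTURE_MASK  0x000f0000ULL
#define MIDR_VARIANT_MASK       0x00f00000ULL
#define MIDR_IMPLEMENTER_MASK   0xff000000ULL

int main(void)
{
    uint64_t exported = MIDR_REVISION_MASK | MIDR_PARTNUM_MASK |
                        MIDR_ARCHITECTURE_MASK | MIDR_VARIANT_MASK |
                        MIDR_IMPLEMENTER_MASK;
    /* Same set of bits as the old hex literal, i.e. the whole 32-bit MIDR. */
    assert(exported == 0x00000000ffffffffULL);
    return 0;
}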
@@ -8231,12 +8596,83 @@ void register_cp_regs_for_features(ARMCPU *cpu) } if (arm_feature(env, ARM_FEATURE_V8)) { define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo); + if (!arm_feature(env, ARM_FEATURE_PMSA)) { + define_one_arm_cp_reg(cpu, &id_v8_midr_alias_cp_reginfo); + } } else { define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo); } define_arm_cp_regs(cpu, id_cp_reginfo); if (!arm_feature(env, ARM_FEATURE_PMSA)) { define_one_arm_cp_reg(cpu, &id_tlbtr_reginfo); + } else if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + uint32_t i = 0; + char *tmp_string; + + define_one_arm_cp_reg(cpu, &id_mpuir_reginfo); + define_one_arm_cp_reg(cpu, &id_hmpuir_reginfo); + define_arm_cp_regs(cpu, pmsav8r_cp_reginfo); + + /* Register alias is only valid for first 32 indexes */ + for (i = 0; i < MIN(cpu->pmsav7_dregion, 32); ++i) { + uint8_t crm = 0b1000 | extract32(i, 1, 3); + uint8_t opc1 = extract32(i, 4, 1); + uint8_t opc2 = extract32(i, 0, 1) << 2; + + tmp_string = g_strdup_printf("PRBAR%u", i); + ARMCPRegInfo tmp_prbarn_reginfo = { + .name = tmp_string, .type = ARM_CP_ALIAS | ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL1_RW, .resetvalue = 0, + .accessfn = access_tvm_trvm, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_prbarn_reginfo); + g_free(tmp_string); + + opc2 = extract32(i, 0, 1) << 2 | 0x1; + tmp_string = g_strdup_printf("PRLAR%u", i); + ARMCPRegInfo tmp_prlarn_reginfo = { + .name = tmp_string, .type = ARM_CP_ALIAS | ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL1_RW, .resetvalue = 0, + .accessfn = access_tvm_trvm, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_prlarn_reginfo); + g_free(tmp_string); + } + + /* Register alias is only valid for first 32 indexes */ + for (i = 0; i < MIN(cpu->pmsav8r_hdregion, 32); ++i) { + uint8_t crm = 0b1000 | extract32(i, 1, 3); + uint8_t opc1 = 0b100 | extract32(i, 4, 1); + uint8_t opc2 = extract32(i, 0, 1) << 2; + + tmp_string = g_strdup_printf("HPRBAR%u", i); + ARMCPRegInfo tmp_hprbarn_reginfo = { + .name = tmp_string, + .type = ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL2_RW, .resetvalue = 0, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_hprbarn_reginfo); + g_free(tmp_string); + + opc2 = extract32(i, 0, 1) << 2 | 0x1; + tmp_string = g_strdup_printf("HPRLAR%u", i); + ARMCPRegInfo tmp_hprlarn_reginfo = { + .name = tmp_string, + .type = ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL2_RW, .resetvalue = 0, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_hprlarn_reginfo); + g_free(tmp_string); + } } else if (arm_feature(env, ARM_FEATURE_V7)) { define_one_arm_cp_reg(cpu, &id_mpuir_reginfo); } @@ -8314,7 +8750,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo cbar = { .name = "CBAR", .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0, - .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar, + .access = PL1_R | PL3_W, .resetvalue = cpu->reset_cbar, .fieldoffset = offsetof(CPUARMState, cp15.c15_config_base_address) }; @@ -8351,13 +8787,25 @@ void register_cp_regs_for_features(ARMCPU *cpu) .raw_writefn = raw_write, }; if (arm_feature(env, ARM_FEATURE_XSCALE)) { - /* Normally we would always end the TB on an SCTLR write, but Linux + /* 
+ * Normally we would always end the TB on an SCTLR write, but Linux * arch/arm/mach-pxa/sleep.S expects two instructions following * an MMU enable to execute from cache. Imitate this behaviour. */ sctlr.type |= ARM_CP_SUPPRESS_TB_END; } define_one_arm_cp_reg(cpu, &sctlr); + + if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + ARMCPRegInfo vsctlr = { + .name = "VSCTLR", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, + .access = PL2_RW, .resetvalue = 0x0, + .fieldoffset = offsetoflow32(CPUARMState, cp15.vsctlr), + }; + define_one_arm_cp_reg(cpu, &vsctlr); + } } if (cpu_isar_feature(aa64_lor, cpu)) { @@ -8746,7 +9194,8 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *r, void *opaque) { - /* Define implementations of coprocessor registers. + /* + * Define implementations of coprocessor registers. * We store these in a hashtable because typically * there are less than 150 registers in a space which * is 16*16*16*8*8 = 262144 in size. @@ -8813,7 +9262,8 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, default: g_assert_not_reached(); } - /* The AArch64 pseudocode CheckSystemAccess() specifies that op1 + /* + * The AArch64 pseudocode CheckSystemAccess() specifies that op1 * encodes a minimum access level for the register. We roll this * runtime check into our general permission check code, so check * here that the reginfo's specified permissions are strict enough @@ -8855,7 +9305,8 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, assert((r->access & ~mask) == 0); } - /* Check that the register definition has enough info to handle + /* + * Check that the register definition has enough info to handle * reads and writes if they are permitted. */ if (!(r->type & (ARM_CP_SPECIAL_MASK | ARM_CP_CONST))) { @@ -8880,7 +9331,8 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, continue; } if (state == ARM_CP_STATE_AA32) { - /* Under AArch32 CP registers can be common + /* + * Under AArch32 CP registers can be common * (same for secure and non-secure world) or banked. */ char *name; @@ -8906,8 +9358,10 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, g_assert_not_reached(); } } else { - /* AArch64 registers get mapped to non-secure instance - * of AArch32 */ + /* + * AArch64 registers get mapped to non-secure instance + * of AArch32 + */ add_cpreg_to_hashtable(cpu, r, opaque, state, ARM_CP_SECSTATE_NS, crm, opc1, opc2, r->name); @@ -8993,7 +9447,8 @@ void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque) static int bad_mode_switch(CPUARMState *env, int mode, CPSRWriteType write_type) { - /* Return true if it is not valid for us to switch to + /* + * Return true if it is not valid for us to switch to * this CPU mode (ie all the UNPREDICTABLE cases in * the ARM ARM CPSRWriteByInstr pseudocode). */ @@ -9014,10 +9469,12 @@ static int bad_mode_switch(CPUARMState *env, int mode, CPSRWriteType write_type) case ARM_CPU_MODE_UND: case ARM_CPU_MODE_IRQ: case ARM_CPU_MODE_FIQ: - /* Note that we don't implement the IMPDEF NSACR.RFR which in v7 + /* + * Note that we don't implement the IMPDEF NSACR.RFR which in v7 * allows FIQ mode to be Secure-only. (In v8 this doesn't exist.) */ - /* If HCR.TGE is set then changes from Monitor to NS PL1 via MSR + /* + * If HCR.TGE is set then changes from Monitor to NS PL1 via MSR * and CPS are treated as illegal mode changes. 
*/ if (write_type == CPSRWriteByInstr && @@ -9059,10 +9516,12 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, env->CF = (val >> 29) & 1; env->VF = (val << 3) & 0x80000000; } - if (mask & CPSR_Q) + if (mask & CPSR_Q) { env->QF = ((val & CPSR_Q) != 0); - if (mask & CPSR_T) + } + if (mask & CPSR_T) { env->thumb = ((val & CPSR_T) != 0); + } if (mask & CPSR_IT_0_1) { env->condexec_bits &= ~3; env->condexec_bits |= (val >> 25) & 3; @@ -9075,7 +9534,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, env->GE = (val >> 16) & 0xf; } - /* In a V7 implementation that includes the security extensions but does + /* + * In a V7 implementation that includes the security extensions but does * not include Virtualization Extensions the SCR.FW and SCR.AW bits control * whether non-secure software is allowed to change the CPSR_F and CPSR_A * bits respectively. @@ -9091,7 +9551,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, changed_daif = (env->daif ^ val) & mask; if (changed_daif & CPSR_A) { - /* Check to see if we are allowed to change the masking of async + /* + * Check to see if we are allowed to change the masking of async * abort exceptions from a non-secure state. */ if (!(env->cp15.scr_el3 & SCR_AW)) { @@ -9103,7 +9564,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, } if (changed_daif & CPSR_F) { - /* Check to see if we are allowed to change the masking of FIQ + /* + * Check to see if we are allowed to change the masking of FIQ * exceptions from a non-secure state. */ if (!(env->cp15.scr_el3 & SCR_FW)) { @@ -9113,7 +9575,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, mask &= ~CPSR_F; } - /* Check whether non-maskable FIQ (NMFI) support is enabled. + /* + * Check whether non-maskable FIQ (NMFI) support is enabled. * If this bit is set software is not allowed to mask * FIQs, but is allowed to set CPSR_F to 0. */ @@ -9133,7 +9596,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, if (write_type != CPSRWriteRaw && ((env->uncached_cpsr ^ val) & mask & CPSR_M)) { if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) { - /* Note that we can only get here in USR mode if this is a + /* + * Note that we can only get here in USR mode if this is a * gdb stub write; for this case we follow the architectural * behaviour for guest writes in USR mode of ignoring an attempt * to switch mode. 
(Those are caught by translate.c for writes @@ -9141,7 +9605,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, */ mask &= ~CPSR_M; } else if (bad_mode_switch(env, val & CPSR_M, write_type)) { - /* Attempt to switch to an invalid mode: this is UNPREDICTABLE in + /* + * Attempt to switch to an invalid mode: this is UNPREDICTABLE in * v7, and has defined behaviour in v8: * + leave CPSR.M untouched * + allow changes to the other CPSR fields @@ -9261,15 +9726,16 @@ static void switch_mode(CPUARMState *env, int mode) int i; old_mode = env->uncached_cpsr & CPSR_M; - if (mode == old_mode) + if (mode == old_mode) { return; + } if (old_mode == ARM_CPU_MODE_FIQ) { - memcpy (env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t)); - memcpy (env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t)); + memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t)); + memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t)); } else if (mode == ARM_CPU_MODE_FIQ) { - memcpy (env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t)); - memcpy (env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t)); + memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t)); + memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t)); } i = bank_number(old_mode); @@ -9284,7 +9750,8 @@ static void switch_mode(CPUARMState *env, int mode) env->regs[14] = env->banked_r14[r14_bank_number(mode)]; } -/* Physical Interrupt Target EL Lookup Table +/* + * Physical Interrupt Target EL Lookup Table * * [ From ARM ARM section G1.13.4 (Table G1-15) ] * @@ -9358,7 +9825,8 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, if (arm_feature(env, ARM_FEATURE_EL3)) { rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW); } else { - /* Either EL2 is the highest EL (and so the EL2 register width + /* + * Either EL2 is the highest EL (and so the EL2 register width * is given by is64); or there is no EL2 or EL3, in which case * the value of 'rw' does not affect the table lookup anyway. */ @@ -9633,7 +10101,8 @@ void aarch64_sync_64_to_32(CPUARMState *env) env->banked_r13[bank_number(ARM_CPU_MODE_UND)] = env->xregs[23]; } - /* Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ + /* + * Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ * mode, then we can copy to r8-r14. Otherwise, we copy to the * FIQ bank for r8-r14. */ @@ -9865,10 +10334,11 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) new_mode = ARM_CPU_MODE_UND; addr = 0x04; mask = CPSR_I; - if (env->thumb) + if (env->thumb) { offset = 2; - else + } else { offset = 4; + } break; case EXCP_SWI: new_mode = ARM_CPU_MODE_SVC; @@ -9979,7 +10449,8 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) /* High vectors. When enabled, base address cannot be remapped. */ addr += 0xffff0000; } else { - /* ARM v7 architectures provide a vector base address register to remap + /* + * ARM v7 architectures provide a vector base address register to remap * the interrupt vector table. * This register is only followed in non-monitor mode, and is banked. * Note: only bits 31:5 are valid. 
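/*
 * Sketch of the AArch32 vector address selection described in the comment
 * above: with high vectors enabled the base is fixed at 0xffff0000, and
 * otherwise the banked VBAR supplies bits [31:5] of the base, with bits
 * [4:0] reading as zero. Assumes hivecs mirrors SCTLR.V; the helper name
 * is illustrative.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t vector_table_entry(uint32_t vbar, bool hivecs, uint32_t offset)
{
    if (hivecs) {
        return 0xffff0000u + offset;  /* base cannot be remapped */
    }
    return (vbar & ~0x1fu) + offset;  /* only VBAR bits [31:5] are used */
}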
@@ -10113,7 +10584,8 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) aarch64_sve_change_el(env, cur_el, new_el, is_a64(env)); if (cur_el < new_el) { - /* Entry vector offset depends on whether the implemented EL + /* + * Entry vector offset depends on whether the implemented EL * immediately lower than the target level is using AArch32 or AArch64 */ bool is_aa64; @@ -10314,7 +10786,8 @@ static void handle_semihosting(CPUState *cs) } #endif -/* Handle a CPU exception for A and R profile CPUs. +/* + * Handle a CPU exception for A and R profile CPUs. * Do any appropriate logging, handle PSCI calls, and then hand off * to the AArch64-entry or AArch32-entry function depending on the * target exception level's register width. @@ -10359,7 +10832,8 @@ void arm_cpu_do_interrupt(CPUState *cs) } #endif - /* Hooks may change global state so BQL should be held, also the + /* + * Hooks may change global state so BQL should be held, also the * BQL needs to be held for any modification of * cs->interrupt_request. */ @@ -10640,9 +11114,11 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, }; } -/* Note that signed overflow is undefined in C. The following routines are - careful to use unsigned types where modulo arithmetic is required. - Failure to do so _will_ break on newer gcc. */ +/* + * Note that signed overflow is undefined in C. The following routines are + * careful to use unsigned types where modulo arithmetic is required. + * Failure to do so _will_ break on newer gcc. + */ /* Signed saturating arithmetic. */ @@ -10653,10 +11129,11 @@ static inline uint16_t add16_sat(uint16_t a, uint16_t b) res = a + b; if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { - if (a & 0x8000) + if (a & 0x8000) { res = 0x8000; - else + } else { res = 0x7fff; + } } return res; } @@ -10668,10 +11145,11 @@ static inline uint8_t add8_sat(uint8_t a, uint8_t b) res = a + b; if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { - if (a & 0x80) + if (a & 0x80) { res = 0x80; - else + } else { res = 0x7f; + } } return res; } @@ -10683,10 +11161,11 @@ static inline uint16_t sub16_sat(uint16_t a, uint16_t b) res = a - b; if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { - if (a & 0x8000) + if (a & 0x8000) { res = 0x8000; - else + } else { res = 0x7fff; + } } return res; } @@ -10698,10 +11177,11 @@ static inline uint8_t sub8_sat(uint8_t a, uint8_t b) res = a - b; if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { - if (a & 0x80) + if (a & 0x80) { res = 0x80; - else + } else { res = 0x7f; + } } return res; } @@ -10719,34 +11199,38 @@ static inline uint16_t add16_usat(uint16_t a, uint16_t b) { uint16_t res; res = a + b; - if (res < a) + if (res < a) { res = 0xffff; + } return res; } static inline uint16_t sub16_usat(uint16_t a, uint16_t b) { - if (a > b) + if (a > b) { return a - b; - else + } else { return 0; + } } static inline uint8_t add8_usat(uint8_t a, uint8_t b) { uint8_t res; res = a + b; - if (res < a) + if (res < a) { res = 0xff; + } return res; } static inline uint8_t sub8_usat(uint8_t a, uint8_t b) { - if (a > b) + if (a > b) { return a - b; - else + } else { return 0; + } } #define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); @@ -10764,7 +11248,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 16); \ if (sum >= 0) \ ge |= 3 << (n * 2); \ - } while(0) + } while (0) #define SARITH8(a, b, n, op) do { \ int32_t sum; \ @@ -10772,7 +11256,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 8); \ if (sum >= 0) \ ge |= 1 << n; \ - } while(0) + } while (0) #define 
ADD16(a, b, n) SARITH16(a, b, n, +) @@ -10791,7 +11275,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 16); \ if ((sum >> 16) == 1) \ ge |= 3 << (n * 2); \ - } while(0) + } while (0) #define ADD8(a, b, n) do { \ uint32_t sum; \ @@ -10799,7 +11283,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 8); \ if ((sum >> 8) == 1) \ ge |= 1 << n; \ - } while(0) + } while (0) #define SUB16(a, b, n) do { \ uint32_t sum; \ @@ -10807,7 +11291,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 16); \ if ((sum >> 16) == 0) \ ge |= 3 << (n * 2); \ - } while(0) + } while (0) #define SUB8(a, b, n) do { \ uint32_t sum; \ @@ -10815,7 +11299,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 8); \ if ((sum >> 8) == 0) \ ge |= 1 << n; \ - } while(0) + } while (0) #define PFX u #define ARITH_GE @@ -10850,10 +11334,11 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) static inline uint8_t do_usad(uint8_t a, uint8_t b) { - if (a > b) + if (a > b) { return a - b; - else + } else { return b - a; + } } /* Unsigned sum of absolute byte differences. */ @@ -10873,18 +11358,23 @@ uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) uint32_t mask; mask = 0; - if (flags & 1) + if (flags & 1) { mask |= 0xff; - if (flags & 2) + } + if (flags & 2) { mask |= 0xff00; - if (flags & 4) + } + if (flags & 4) { mask |= 0xff0000; - if (flags & 8) + } + if (flags & 8) { mask |= 0xff000000; + } return (a & mask) | (b & ~mask); } -/* CRC helpers. +/* + * CRC helpers. * The upper bytes of val (above the number specified by 'bytes') must have * been zeroed out by the caller. */ @@ -10908,7 +11398,8 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Return the exception level to which FP-disabled exceptions should +/* + * Return the exception level to which FP-disabled exceptions should * be taken, or 0 if FP is enabled. */ int fp_exception_el(CPUARMState *env, int cur_el) @@ -10916,7 +11407,8 @@ int fp_exception_el(CPUARMState *env, int cur_el) #ifndef CONFIG_USER_ONLY uint64_t hcr_el2; - /* CPACR and the CPTR registers don't exist before v6, so FP is + /* + * CPACR and the CPTR registers don't exist before v6, so FP is * always accessible */ if (!arm_feature(env, ARM_FEATURE_V6)) { @@ -10941,7 +11433,8 @@ int fp_exception_el(CPUARMState *env, int cur_el) hcr_el2 = arm_hcr_el2_eff(env); - /* The CPACR controls traps to EL1, or PL1 if we're 32 bit: + /* + * The CPACR controls traps to EL1, or PL1 if we're 32 bit: * 0, 2 : trap EL0 and EL1/PL1 accesses * 1 : trap only EL0 accesses * 3 : trap no accesses diff --git a/target/arm/internals.h b/target/arm/internals.h index 161e42d..d955530 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -257,6 +257,10 @@ unsigned int arm_pamax(ARMCPU *cpu); static inline bool extended_addresses_enabled(CPUARMState *env) { uint64_t tcr = env->cp15.tcr_el[arm_is_secure(env) ? 
3 : 1]; + if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + return true; + } return arm_el_is_aa64(env, 1) || (arm_feature(env, ARM_FEATURE_LPAE) && (tcr & TTBCR_EAE)); } diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index 355cd4d..033a4d9 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -7,30 +7,14 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" -#include "target/arm/idau.h" -#include "trace.h" #include "cpu.h" #include "internals.h" -#include "exec/gdbstub.h" #include "exec/helper-proto.h" -#include "qemu/host-utils.h" #include "qemu/main-loop.h" #include "qemu/bitops.h" -#include "qemu/crc32c.h" -#include "qemu/qemu-print.h" #include "qemu/log.h" #include "exec/exec-all.h" -#include <zlib.h> /* For crc32 */ -#include "semihosting/semihost.h" -#include "sysemu/cpus.h" -#include "sysemu/kvm.h" -#include "qemu/range.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/error.h" -#include "qemu/guest-random.h" #ifdef CONFIG_TCG -#include "arm_ldst.h" #include "exec/cpu_ldst.h" #include "semihosting/common-semi.h" #endif diff --git a/target/arm/machine.c b/target/arm/machine.c index 54c5c62..5f26152 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -487,6 +487,30 @@ static bool pmsav8_needed(void *opaque) arm_feature(env, ARM_FEATURE_V8); } +static bool pmsav8r_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8) && + !arm_feature(env, ARM_FEATURE_M); +} + +static const VMStateDescription vmstate_pmsav8r = { + .name = "cpu/pmsav8/pmsav8r", + .version_id = 1, + .minimum_version_id = 1, + .needed = pmsav8r_needed, + .fields = (VMStateField[]) { + VMSTATE_VARRAY_UINT32(env.pmsav8.hprbar, ARMCPU, + pmsav8r_hdregion, 0, vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(env.pmsav8.hprlar, ARMCPU, + pmsav8r_hdregion, 0, vmstate_info_uint32, uint32_t), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_pmsav8 = { .name = "cpu/pmsav8", .version_id = 1, @@ -500,6 +524,10 @@ static const VMStateDescription vmstate_pmsav8 = { VMSTATE_UINT32(env.pmsav8.mair0[M_REG_NS], ARMCPU), VMSTATE_UINT32(env.pmsav8.mair1[M_REG_NS], ARMCPU), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_pmsav8r, + NULL } }; diff --git a/target/arm/ptw.c b/target/arm/ptw.c index f812734..4bda059 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -1758,9 +1758,13 @@ static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, if (arm_feature(env, ARM_FEATURE_M)) { return env->v7m.mpu_ctrl[is_secure] & R_V7M_MPU_CTRL_PRIVDEFENA_MASK; - } else { - return regime_sctlr(env, mmu_idx) & SCTLR_BR; } + + if (mmu_idx == ARMMMUIdx_Stage2) { + return false; + } + + return regime_sctlr(env, mmu_idx) & SCTLR_BR; } static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, @@ -1952,6 +1956,26 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, return !(result->f.prot & (1 << access_type)); } +static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t secure) +{ + if (regime_el(env, mmu_idx) == 2) { + return env->pmsav8.hprbar; + } else { + return env->pmsav8.rbar[secure]; + } +} + +static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t secure) +{ + if (regime_el(env, mmu_idx) == 2) { + return env->pmsav8.hprlar; + } else { + return env->pmsav8.rlar[secure]; + } +} + bool 
pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, MMUAccessType access_type, ARMMMUIdx mmu_idx, bool secure, GetPhysAddrResult *result, @@ -1974,6 +1998,13 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, bool hit = false; uint32_t addr_page_base = address & TARGET_PAGE_MASK; uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); + int region_counter; + + if (regime_el(env, mmu_idx) == 2) { + region_counter = cpu->pmsav8r_hdregion; + } else { + region_counter = cpu->pmsav7_dregion; + } result->f.lg_page_size = TARGET_PAGE_BITS; result->f.phys_addr = address; @@ -1982,6 +2013,10 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, *mregion = -1; } + if (mmu_idx == ARMMMUIdx_Stage2) { + fi->stage2 = true; + } + /* * Unlike the ARM ARM pseudocode, we don't need to check whether this * was an exception vector read from the vector table (which is always @@ -1998,17 +2033,26 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, hit = true; } - for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { + uint32_t bitmask; + if (arm_feature(env, ARM_FEATURE_M)) { + bitmask = 0x1f; + } else { + bitmask = 0x3f; + fi->level = 0; + } + + for (n = region_counter - 1; n >= 0; n--) { /* region search */ /* - * Note that the base address is bits [31:5] from the register - * with bits [4:0] all zeroes, but the limit address is bits - * [31:5] from the register with bits [4:0] all ones. + * Note that the base address is bits [31:x] from the register + * with bits [x-1:0] all zeroes, but the limit address is bits + * [31:x] from the register with bits [x:0] all ones. Where x is + * 5 for Cortex-M and 6 for Cortex-R */ - uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f; - uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f; + uint32_t base = regime_rbar(env, mmu_idx, secure)[n] & ~bitmask; + uint32_t limit = regime_rlar(env, mmu_idx, secure)[n] | bitmask; - if (!(env->pmsav8.rlar[secure][n] & 0x1)) { + if (!(regime_rlar(env, mmu_idx, secure)[n] & 0x1)) { /* Region disabled */ continue; } @@ -2042,7 +2086,9 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, * PMSAv7 where highest-numbered-region wins) */ fi->type = ARMFault_Permission; - fi->level = 1; + if (arm_feature(env, ARM_FEATURE_M)) { + fi->level = 1; + } return true; } @@ -2052,8 +2098,11 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, } if (!hit) { - /* background fault */ - fi->type = ARMFault_Background; + if (arm_feature(env, ARM_FEATURE_M)) { + fi->type = ARMFault_Background; + } else { + fi->type = ARMFault_Permission; + } return true; } @@ -2061,12 +2110,14 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, /* hit using the background region */ get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot); } else { - uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2); - uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1); + uint32_t matched_rbar = regime_rbar(env, mmu_idx, secure)[matchregion]; + uint32_t matched_rlar = regime_rlar(env, mmu_idx, secure)[matchregion]; + uint32_t ap = extract32(matched_rbar, 1, 2); + uint32_t xn = extract32(matched_rbar, 0, 1); bool pxn = false; if (arm_feature(env, ARM_FEATURE_V8_1M)) { - pxn = extract32(env->pmsav8.rlar[secure][matchregion], 4, 1); + pxn = extract32(matched_rlar, 4, 1); } if (m_is_system_region(env, address)) { @@ -2074,21 +2125,46 @@ bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, xn = 1; } - result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap); + if 
(regime_el(env, mmu_idx) == 2) { + result->f.prot = simple_ap_to_rw_prot_is_user(ap, + mmu_idx != ARMMMUIdx_E2); + } else { + result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap); + } + + if (!arm_feature(env, ARM_FEATURE_M)) { + uint8_t attrindx = extract32(matched_rlar, 1, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + uint8_t sh = extract32(matched_rlar, 3, 2); + + if (regime_sctlr(env, mmu_idx) & SCTLR_WXN && + result->f.prot & PAGE_WRITE && mmu_idx != ARMMMUIdx_Stage2) { + xn = 0x1; + } + + if ((regime_el(env, mmu_idx) == 1) && + regime_sctlr(env, mmu_idx) & SCTLR_UWXN && ap == 0x1) { + pxn = 0x1; + } + + result->cacheattrs.is_s2_format = false; + result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); + result->cacheattrs.shareability = sh; + } + if (result->f.prot && !xn && !(pxn && !is_user)) { result->f.prot |= PAGE_EXEC; } - /* - * We don't need to look the attribute up in the MAIR0/MAIR1 - * registers because that only tells us about cacheability. - */ + if (mregion) { *mregion = matchregion; } } fi->type = ARMFault_Permission; - fi->level = 1; + if (arm_feature(env, ARM_FEATURE_M)) { + fi->level = 1; + } return !(result->f.prot & (1 << access_type)); } @@ -2361,7 +2437,11 @@ static uint8_t combined_attrs_nofwb(uint64_t hcr, { uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs; - s2_mair_attrs = convert_stage2_attrs(hcr, s2.attrs); + if (s2.is_s2_format) { + s2_mair_attrs = convert_stage2_attrs(hcr, s2.attrs); + } else { + s2_mair_attrs = s2.attrs; + } s1lo = extract32(s1.attrs, 0, 4); s2lo = extract32(s2_mair_attrs, 0, 4); @@ -2418,6 +2498,8 @@ static uint8_t force_cacheattr_nibble_wb(uint8_t attr) */ static uint8_t combined_attrs_fwb(ARMCacheAttrs s1, ARMCacheAttrs s2) { + assert(s2.is_s2_format && !s1.is_s2_format); + switch (s2.attrs) { case 7: /* Use stage 1 attributes */ @@ -2467,7 +2549,7 @@ static ARMCacheAttrs combine_cacheattrs(uint64_t hcr, ARMCacheAttrs ret; bool tagged = false; - assert(s2.is_s2_format && !s1.is_s2_format); + assert(!s1.is_s2_format); ret.is_s2_format = false; if (s1.attrs == 0xf0) { @@ -2643,7 +2725,13 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, cacheattrs1 = result->cacheattrs; memset(result, 0, sizeof(*result)); - ret = get_phys_addr_lpae(env, ptw, ipa, access_type, is_el0, result, fi); + if (arm_feature(env, ARM_FEATURE_PMSA)) { + ret = get_phys_addr_pmsav8(env, ipa, access_type, + ptw->in_mmu_idx, is_secure, result, fi); + } else { + ret = get_phys_addr_lpae(env, ptw, ipa, access_type, + is_el0, result, fi); + } fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ @@ -2655,10 +2743,20 @@ static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, } /* - * Use the maximum of the S1 & S2 page size, so that invalidation - * of pages > TARGET_PAGE_SIZE works correctly. + * If either S1 or S2 returned a result smaller than TARGET_PAGE_SIZE, + * this means "don't put this in the TLB"; in this case, return a + * result with lg_page_size == 0 to achieve that. Otherwise, + * use the maximum of the S1 & S2 page size, so that invalidation + * of pages > TARGET_PAGE_SIZE works correctly. (This works even though + * we know the combined result permissions etc only cover the minimum + * of the S1 and S2 page size, because we know that the common TLB code + * never actually creates TLB entries bigger than TARGET_PAGE_SIZE, + * and passing a larger page size value only affects invalidations.) 
*/ - if (result->f.lg_page_size < s1_lgpgsz) { + if (result->f.lg_page_size < TARGET_PAGE_BITS || + s1_lgpgsz < TARGET_PAGE_BITS) { + result->f.lg_page_size = 0; + } else if (result->f.lg_page_size < s1_lgpgsz) { result->f.lg_page_size = s1_lgpgsz; } diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 0f4f4fc..60abcbe 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -19,6 +19,10 @@ bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) if (el == 2 || arm_el_is_aa64(env, el)) { return true; } + if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + return true; + } if (arm_feature(env, ARM_FEATURE_LPAE) && (regime_tcr(env, mmu_idx) & TTBCR_EAE)) { return true; diff --git a/target/arm/translate.c b/target/arm/translate.c index 74a9030..1dcaefb 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1184,7 +1184,7 @@ static inline void gen_hlt(DisasContext *s, int imm) * semihosting, to provide some semblance of security * (and for consistency with our 32-bit semihosting). */ - if (semihosting_enabled(s->current_el != 0) && + if (semihosting_enabled(s->current_el == 0) && (imm == (s->thumb ? 0x3c : 0xf000))) { gen_exception_internal_insn(s, EXCP_SEMIHOST); return; diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 658ca4f..807037c 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -86,7 +86,7 @@ static target_ulong adjust_stack_ptrs(CPUHexagonState *env, target_ulong addr) return addr; } -/* HEX_REG_P3_0 (aka C4) is an alias for the predicate registers */ +/* HEX_REG_P3_0_ALIASED (aka C4) is an alias for the predicate registers */ static target_ulong read_p3_0(CPUHexagonState *env) { int32_t control_reg = 0; @@ -102,7 +102,7 @@ static void print_reg(FILE *f, CPUHexagonState *env, int regnum) { target_ulong value; - if (regnum == HEX_REG_P3_0) { + if (regnum == HEX_REG_P3_0_ALIASED) { value = read_p3_0(env); } else { value = regnum < 32 ? 
adjust_stack_ptrs(env, env->gpr[regnum]) @@ -198,7 +198,7 @@ static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) print_reg(f, env, HEX_REG_M0); print_reg(f, env, HEX_REG_M1); print_reg(f, env, HEX_REG_USR); - print_reg(f, env, HEX_REG_P3_0); + print_reg(f, env, HEX_REG_P3_0_ALIASED); print_reg(f, env, HEX_REG_GP); print_reg(f, env, HEX_REG_UGP); print_reg(f, env, HEX_REG_PC); diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 6cf2e0e..90db990 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -43,6 +43,33 @@ TCGv gen_read_preg(TCGv pred, uint8_t num) return pred; } +#define IMMUTABLE (~0) + +static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { + [HEX_REG_USR] = 0xc13000c0, + [HEX_REG_PC] = IMMUTABLE, + [HEX_REG_GP] = 0x3f, + [HEX_REG_UPCYCLELO] = IMMUTABLE, + [HEX_REG_UPCYCLEHI] = IMMUTABLE, + [HEX_REG_UTIMERLO] = IMMUTABLE, + [HEX_REG_UTIMERHI] = IMMUTABLE, +}; + +static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, + target_ulong reg_mask) +{ + if (reg_mask) { + TCGv tmp = tcg_temp_new(); + + /* new_val = (new_val & ~reg_mask) | (cur_val & reg_mask) */ + tcg_gen_andi_tl(new_val, new_val, ~reg_mask); + tcg_gen_andi_tl(tmp, cur_val, reg_mask); + tcg_gen_or_tl(new_val, new_val, tmp); + + tcg_temp_free(tmp); + } +} + static inline void gen_log_predicated_reg_write(int rnum, TCGv val, uint32_t slot) { @@ -69,6 +96,9 @@ static inline void gen_log_predicated_reg_write(int rnum, TCGv val, void gen_log_reg_write(int rnum, TCGv val) { + const target_ulong reg_mask = reg_immut_masks[rnum]; + + gen_masked_reg_write(val, hex_gpr[rnum], reg_mask); tcg_gen_mov_tl(hex_new_value[rnum], val); if (HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ @@ -114,19 +144,29 @@ static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) { + const target_ulong reg_mask_low = reg_immut_masks[rnum]; + const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; + TCGv val32 = tcg_temp_new(); + /* Low word */ - tcg_gen_extrl_i64_i32(hex_new_value[rnum], val); + tcg_gen_extrl_i64_i32(val32, val); + gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low); + tcg_gen_mov_tl(hex_new_value[rnum], val32); if (HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ tcg_gen_movi_tl(hex_reg_written[rnum], 1); } /* High word */ - tcg_gen_extrh_i64_i32(hex_new_value[rnum + 1], val); + tcg_gen_extrh_i64_i32(val32, val); + gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high); + tcg_gen_mov_tl(hex_new_value[rnum + 1], val32); if (HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); } + + tcg_temp_free(val32); } void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) @@ -163,7 +203,7 @@ static inline void gen_read_p3_0(TCGv control_reg) /* * Certain control registers require special handling on read - * HEX_REG_P3_0 aliased to the predicate registers + * HEX_REG_P3_0_ALIASED aliased to the predicate registers * -> concat the 4 predicate registers together * HEX_REG_PC actual value stored in DisasContext * -> assign from ctx->base.pc_next @@ -173,7 +213,7 @@ static inline void gen_read_p3_0(TCGv control_reg) static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num, TCGv dest) { - if (reg_num == HEX_REG_P3_0) { + if (reg_num == HEX_REG_P3_0_ALIASED) { gen_read_p3_0(dest); } else if (reg_num == HEX_REG_PC) { tcg_gen_movi_tl(dest, ctx->base.pc_next); @@ -194,7 +234,7 @@ 
static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num, static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num, TCGv_i64 dest) { - if (reg_num == HEX_REG_P3_0) { + if (reg_num == HEX_REG_P3_0_ALIASED) { TCGv p3_0 = tcg_temp_new(); gen_read_p3_0(p3_0); tcg_gen_concat_i32_i64(dest, p3_0, hex_gpr[reg_num + 1]); @@ -238,7 +278,7 @@ static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg) /* * Certain control registers require special handling on write - * HEX_REG_P3_0 aliased to the predicate registers + * HEX_REG_P3_0_ALIASED aliased to the predicate registers * -> break the value across 4 predicate registers * HEX_REG_QEMU_*_CNT changes in current TB in DisasContext * -> clear the changes @@ -246,7 +286,7 @@ static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg) static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, TCGv val) { - if (reg_num == HEX_REG_P3_0) { + if (reg_num == HEX_REG_P3_0_ALIASED) { gen_write_p3_0(ctx, val); } else { gen_log_reg_write(reg_num, val); @@ -266,7 +306,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, TCGv_i64 val) { - if (reg_num == HEX_REG_P3_0) { + if (reg_num == HEX_REG_P3_0_ALIASED) { TCGv val32 = tcg_temp_new(); tcg_gen_extrl_i64_i32(val32, val); gen_write_p3_0(ctx, val32); diff --git a/target/hexagon/hex_regs.h b/target/hexagon/hex_regs.h index a63c2c0..bddfc28 100644 --- a/target/hexagon/hex_regs.h +++ b/target/hexagon/hex_regs.h @@ -58,7 +58,7 @@ enum { HEX_REG_LC0 = 33, HEX_REG_SA1 = 34, HEX_REG_LC1 = 35, - HEX_REG_P3_0 = 36, + HEX_REG_P3_0_ALIASED = 36, HEX_REG_M0 = 38, HEX_REG_M1 = 39, HEX_REG_USR = 40, diff --git a/target/hexagon/idef-parser/README.rst b/target/hexagon/idef-parser/README.rst index 65e6bf4..ff6d141 100644 --- a/target/hexagon/idef-parser/README.rst +++ b/target/hexagon/idef-parser/README.rst @@ -552,11 +552,11 @@ to compare our buggy CPU state against, running :: - meson configure -Dhexagon_idef_parser_enabled=false + meson configure -Dhexagon_idef_parser=false will disable the idef-parser for all instructions and fallback on manual tinycode generator overrides, or on helper function implementations. Recompiling -gives us ``qemu-hexagon`` which passes all tests. If ``qemu-heaxgon-buggy`` is +gives us ``qemu-hexagon`` which passes all tests. 
If ``qemu-hexagon-buggy`` is our binary with the incorrect tinycode generators, we can compare the CPU state between the two versions diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y index 8be44a0..c14cb39 100644 --- a/target/hexagon/idef-parser/idef-parser.y +++ b/target/hexagon/idef-parser/idef-parser.y @@ -99,6 +99,8 @@ /* Input file containing the description of each hexagon instruction */ input : instructions { + /* Suppress warning about unused yynerrs */ + (void) yynerrs; YYACCEPT; } ; diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index e8f250f..c9d31d0 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -197,7 +197,6 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs idef_parser_dir / 'parser-helpers.c'], include_directories: ['idef-parser', '../../include/'], dependencies: [glib_dep], - c_args: ['-Wextra'], native: true ) diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index add4d54..287659c 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -2163,22 +2163,13 @@ static int ppc_next_unmasked_interrupt(CPUPPCState *env) void ppc_maybe_interrupt(CPUPPCState *env) { CPUState *cs = env_cpu(env); - bool locked = false; - - if (!qemu_mutex_iothread_locked()) { - locked = true; - qemu_mutex_lock_iothread(); - } + QEMU_IOTHREAD_LOCK_GUARD(); if (ppc_next_unmasked_interrupt(env)) { cpu_interrupt(cs, CPU_INTERRUPT_HARD); } else { cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } - - if (locked) { - qemu_mutex_unlock_iothread(); - } } #if defined(TARGET_PPC64) diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index c0aee58..779e7db 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -22,6 +22,7 @@ #include "qemu/main-loop.h" #include "exec/exec-all.h" #include "sysemu/kvm.h" +#include "sysemu/tcg.h" #include "helper_regs.h" #include "power8-pmu.h" #include "cpu-models.h" @@ -203,17 +204,10 @@ void cpu_interrupt_exittb(CPUState *cs) { /* * We don't need to worry about translation blocks - * when running with KVM. + * unless running with TCG. 
*/ - if (kvm_enabled()) { - return; - } - - if (!qemu_mutex_iothread_locked()) { - qemu_mutex_lock_iothread(); - cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); - qemu_mutex_unlock_iothread(); - } else { + if (tcg_enabled()) { + QEMU_IOTHREAD_LOCK_GUARD(); cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); } } diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 6fe176e..cc75ca7 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -76,6 +76,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zicsr, true, PRIV_VERSION_1_10_0, ext_icsr), ISA_EXT_DATA_ENTRY(zifencei, true, PRIV_VERSION_1_10_0, ext_ifencei), ISA_EXT_DATA_ENTRY(zihintpause, true, PRIV_VERSION_1_10_0, ext_zihintpause), + ISA_EXT_DATA_ENTRY(zawrs, true, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfh, true, PRIV_VERSION_1_12_0, ext_zfh), ISA_EXT_DATA_ENTRY(zfhmin, true, PRIV_VERSION_1_12_0, ext_zfhmin), ISA_EXT_DATA_ENTRY(zfinx, true, PRIV_VERSION_1_12_0, ext_zfinx), @@ -382,6 +383,10 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags) CSR_MHARTID, CSR_MSTATUS, CSR_MSTATUSH, + /* + * CSR_SSTATUS is intentionally omitted here as its value + * can be figured out by looking at CSR_MSTATUS + */ CSR_HSTATUS, CSR_VSSTATUS, CSR_MIP, @@ -762,6 +767,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) return; } + if ((cpu->cfg.ext_zawrs) && !cpu->cfg.ext_a) { + error_setg(errp, "Zawrs extension requires A extension"); + return; + } + if ((cpu->cfg.ext_zfh || cpu->cfg.ext_zfhmin) && !cpu->cfg.ext_f) { error_setg(errp, "Zfh/Zfhmin extensions require F extension"); return; @@ -1017,6 +1027,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true), + DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true), DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false), DEFINE_PROP_BOOL("Zfhmin", RISCVCPU, cfg.ext_zfhmin, false), DEFINE_PROP_BOOL("Zve32f", RISCVCPU, cfg.ext_zve32f, false), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 443d15a..f5609b6 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -329,6 +329,9 @@ struct CPUArchState { target_ulong tdata3[RV_MAX_TRIGGERS]; struct CPUBreakpoint *cpu_breakpoint[RV_MAX_TRIGGERS]; struct CPUWatchpoint *cpu_watchpoint[RV_MAX_TRIGGERS]; + QEMUTimer *itrigger_timer[RV_MAX_TRIGGERS]; + int64_t last_icount; + bool itrigger_enabled; /* machine specific rdtime callback */ uint64_t (*rdtime_fn)(void *); @@ -366,6 +369,9 @@ struct CPUArchState { /* CSRs for execution enviornment configuration */ uint64_t menvcfg; + uint64_t mstateen[SMSTATEEN_MAX_COUNT]; + uint64_t hstateen[SMSTATEEN_MAX_COUNT]; + uint64_t sstateen[SMSTATEEN_MAX_COUNT]; target_ulong senvcfg; uint64_t henvcfg; #endif @@ -441,11 +447,13 @@ struct RISCVCPUConfig { bool ext_ifencei; bool ext_icsr; bool ext_zihintpause; + bool ext_smstateen; bool ext_sstc; bool ext_svinval; bool ext_svnapot; bool ext_svpbmt; bool ext_zdinx; + bool ext_zawrs; bool ext_zfh; bool ext_zfhmin; bool ext_zfinx; @@ -621,6 +629,8 @@ FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1) FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1) FIELD(TB_FLAGS, VTA, 24, 1) FIELD(TB_FLAGS, VMA, 25, 1) +/* Native debug itrigger */ +FIELD(TB_FLAGS, ITRIGGER, 26, 1) #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index d8f5f0a..8b0d7e2 100644 --- 
a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -197,6 +197,12 @@ /* Supervisor Configuration CSRs */ #define CSR_SENVCFG 0x10A +/* Supervisor state CSRs */ +#define CSR_SSTATEEN0 0x10C +#define CSR_SSTATEEN1 0x10D +#define CSR_SSTATEEN2 0x10E +#define CSR_SSTATEEN3 0x10F + /* Supervisor Trap Handling */ #define CSR_SSCRATCH 0x140 #define CSR_SEPC 0x141 @@ -244,6 +250,16 @@ #define CSR_HENVCFG 0x60A #define CSR_HENVCFGH 0x61A +/* Hypervisor state CSRs */ +#define CSR_HSTATEEN0 0x60C +#define CSR_HSTATEEN0H 0x61C +#define CSR_HSTATEEN1 0x60D +#define CSR_HSTATEEN1H 0x61D +#define CSR_HSTATEEN2 0x60E +#define CSR_HSTATEEN2H 0x61E +#define CSR_HSTATEEN3 0x60F +#define CSR_HSTATEEN3H 0x61F + /* Virtual CSRs */ #define CSR_VSSTATUS 0x200 #define CSR_VSIE 0x204 @@ -289,6 +305,27 @@ #define CSR_MENVCFG 0x30A #define CSR_MENVCFGH 0x31A +/* Machine state CSRs */ +#define CSR_MSTATEEN0 0x30C +#define CSR_MSTATEEN0H 0x31C +#define CSR_MSTATEEN1 0x30D +#define CSR_MSTATEEN1H 0x31D +#define CSR_MSTATEEN2 0x30E +#define CSR_MSTATEEN2H 0x31E +#define CSR_MSTATEEN3 0x30F +#define CSR_MSTATEEN3H 0x31F + +/* Common defines for all smstateen */ +#define SMSTATEEN_MAX_COUNT 4 +#define SMSTATEEN0_CS (1ULL << 0) +#define SMSTATEEN0_FCSR (1ULL << 1) +#define SMSTATEEN0_HSCONTXT (1ULL << 57) +#define SMSTATEEN0_IMSIC (1ULL << 58) +#define SMSTATEEN0_AIA (1ULL << 59) +#define SMSTATEEN0_SVSLCT (1ULL << 60) +#define SMSTATEEN0_HSENVCFG (1ULL << 62) +#define SMSTATEEN_STATEEN (1ULL << 63) + /* Enhanced Physical Memory Protection (ePMP) */ #define CSR_MSECCFG 0x747 #define CSR_MSECCFGH 0x757 diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 278d163..8ea3442 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -27,7 +27,9 @@ #include "tcg/tcg-op.h" #include "trace.h" #include "semihosting/common-semi.h" +#include "sysemu/cpu-timers.h" #include "cpu_bits.h" +#include "debug.h" int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch) { @@ -103,6 +105,9 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc, flags = FIELD_DP32(flags, TB_FLAGS, MSTATUS_HS_VS, get_field(env->mstatus_hs, MSTATUS_VS)); } + if (riscv_feature(env, RISCV_FEATURE_DEBUG) && !icount_enabled()) { + flags = FIELD_DP32(flags, TB_FLAGS, ITRIGGER, env->itrigger_enabled); + } #endif flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl); @@ -610,7 +615,6 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) CPURISCVState *env = &cpu->env; CPUState *cs = CPU(cpu); uint64_t gein, vsgein = 0, vstip = 0, old = env->mip; - bool locked = false; if (riscv_cpu_virt_enabled(env)) { gein = get_field(env->hstatus, HSTATUS_VGEIN); @@ -621,10 +625,7 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask; vstip = env->vstime_irq ? 
MIP_VSTIP : 0; - if (!qemu_mutex_iothread_locked()) { - locked = true; - qemu_mutex_lock_iothread(); - } + QEMU_IOTHREAD_LOCK_GUARD(); env->mip = (env->mip & ~mask) | (value & mask); @@ -634,10 +635,6 @@ uint64_t riscv_cpu_update_mip(RISCVCPU *cpu, uint64_t mask, uint64_t value) cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } - if (locked) { - qemu_mutex_unlock_iothread(); - } - return old; } @@ -670,6 +667,9 @@ void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv) if (newpriv == PRV_H) { newpriv = PRV_U; } + if (icount_enabled() && newpriv != env->priv) { + riscv_itrigger_update_priv(env); + } /* tlb_flush is unnecessary as mode is contained in mmu_idx */ env->priv = newpriv; env->xl = cpu_recompute_xl(env); @@ -706,24 +706,26 @@ static int get_physical_address_pmp(CPURISCVState *env, int *prot, int mode) { pmp_priv_t pmp_priv; - target_ulong tlb_size_pmp = 0; + int pmp_index = -1; if (!riscv_feature(env, RISCV_FEATURE_PMP)) { *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; return TRANSLATE_SUCCESS; } - if (!pmp_hart_has_privs(env, addr, size, 1 << access_type, &pmp_priv, - mode)) { + pmp_index = pmp_hart_has_privs(env, addr, size, 1 << access_type, + &pmp_priv, mode); + if (pmp_index < 0) { *prot = 0; return TRANSLATE_PMP_FAIL; } *prot = pmp_priv_to_page_prot(pmp_priv); - if (tlb_size != NULL) { - if (pmp_is_range_in_tlb(env, addr & ~(*tlb_size - 1), &tlb_size_pmp)) { - *tlb_size = tlb_size_pmp; - } + if ((tlb_size != NULL) && pmp_index != MAX_RISCV_PMPS) { + target_ulong tlb_sa = addr & ~(TARGET_PAGE_SIZE - 1); + target_ulong tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1; + + *tlb_size = pmp_get_tlb_size(env, pmp_index, tlb_sa, tlb_ea); } return TRANSLATE_SUCCESS; @@ -1248,6 +1250,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, } } + pmu_tlb_fill_incr_ctr(cpu, access_type); if (riscv_cpu_virt_enabled(env) || ((riscv_cpu_two_stage_lookup(mmu_idx) || two_stage_lookup) && access_type != MMU_INST_FETCH)) { @@ -1311,7 +1314,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, } } } else { - pmu_tlb_fill_incr_ctr(cpu, access_type); /* Single stage lookup */ ret = get_physical_address(env, &pa, &prot, address, NULL, access_type, mmu_idx, true, false, false); diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 5c9a7ee..0db2c23 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -41,6 +41,42 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) } /* Predicates */ +#if !defined(CONFIG_USER_ONLY) +static RISCVException smstateen_acc_ok(CPURISCVState *env, int index, + uint64_t bit) +{ + bool virt = riscv_cpu_virt_enabled(env); + CPUState *cs = env_cpu(env); + RISCVCPU *cpu = RISCV_CPU(cs); + + if (env->priv == PRV_M || !cpu->cfg.ext_smstateen) { + return RISCV_EXCP_NONE; + } + + if (!(env->mstateen[index] & bit)) { + return RISCV_EXCP_ILLEGAL_INST; + } + + if (virt) { + if (!(env->hstateen[index] & bit)) { + return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + + if (env->priv == PRV_U && !(env->sstateen[index] & bit)) { + return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + } + + if (env->priv == PRV_U && riscv_has_ext(env, RVS)) { + if (!(env->sstateen[index] & bit)) { + return RISCV_EXCP_ILLEGAL_INST; + } + } + + return RISCV_EXCP_NONE; +} +#endif + static RISCVException fs(CPURISCVState *env, int csrno) { #if !defined(CONFIG_USER_ONLY) @@ -283,6 +319,72 @@ static RISCVException umode32(CPURISCVState *env, int csrno) return umode(env, csrno); } +static RISCVException mstateen(CPURISCVState *env, int csrno) +{ + CPUState *cs = env_cpu(env); + 
RISCVCPU *cpu = RISCV_CPU(cs); + + if (!cpu->cfg.ext_smstateen) { + return RISCV_EXCP_ILLEGAL_INST; + } + + return any(env, csrno); +} + +static RISCVException hstateen_pred(CPURISCVState *env, int csrno, int base) +{ + CPUState *cs = env_cpu(env); + RISCVCPU *cpu = RISCV_CPU(cs); + + if (!cpu->cfg.ext_smstateen) { + return RISCV_EXCP_ILLEGAL_INST; + } + + if (env->priv < PRV_M) { + if (!(env->mstateen[csrno - base] & SMSTATEEN_STATEEN)) { + return RISCV_EXCP_ILLEGAL_INST; + } + } + + return hmode(env, csrno); +} + +static RISCVException hstateen(CPURISCVState *env, int csrno) +{ + return hstateen_pred(env, csrno, CSR_HSTATEEN0); +} + +static RISCVException hstateenh(CPURISCVState *env, int csrno) +{ + return hstateen_pred(env, csrno, CSR_HSTATEEN0H); +} + +static RISCVException sstateen(CPURISCVState *env, int csrno) +{ + bool virt = riscv_cpu_virt_enabled(env); + int index = csrno - CSR_SSTATEEN0; + CPUState *cs = env_cpu(env); + RISCVCPU *cpu = RISCV_CPU(cs); + + if (!cpu->cfg.ext_smstateen) { + return RISCV_EXCP_ILLEGAL_INST; + } + + if (env->priv < PRV_M) { + if (!(env->mstateen[index] & SMSTATEEN_STATEEN)) { + return RISCV_EXCP_ILLEGAL_INST; + } + + if (virt) { + if (!(env->hstateen[index] & SMSTATEEN_STATEEN)) { + return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; + } + } + } + + return smode(env, csrno); +} + /* Checks if PointerMasking registers could be accessed */ static RISCVException pointer_masking(CPURISCVState *env, int csrno) { @@ -838,7 +940,7 @@ static RISCVException sstc(CPURISCVState *env, int csrno) } if (riscv_cpu_virt_enabled(env)) { - if (!(get_field(env->hcounteren, COUNTEREN_TM) & + if (!(get_field(env->hcounteren, COUNTEREN_TM) && get_field(env->henvcfg, HENVCFG_STCE))) { return RISCV_EXCP_VIRT_INSTRUCTION_FAULT; } @@ -1808,6 +1910,13 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, static RISCVException read_senvcfg(CPURISCVState *env, int csrno, target_ulong *val) { + RISCVException ret; + + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + *val = env->senvcfg; return RISCV_EXCP_NONE; } @@ -1816,15 +1925,27 @@ static RISCVException write_senvcfg(CPURISCVState *env, int csrno, target_ulong val) { uint64_t mask = SENVCFG_FIOM | SENVCFG_CBIE | SENVCFG_CBCFE | SENVCFG_CBZE; + RISCVException ret; - env->senvcfg = (env->senvcfg & ~mask) | (val & mask); + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + env->senvcfg = (env->senvcfg & ~mask) | (val & mask); return RISCV_EXCP_NONE; } static RISCVException read_henvcfg(CPURISCVState *env, int csrno, target_ulong *val) { + RISCVException ret; + + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + *val = env->henvcfg; return RISCV_EXCP_NONE; } @@ -1833,6 +1954,12 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, target_ulong val) { uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE; + RISCVException ret; + + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); + if (ret != RISCV_EXCP_NONE) { + return ret; + } if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= HENVCFG_PBMTE | HENVCFG_STCE; @@ -1846,6 +1973,13 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, target_ulong *val) { + RISCVException ret; + + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + *val = 
env->henvcfg >> 32; return RISCV_EXCP_NONE; } @@ -1855,12 +1989,208 @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, { uint64_t mask = HENVCFG_PBMTE | HENVCFG_STCE; uint64_t valh = (uint64_t)val << 32; + RISCVException ret; + + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); + if (ret != RISCV_EXCP_NONE) { + return ret; + } env->henvcfg = (env->henvcfg & ~mask) | (valh & mask); + return RISCV_EXCP_NONE; +} + +static RISCVException read_mstateen(CPURISCVState *env, int csrno, + target_ulong *val) +{ + *val = env->mstateen[csrno - CSR_MSTATEEN0]; + + return RISCV_EXCP_NONE; +} + +static RISCVException write_mstateen(CPURISCVState *env, int csrno, + uint64_t wr_mask, target_ulong new_val) +{ + uint64_t *reg; + + reg = &env->mstateen[csrno - CSR_MSTATEEN0]; + *reg = (*reg & ~wr_mask) | (new_val & wr_mask); + + return RISCV_EXCP_NONE; +} + +static RISCVException write_mstateen0(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + + return write_mstateen(env, csrno, wr_mask, new_val); +} + +static RISCVException write_mstateen_1_3(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + return write_mstateen(env, csrno, SMSTATEEN_STATEEN, new_val); +} + +static RISCVException read_mstateenh(CPURISCVState *env, int csrno, + target_ulong *val) +{ + *val = env->mstateen[csrno - CSR_MSTATEEN0H] >> 32; + + return RISCV_EXCP_NONE; +} + +static RISCVException write_mstateenh(CPURISCVState *env, int csrno, + uint64_t wr_mask, target_ulong new_val) +{ + uint64_t *reg, val; + + reg = &env->mstateen[csrno - CSR_MSTATEEN0H]; + val = (uint64_t)new_val << 32; + val |= *reg & 0xFFFFFFFF; + *reg = (*reg & ~wr_mask) | (val & wr_mask); + + return RISCV_EXCP_NONE; +} + +static RISCVException write_mstateen0h(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + + return write_mstateenh(env, csrno, wr_mask, new_val); +} + +static RISCVException write_mstateenh_1_3(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + return write_mstateenh(env, csrno, SMSTATEEN_STATEEN, new_val); +} + +static RISCVException read_hstateen(CPURISCVState *env, int csrno, + target_ulong *val) +{ + int index = csrno - CSR_HSTATEEN0; + + *val = env->hstateen[index] & env->mstateen[index]; + + return RISCV_EXCP_NONE; +} + +static RISCVException write_hstateen(CPURISCVState *env, int csrno, + uint64_t mask, target_ulong new_val) +{ + int index = csrno - CSR_HSTATEEN0; + uint64_t *reg, wr_mask; + + reg = &env->hstateen[index]; + wr_mask = env->mstateen[index] & mask; + *reg = (*reg & ~wr_mask) | (new_val & wr_mask); + + return RISCV_EXCP_NONE; +} + +static RISCVException write_hstateen0(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + + return write_hstateen(env, csrno, wr_mask, new_val); +} + +static RISCVException write_hstateen_1_3(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + return write_hstateen(env, csrno, SMSTATEEN_STATEEN, new_val); +} + +static RISCVException read_hstateenh(CPURISCVState *env, int csrno, + target_ulong *val) +{ + int index = csrno - CSR_HSTATEEN0H; + + *val = (env->hstateen[index] >> 32) & (env->mstateen[index] >> 32); return RISCV_EXCP_NONE; } +static RISCVException write_hstateenh(CPURISCVState *env, int csrno, + uint64_t mask, target_ulong new_val) +{ + int index = csrno - CSR_HSTATEEN0H; + uint64_t *reg, wr_mask, val; + + reg = &env->hstateen[index]; + 
val = (uint64_t)new_val << 32; + val |= *reg & 0xFFFFFFFF; + wr_mask = env->mstateen[index] & mask; + *reg = (*reg & ~wr_mask) | (val & wr_mask); + + return RISCV_EXCP_NONE; +} + +static RISCVException write_hstateen0h(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + + return write_hstateenh(env, csrno, wr_mask, new_val); +} + +static RISCVException write_hstateenh_1_3(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + return write_hstateenh(env, csrno, SMSTATEEN_STATEEN, new_val); +} + +static RISCVException read_sstateen(CPURISCVState *env, int csrno, + target_ulong *val) +{ + bool virt = riscv_cpu_virt_enabled(env); + int index = csrno - CSR_SSTATEEN0; + + *val = env->sstateen[index] & env->mstateen[index]; + if (virt) { + *val &= env->hstateen[index]; + } + + return RISCV_EXCP_NONE; +} + +static RISCVException write_sstateen(CPURISCVState *env, int csrno, + uint64_t mask, target_ulong new_val) +{ + bool virt = riscv_cpu_virt_enabled(env); + int index = csrno - CSR_SSTATEEN0; + uint64_t wr_mask; + uint64_t *reg; + + wr_mask = env->mstateen[index] & mask; + if (virt) { + wr_mask &= env->hstateen[index]; + } + + reg = &env->sstateen[index]; + *reg = (*reg & ~wr_mask) | (new_val & wr_mask); + + return RISCV_EXCP_NONE; +} + +static RISCVException write_sstateen0(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + + return write_sstateen(env, csrno, wr_mask, new_val); +} + +static RISCVException write_sstateen_1_3(CPURISCVState *env, int csrno, + target_ulong new_val) +{ + return write_sstateen(env, csrno, SMSTATEEN_STATEEN, new_val); +} + static RISCVException rmw_mip64(CPURISCVState *env, int csrno, uint64_t *ret_val, uint64_t new_val, uint64_t wr_mask) @@ -3744,6 +4074,65 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_HENVCFGH] = { "henvcfgh", hmode32, read_henvcfgh, write_henvcfgh, .min_priv_ver = PRIV_VERSION_1_12_0 }, + /* Smstateen extension CSRs */ + [CSR_MSTATEEN0] = { "mstateen0", mstateen, read_mstateen, write_mstateen0, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN0H] = { "mstateen0h", mstateen, read_mstateenh, + write_mstateen0h, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN1] = { "mstateen1", mstateen, read_mstateen, + write_mstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN1H] = { "mstateen1h", mstateen, read_mstateenh, + write_mstateenh_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN2] = { "mstateen2", mstateen, read_mstateen, + write_mstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN2H] = { "mstateen2h", mstateen, read_mstateenh, + write_mstateenh_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN3] = { "mstateen3", mstateen, read_mstateen, + write_mstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_MSTATEEN3H] = { "mstateen3h", mstateen, read_mstateenh, + write_mstateenh_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN0] = { "hstateen0", hstateen, read_hstateen, write_hstateen0, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN0H] = { "hstateen0h", hstateenh, read_hstateenh, + write_hstateen0h, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN1] = { "hstateen1", hstateen, read_hstateen, + write_hstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN1H] = { "hstateen1h", hstateenh, read_hstateenh, + write_hstateenh_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN2] = 
{ "hstateen2", hstateen, read_hstateen, + write_hstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN2H] = { "hstateen2h", hstateenh, read_hstateenh, + write_hstateenh_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN3] = { "hstateen3", hstateen, read_hstateen, + write_hstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_HSTATEEN3H] = { "hstateen3h", hstateenh, read_hstateenh, + write_hstateenh_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_SSTATEEN0] = { "sstateen0", sstateen, read_sstateen, write_sstateen0, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_SSTATEEN1] = { "sstateen1", sstateen, read_sstateen, + write_sstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_SSTATEEN2] = { "sstateen2", sstateen, read_sstateen, + write_sstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_SSTATEEN3] = { "sstateen3", sstateen, read_sstateen, + write_sstateen_1_3, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + /* Supervisor Trap Setup */ [CSR_SSTATUS] = { "sstatus", smode, read_sstatus, write_sstatus, NULL, read_sstatus_i128 }, diff --git a/target/riscv/debug.c b/target/riscv/debug.c index e44848d..bf4840a 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -29,6 +29,8 @@ #include "cpu.h" #include "trace.h" #include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "sysemu/cpu-timers.h" /* * The following M-mode trigger CSRs are implemented: @@ -496,10 +498,209 @@ static void type6_reg_write(CPURISCVState *env, target_ulong index, return; } +/* icount trigger type */ +static inline int +itrigger_get_count(CPURISCVState *env, int index) +{ + return get_field(env->tdata1[index], ITRIGGER_COUNT); +} + +static inline void +itrigger_set_count(CPURISCVState *env, int index, int value) +{ + env->tdata1[index] = set_field(env->tdata1[index], + ITRIGGER_COUNT, value); +} + +static bool check_itrigger_priv(CPURISCVState *env, int index) +{ + target_ulong tdata1 = env->tdata1[index]; + if (riscv_cpu_virt_enabled(env)) { + /* check VU/VS bit against current privilege level */ + return (get_field(tdata1, ITRIGGER_VS) == env->priv) || + (get_field(tdata1, ITRIGGER_VU) == env->priv); + } else { + /* check U/S/M bit against current privilege level */ + return (get_field(tdata1, ITRIGGER_M) == env->priv) || + (get_field(tdata1, ITRIGGER_S) == env->priv) || + (get_field(tdata1, ITRIGGER_U) == env->priv); + } +} + +bool riscv_itrigger_enabled(CPURISCVState *env) +{ + int count; + for (int i = 0; i < RV_MAX_TRIGGERS; i++) { + if (get_trigger_type(env, i) != TRIGGER_TYPE_INST_CNT) { + continue; + } + if (check_itrigger_priv(env, i)) { + continue; + } + count = itrigger_get_count(env, i); + if (!count) { + continue; + } + return true; + } + + return false; +} + +void helper_itrigger_match(CPURISCVState *env) +{ + int count; + for (int i = 0; i < RV_MAX_TRIGGERS; i++) { + if (get_trigger_type(env, i) != TRIGGER_TYPE_INST_CNT) { + continue; + } + if (check_itrigger_priv(env, i)) { + continue; + } + count = itrigger_get_count(env, i); + if (!count) { + continue; + } + itrigger_set_count(env, i, count--); + if (!count) { + env->itrigger_enabled = riscv_itrigger_enabled(env); + do_trigger_action(env, i); + } + } +} + +static void riscv_itrigger_update_count(CPURISCVState *env) +{ + int count, executed; + /* + * Record last icount, so that we can evaluate the executed instructions + * since last priviledge mode change or timer expire. 
+static void riscv_itrigger_update_count(CPURISCVState *env) +{ + int count, executed; + /* + * Record the last icount, so that we can evaluate the instructions + * executed since the last privilege mode change or timer expiry. + */ + int64_t last_icount = env->last_icount, current_icount; + current_icount = env->last_icount = icount_get_raw(); + + for (int i = 0; i < RV_MAX_TRIGGERS; i++) { + if (get_trigger_type(env, i) != TRIGGER_TYPE_INST_CNT) { + continue; + } + count = itrigger_get_count(env, i); + if (!count) { + continue; + } + /* + * The count field in the itrigger tdata1 register is only updated + * when the privilege mode changes or the itrigger timer expires, + * and it only holds the remaining count. + */ + if (check_itrigger_priv(env, i)) { + /* + * If the itrigger is enabled in this privilege mode, the number + * of instructions executed since the last privilege change + * is subtracted from the current itrigger count. + */ + executed = current_icount - last_icount; + itrigger_set_count(env, i, count - executed); + if (count == executed) { + do_trigger_action(env, i); + } + } else { + /* + * If the itrigger is not enabled in this privilege mode, + * the executed instructions are discarded and the count field + * in the itrigger does not change. + */ + timer_mod(env->itrigger_timer[i], + current_icount + count); + } + } +} + +static void riscv_itrigger_timer_cb(void *opaque) +{ + riscv_itrigger_update_count((CPURISCVState *)opaque); +} + +void riscv_itrigger_update_priv(CPURISCVState *env) +{ + riscv_itrigger_update_count(env); +} + +static target_ulong itrigger_validate(CPURISCVState *env, + target_ulong ctrl) +{ + target_ulong val; + + /* validate the generic part first */ + val = tdata1_validate(env, ctrl, TRIGGER_TYPE_INST_CNT); + + /* validate unimplemented (always zero) bits */ + warn_always_zero_bit(ctrl, ITRIGGER_ACTION, "action"); + warn_always_zero_bit(ctrl, ITRIGGER_HIT, "hit"); + warn_always_zero_bit(ctrl, ITRIGGER_PENDING, "pending"); + + /* keep the mode and attribute bits */ + val |= ctrl & (ITRIGGER_VU | ITRIGGER_VS | ITRIGGER_U | ITRIGGER_S | + ITRIGGER_M | ITRIGGER_COUNT); + + return val; +} + +static void itrigger_reg_write(CPURISCVState *env, target_ulong index, + int tdata_index, target_ulong val) +{ + target_ulong new_val; + + switch (tdata_index) { + case TDATA1: + /* set timer for icount */ + new_val = itrigger_validate(env, val); + if (new_val != env->tdata1[index]) { + env->tdata1[index] = new_val; + if (icount_enabled()) { + env->last_icount = icount_get_raw(); + /* set the count to timer */ + timer_mod(env->itrigger_timer[index], + env->last_icount + itrigger_get_count(env, index)); + } else { + env->itrigger_enabled = riscv_itrigger_enabled(env); + } + } + break; + case TDATA2: + qemu_log_mask(LOG_UNIMP, + "tdata2 is not supported for icount trigger\n"); + break; + case TDATA3: + qemu_log_mask(LOG_UNIMP, + "tdata3 is not supported for icount trigger\n"); + break; + default: + g_assert_not_reached(); + } + + return; +} + +static int itrigger_get_adjust_count(CPURISCVState *env) +{ + int count = itrigger_get_count(env, env->trigger_cur), executed; + if ((count != 0) && check_itrigger_priv(env, env->trigger_cur)) { + executed = icount_get_raw() - env->last_icount; + count += executed; + } + return count; +} + target_ulong tdata_csr_read(CPURISCVState *env, int tdata_index) { + int trigger_type; switch (tdata_index) { case TDATA1: + trigger_type = extract_trigger_type(env, env->tdata1[env->trigger_cur]); + if ((trigger_type == TRIGGER_TYPE_INST_CNT) && icount_enabled()) { + return deposit64(env->tdata1[env->trigger_cur], 10, 14, + itrigger_get_adjust_count(env)); + } return env->tdata1[env->trigger_cur]; case TDATA2: return env->tdata2[env->trigger_cur];
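The TDATA1 read path above patches the live count back into the returned tdata1 image with deposit64(value, 10, 14, field), matching the ITRIGGER_COUNT placement at bits [23:10]. A self-contained sketch of that bit-field write; deposit64() is re-implemented locally so the snippet compiles outside QEMU (the real helper lives in include/qemu/bitops.h), and the stored count of 100 and the 25 retired instructions are arbitrary example values:

#include <stdint.h>
#include <stdio.h>

/* Local re-implementation of QEMU's deposit64() from include/qemu/bitops.h. */
static uint64_t deposit64(uint64_t value, int start, int length,
                          uint64_t fieldval)
{
    uint64_t mask = (~0ULL >> (64 - length)) << start;
    return (value & ~mask) | ((fieldval << start) & mask);
}

int main(void)
{
    uint64_t tdata1 = 100ULL << 10;               /* count at bits [23:10] */
    uint64_t adjusted = ((tdata1 >> 10) & 0x3fff) + 25;  /* fold in retired insns */

    /* Same shape as the tdata_csr_read() change above. */
    tdata1 = deposit64(tdata1, 10, 14, adjusted);
    printf("count read back = %llu\n",
           (unsigned long long)((tdata1 >> 10) & 0x3fff));
    return 0;
}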
@@ -528,6 +729,8 @@ void tdata_csr_write(CPURISCVState *env, int tdata_index, target_ulong val) type6_reg_write(env, env->trigger_cur, tdata_index, val); break; case TRIGGER_TYPE_INST_CNT: + itrigger_reg_write(env, env->trigger_cur, tdata_index, val); + break; case TRIGGER_TYPE_INT: case TRIGGER_TYPE_EXCP: case TRIGGER_TYPE_EXT_SRC: @@ -725,5 +928,7 @@ void riscv_trigger_init(CPURISCVState *env) env->tdata3[i] = 0; env->cpu_breakpoint[i] = NULL; env->cpu_watchpoint[i] = NULL; + env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, + riscv_itrigger_timer_cb, env); } } diff --git a/target/riscv/debug.h b/target/riscv/debug.h index a1226b4..c471748 100644 --- a/target/riscv/debug.h +++ b/target/riscv/debug.h @@ -118,6 +118,17 @@ enum { SIZE_NUM = 16 }; +/* itrigger field masks */ +#define ITRIGGER_ACTION 0x3f +#define ITRIGGER_U BIT(6) +#define ITRIGGER_S BIT(7) +#define ITRIGGER_PENDING BIT(8) +#define ITRIGGER_M BIT(9) +#define ITRIGGER_COUNT (0x3fff << 10) +#define ITRIGGER_HIT BIT(24) +#define ITRIGGER_VU BIT(25) +#define ITRIGGER_VS BIT(26) + bool tdata_available(CPURISCVState *env, int tdata_index); target_ulong tselect_csr_read(CPURISCVState *env); @@ -134,4 +145,6 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); void riscv_trigger_init(CPURISCVState *env); +bool riscv_itrigger_enabled(CPURISCVState *env); +void riscv_itrigger_update_priv(CPURISCVState *env); #endif /* RISCV_DEBUG_H */ diff --git a/target/riscv/helper.h b/target/riscv/helper.h index a03014f..227c712 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -109,6 +109,8 @@ DEF_HELPER_1(sret, tl, env) DEF_HELPER_1(mret, tl, env) DEF_HELPER_1(wfi, void, env) DEF_HELPER_1(tlb_flush, void, env) +/* Native Debug */ +DEF_HELPER_1(itrigger_match, void, env) #endif /* Hypervisor functions */ diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index d0253b8..b7e7613 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -718,6 +718,10 @@ vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm11 vsetivli 11 .......... ..... 111 ..... 1010111 @r2_zimm10 vsetvl 1000000 ..... ..... 111 ..... 1010111 @r +# *** Zawrs Standard Extension *** +wrs_nto 000000001101 00000 000 00000 1110011 +wrs_sto 000000011101 00000 000 00000 1110011 + # *** RV32 Zba Standard Extension *** sh1add 0010000 .......... 010 ..... 0110011 @r sh2add 0010000 .......... 100 .....
0110011 @r diff --git a/target/riscv/insn_trans/trans_privileged.c.inc b/target/riscv/insn_trans/trans_privileged.c.inc index 3281408..59501b2 100644 --- a/target/riscv/insn_trans/trans_privileged.c.inc +++ b/target/riscv/insn_trans/trans_privileged.c.inc @@ -78,7 +78,7 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a) if (has_ext(ctx, RVS)) { decode_save_opc(ctx); gen_helper_sret(cpu_pc, cpu_env); - tcg_gen_exit_tb(NULL, 0); /* no chaining */ + exit_tb(ctx); /* no chaining */ ctx->base.is_jmp = DISAS_NORETURN; } else { return false; @@ -94,7 +94,7 @@ static bool trans_mret(DisasContext *ctx, arg_mret *a) #ifndef CONFIG_USER_ONLY decode_save_opc(ctx); gen_helper_mret(cpu_pc, cpu_env); - tcg_gen_exit_tb(NULL, 0); /* no chaining */ + exit_tb(ctx); /* no chaining */ ctx->base.is_jmp = DISAS_NORETURN; return true; #else diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc index c49dbec..5c69b88 100644 --- a/target/riscv/insn_trans/trans_rvi.c.inc +++ b/target/riscv/insn_trans/trans_rvi.c.inc @@ -66,7 +66,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) } gen_set_gpri(ctx, a->rd, ctx->pc_succ_insn); - tcg_gen_lookup_and_goto_ptr(); + lookup_and_goto_ptr(ctx); if (misaligned) { gen_set_label(misaligned); @@ -803,7 +803,7 @@ static bool trans_pause(DisasContext *ctx, arg_pause *a) * end the TB and return to main loop */ gen_set_pc_imm(ctx, ctx->pc_succ_insn); - tcg_gen_exit_tb(NULL, 0); + exit_tb(ctx); ctx->base.is_jmp = DISAS_NORETURN; return true; @@ -827,7 +827,7 @@ static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a) * however we need to end the translation block */ gen_set_pc_imm(ctx, ctx->pc_succ_insn); - tcg_gen_exit_tb(NULL, 0); + exit_tb(ctx); ctx->base.is_jmp = DISAS_NORETURN; return true; } @@ -838,7 +838,7 @@ static bool do_csr_post(DisasContext *ctx) decode_save_opc(ctx); /* We may have changed important cpu state -- exit to main loop. */ gen_set_pc_imm(ctx, ctx->pc_succ_insn); - tcg_gen_exit_tb(NULL, 0); + exit_tb(ctx); ctx->base.is_jmp = DISAS_NORETURN; return true; } diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 4dea441..d455ace 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -196,7 +196,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) mark_vs_dirty(s); gen_set_pc_imm(s, s->pc_succ_insn); - tcg_gen_lookup_and_goto_ptr(); + lookup_and_goto_ptr(s); s->base.is_jmp = DISAS_NORETURN; if (rd == 0 && rs1 == 0) { @@ -222,7 +222,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) gen_set_gpr(s, rd, dst); mark_vs_dirty(s); gen_set_pc_imm(s, s->pc_succ_insn); - tcg_gen_lookup_and_goto_ptr(); + lookup_and_goto_ptr(s); s->base.is_jmp = DISAS_NORETURN; return true; diff --git a/target/riscv/insn_trans/trans_rvzawrs.c.inc b/target/riscv/insn_trans/trans_rvzawrs.c.inc new file mode 100644 index 0000000..8254e7d --- /dev/null +++ b/target/riscv/insn_trans/trans_rvzawrs.c.inc @@ -0,0 +1,51 @@ +/* + * RISC-V translation routines for the RISC-V Zawrs Extension. + * + * Copyright (c) 2022 Christoph Muellner, christoph.muellner@vrull.io + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +static bool trans_wrs(DisasContext *ctx) +{ + if (!ctx->cfg_ptr->ext_zawrs) { + return false; + } + + /* + * The specification says: + * While stalled, an implementation is permitted to occasionally + * terminate the stall and complete execution for any reason. + * + * So let's just exit TB and return to the main loop. + */ + + /* Clear the load reservation (if any). */ + tcg_gen_movi_tl(load_res, -1); + + gen_set_pc_imm(ctx, ctx->pc_succ_insn); + tcg_gen_exit_tb(NULL, 0); + ctx->base.is_jmp = DISAS_NORETURN; + + return true; +} + +#define GEN_TRANS_WRS(insn) \ +static bool trans_ ## insn(DisasContext *ctx, arg_ ## insn *a) \ +{ \ + (void)a; \ + return trans_wrs(ctx); \ +} + +GEN_TRANS_WRS(wrs_nto) +GEN_TRANS_WRS(wrs_sto) diff --git a/target/riscv/machine.c b/target/riscv/machine.c index c2a94a8..65a8549 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -21,6 +21,8 @@ #include "qemu/error-report.h" #include "sysemu/kvm.h" #include "migration/cpu.h" +#include "sysemu/cpu-timers.h" +#include "debug.h" static bool pmp_needed(void *opaque) { @@ -229,11 +231,24 @@ static bool debug_needed(void *opaque) return riscv_feature(env, RISCV_FEATURE_DEBUG); } +static int debug_post_load(void *opaque, int version_id) +{ + RISCVCPU *cpu = opaque; + CPURISCVState *env = &cpu->env; + + if (icount_enabled()) { + env->itrigger_enabled = riscv_itrigger_enabled(env); + } + + return 0; +} + static const VMStateDescription vmstate_debug = { .name = "cpu/debug", .version_id = 2, .minimum_version_id = 2, .needed = debug_needed, + .post_load = debug_post_load, .fields = (VMStateField[]) { VMSTATE_UINTTL(env.trigger_cur, RISCVCPU), VMSTATE_UINTTL_ARRAY(env.tdata1, RISCVCPU, RV_MAX_TRIGGERS), @@ -253,6 +268,26 @@ static int riscv_cpu_post_load(void *opaque, int version_id) return 0; } +static bool smstateen_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + + return cpu->cfg.ext_smstateen; +} + +static const VMStateDescription vmstate_smstateen = { + .name = "cpu/smtateen", + .version_id = 1, + .minimum_version_id = 1, + .needed = smstateen_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.mstateen, RISCVCPU, 4), + VMSTATE_UINT64_ARRAY(env.hstateen, RISCVCPU, 4), + VMSTATE_UINT64_ARRAY(env.sstateen, RISCVCPU, 4), + VMSTATE_END_OF_LIST() + } +}; + static bool envcfg_needed(void *opaque) { RISCVCPU *cpu = opaque; @@ -364,6 +399,7 @@ const VMStateDescription vmstate_riscv_cpu = { &vmstate_kvmtimer, &vmstate_envcfg, &vmstate_debug, + &vmstate_smstateen, NULL } }; diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 09f1f51..878bcb0 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -149,21 +149,24 @@ target_ulong helper_sret(CPURISCVState *env) } mstatus = env->mstatus; + prev_priv = get_field(mstatus, MSTATUS_SPP); + mstatus = set_field(mstatus, MSTATUS_SIE, + get_field(mstatus, MSTATUS_SPIE)); + mstatus = set_field(mstatus, MSTATUS_SPIE, 1); + mstatus = set_field(mstatus, MSTATUS_SPP, PRV_U); + if (env->priv_ver >= PRIV_VERSION_1_12_0) { + mstatus = set_field(mstatus, MSTATUS_MPRV, 0); + } + env->mstatus = mstatus; if (riscv_has_ext(env, RVH) && 
!riscv_cpu_virt_enabled(env)) { /* We support Hypervisor extensions and virtulisation is disabled */ target_ulong hstatus = env->hstatus; - prev_priv = get_field(mstatus, MSTATUS_SPP); prev_virt = get_field(hstatus, HSTATUS_SPV); hstatus = set_field(hstatus, HSTATUS_SPV, 0); - mstatus = set_field(mstatus, MSTATUS_SPP, 0); - mstatus = set_field(mstatus, SSTATUS_SIE, - get_field(mstatus, SSTATUS_SPIE)); - mstatus = set_field(mstatus, SSTATUS_SPIE, 1); - env->mstatus = mstatus; env->hstatus = hstatus; if (prev_virt) { @@ -171,14 +174,6 @@ target_ulong helper_sret(CPURISCVState *env) } riscv_cpu_set_virt_enabled(env, prev_virt); - } else { - prev_priv = get_field(mstatus, MSTATUS_SPP); - - mstatus = set_field(mstatus, MSTATUS_SIE, - get_field(mstatus, MSTATUS_SPIE)); - mstatus = set_field(mstatus, MSTATUS_SPIE, 1); - mstatus = set_field(mstatus, MSTATUS_SPP, PRV_U); - env->mstatus = mstatus; } riscv_cpu_set_mode(env, prev_priv); @@ -202,7 +197,7 @@ target_ulong helper_mret(CPURISCVState *env) if (riscv_feature(env, RISCV_FEATURE_PMP) && !pmp_get_num_rules(env) && (prev_priv != PRV_M)) { - riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + riscv_raise_exception(env, RISCV_EXCP_INST_ACCESS_FAULT, GETPC()); } target_ulong prev_virt = get_field(env->mstatus, MSTATUS_MPV); @@ -211,6 +206,9 @@ target_ulong helper_mret(CPURISCVState *env) mstatus = set_field(mstatus, MSTATUS_MPIE, 1); mstatus = set_field(mstatus, MSTATUS_MPP, PRV_U); mstatus = set_field(mstatus, MSTATUS_MPV, 0); + if ((env->priv_ver >= PRIV_VERSION_1_12_0) && (prev_priv != PRV_M)) { + mstatus = set_field(mstatus, MSTATUS_MPRV, 0); + } env->mstatus = mstatus; riscv_cpu_set_mode(env, prev_priv); diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 2b43e39..d1126a6 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -292,8 +292,11 @@ static bool pmp_hart_has_privs_default(CPURISCVState *env, target_ulong addr, /* * Check if the address has required RWX privs to complete desired operation + * Return the PMP rule index if a PMP rule matches + * Return MAX_RISCV_PMPS if the default policy matches + * Return a negative value if no rule matches */ -bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, +int pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, target_ulong size, pmp_priv_t privs, pmp_priv_t *allowed_privs, target_ulong mode) { @@ -305,8 +308,10 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, /* Short cut if no rules */ if (0 == pmp_get_num_rules(env)) { - return pmp_hart_has_privs_default(env, addr, size, privs, - allowed_privs, mode); + if (pmp_hart_has_privs_default(env, addr, size, privs, + allowed_privs, mode)) { + ret = MAX_RISCV_PMPS; + } } if (size == 0) { @@ -333,7 +338,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, if ((s + e) == 1) { qemu_log_mask(LOG_GUEST_ERROR, "pmp violation - access is partially inside\n"); - ret = 0; + ret = -1; break; } @@ -436,18 +441,22 @@ bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, } } - ret = ((privs & *allowed_privs) == privs); + if ((privs & *allowed_privs) == privs) { + ret = i; + } break; } } /* No rule matched */ if (ret == -1) { - return pmp_hart_has_privs_default(env, addr, size, privs, - allowed_privs, mode); + if (pmp_hart_has_privs_default(env, addr, size, privs, + allowed_privs, mode)) { + ret = MAX_RISCV_PMPS; + } } - return ret == 1 ? true : false; + return ret; }
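The bool-to-int change gives pmp_hart_has_privs() a three-way result, which get_physical_address_pmp() in the cpu_helper.c hunk above already branches on. A small standalone sketch of the caller-side handling; classify_pmp_result() and the sample values are hypothetical, and MAX_RISCV_PMPS mirrors the definition in target/riscv/pmp.h:

#include <stdio.h>

#define MAX_RISCV_PMPS 16   /* as defined in target/riscv/pmp.h */

/* Hypothetical helper mirroring the new return contract. */
static const char *classify_pmp_result(int ret)
{
    if (ret < 0) {
        return "no match / partial overlap -> TRANSLATE_PMP_FAIL";
    }
    if (ret == MAX_RISCV_PMPS) {
        return "default (no-rule) match -> skip per-rule TLB clamping";
    }
    return "matched a specific rule -> clamp TLB size via pmp_get_tlb_size()";
}

int main(void)
{
    int samples[] = { -1, 3, MAX_RISCV_PMPS };
    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        printf("ret = %2d: %s\n", samples[i], classify_pmp_result(samples[i]));
    }
    return 0;
}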
/* @@ -586,64 +595,25 @@ target_ulong mseccfg_csr_read(CPURISCVState *env) * Calculate the TLB size if the start address or the end address of * PMP entry is presented in the TLB page. */ -static target_ulong pmp_get_tlb_size(CPURISCVState *env, int pmp_index, - target_ulong tlb_sa, target_ulong tlb_ea) +target_ulong pmp_get_tlb_size(CPURISCVState *env, int pmp_index, + target_ulong tlb_sa, target_ulong tlb_ea) { target_ulong pmp_sa = env->pmp_state.addr[pmp_index].sa; target_ulong pmp_ea = env->pmp_state.addr[pmp_index].ea; - if (pmp_sa >= tlb_sa && pmp_ea <= tlb_ea) { - return pmp_ea - pmp_sa + 1; - } - - if (pmp_sa >= tlb_sa && pmp_sa <= tlb_ea && pmp_ea >= tlb_ea) { - return tlb_ea - pmp_sa + 1; - } - - if (pmp_ea <= tlb_ea && pmp_ea >= tlb_sa && pmp_sa <= tlb_sa) { - return pmp_ea - tlb_sa + 1; - } - - return 0; -} - -/* - * Check is there a PMP entry which range covers this page. If so, - * try to find the minimum granularity for the TLB size. - */ -bool pmp_is_range_in_tlb(CPURISCVState *env, hwaddr tlb_sa, - target_ulong *tlb_size) -{ - int i; - target_ulong val; - target_ulong tlb_ea = (tlb_sa + TARGET_PAGE_SIZE - 1); - - for (i = 0; i < MAX_RISCV_PMPS; i++) { - val = pmp_get_tlb_size(env, i, tlb_sa, tlb_ea); - if (val) { - if (*tlb_size == 0 || *tlb_size > val) { - *tlb_size = val; - } - } - } - - if (*tlb_size != 0) { + if (pmp_sa <= tlb_sa && pmp_ea >= tlb_ea) { + return TARGET_PAGE_SIZE; + } else { /* - * At this point we have a tlb_size that is the smallest possible size - * That fits within a TARGET_PAGE_SIZE and the PMP region. - * - * If the size is less then TARGET_PAGE_SIZE we drop the size to 1. - * This means the result isn't cached in the TLB and is only used for - * a single translation. - */ - if (*tlb_size < TARGET_PAGE_SIZE) { - *tlb_size = 1; - } - - return true; + * At this point we have a tlb_size that is the smallest possible size + * that fits within a TARGET_PAGE_SIZE and the PMP region. + * + * If the size is less than TARGET_PAGE_SIZE we drop the size to 1. + * This means the result isn't cached in the TLB and is only used for + * a single translation. + */ + return 1; } - - return false; } /* diff --git a/target/riscv/pmp.h b/target/riscv/pmp.h index a8dd797..da32c61 100644 --- a/target/riscv/pmp.h +++ b/target/riscv/pmp.h @@ -72,11 +72,11 @@ target_ulong mseccfg_csr_read(CPURISCVState *env); void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, target_ulong val); target_ulong pmpaddr_csr_read(CPURISCVState *env, uint32_t addr_index); -bool pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, +int pmp_hart_has_privs(CPURISCVState *env, target_ulong addr, target_ulong size, pmp_priv_t privs, pmp_priv_t *allowed_privs, target_ulong mode); -bool pmp_is_range_in_tlb(CPURISCVState *env, hwaddr tlb_sa, - target_ulong *tlb_size); +target_ulong pmp_get_tlb_size(CPURISCVState *env, int pmp_index, + target_ulong tlb_sa, target_ulong tlb_ea); void pmp_update_rule_addr(CPURISCVState *env, uint32_t pmp_index); void pmp_update_rule_nums(CPURISCVState *env); uint32_t pmp_get_num_rules(CPURISCVState *env);
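The simplified pmp_get_tlb_size() is now a binary decision: a PMP region covering the whole page keeps the normal TLB entry size, while any partial cover degrades to a one-byte entry that is never cached. A standalone restatement of that rule; tlb_size_for() and the sample addresses are hypothetical, and the 4 KiB TARGET_PAGE_SIZE is the usual RISC-V value:

#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_SIZE 4096ULL   /* typical RISC-V page size */

/* Hypothetical restatement of the new pmp_get_tlb_size() decision. */
static uint64_t tlb_size_for(uint64_t pmp_sa, uint64_t pmp_ea,
                             uint64_t tlb_sa, uint64_t tlb_ea)
{
    return (pmp_sa <= tlb_sa && pmp_ea >= tlb_ea) ? TARGET_PAGE_SIZE : 1;
}

int main(void)
{
    uint64_t tlb_sa = 0x80000000ULL;
    uint64_t tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1;

    /* A rule covering the whole page vs. a rule ending mid-page. */
    printf("full cover: %llu\n", (unsigned long long)
           tlb_size_for(0x80000000ULL, 0x8000ffffULL, tlb_sa, tlb_ea));
    printf("partial:    %llu\n", (unsigned long long)
           tlb_size_for(0x80000000ULL, 0x800007ffULL, tlb_sa, tlb_ea));
    return 0;
}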
diff --git a/target/riscv/translate.c b/target/riscv/translate.c index db123da..df38db7 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -76,6 +76,7 @@ typedef struct DisasContext { to reset this known value. */ int frm; RISCVMXL ol; + bool virt_inst_excp; bool virt_enabled; const RISCVCPUConfig *cfg_ptr; bool hlsx; @@ -111,6 +112,8 @@ typedef struct DisasContext { /* PointerMasking extension */ bool pm_mask_enabled; bool pm_base_enabled; + /* Use icount trigger for native debug */ + bool itrigger; /* TCG of the current insn_start */ TCGOp *insn_start; } DisasContext; @@ -243,7 +246,11 @@ static void gen_exception_illegal(DisasContext *ctx) { tcg_gen_st_i32(tcg_constant_i32(ctx->opcode), cpu_env, offsetof(CPURISCVState, bins)); - generate_exception(ctx, RISCV_EXCP_ILLEGAL_INST); + if (ctx->virt_inst_excp) { + generate_exception(ctx, RISCV_EXCP_VIRT_INSTRUCTION_FAULT); + } else { + generate_exception(ctx, RISCV_EXCP_ILLEGAL_INST); + } } static void gen_exception_inst_addr_mis(DisasContext *ctx) @@ -252,15 +259,39 @@ generate_exception(ctx, RISCV_EXCP_INST_ADDR_MIS); } +static void lookup_and_goto_ptr(DisasContext *ctx) +{ +#ifndef CONFIG_USER_ONLY + if (ctx->itrigger) { + gen_helper_itrigger_match(cpu_env); + } +#endif + tcg_gen_lookup_and_goto_ptr(); +} + +static void exit_tb(DisasContext *ctx) +{ +#ifndef CONFIG_USER_ONLY + if (ctx->itrigger) { + gen_helper_itrigger_match(cpu_env); + } +#endif + tcg_gen_exit_tb(NULL, 0); +} + static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) { - if (translator_use_goto_tb(&ctx->base, dest)) { + /* + * Under itrigger, instructions execute one by one like singlestep; + * the benefit of direct block chaining would be small. + */ + if (translator_use_goto_tb(&ctx->base, dest) && !ctx->itrigger) { tcg_gen_goto_tb(n); gen_set_pc_imm(ctx, dest); tcg_gen_exit_tb(ctx->base.tb, n); } else { gen_set_pc_imm(ctx, dest); - tcg_gen_lookup_and_goto_ptr(); + lookup_and_goto_ptr(ctx); } } @@ -1029,6 +1060,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "insn_trans/trans_rvh.c.inc" #include "insn_trans/trans_rvv.c.inc" #include "insn_trans/trans_rvb.c.inc" +#include "insn_trans/trans_rvzawrs.c.inc" #include "insn_trans/trans_rvzfh.c.inc" #include "insn_trans/trans_rvk.c.inc" #include "insn_trans/trans_privileged.c.inc" @@ -1062,16 +1094,13 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) { has_XVentanaCondOps_p, decode_XVentanaCodeOps }, }; + ctx->virt_inst_excp = false; /* Check for compressed insn */ if (insn_len(opcode) == 2) { - if (!has_ext(ctx, RVC)) { - gen_exception_illegal(ctx); - } else { - ctx->opcode = opcode; - ctx->pc_succ_insn = ctx->base.pc_next + 2; - if (decode_insn16(ctx, opcode)) { - return; - } + ctx->opcode = opcode; + ctx->pc_succ_insn = ctx->base.pc_next + 2; + if (has_ext(ctx, RVC) && decode_insn16(ctx, opcode)) { + return; + } } else { uint32_t opcode32 = opcode; @@ -1136,6 +1165,7 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) memset(ctx->ftemp, 0, sizeof(ctx->ftemp)); ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED); ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED); + ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER); ctx->zero = tcg_constant_tl(0); } @@ -1175,7 +1205,7 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) /* Only the first insn within a TB is allowed to cross a page boundary.
*/ if (ctx->base.is_jmp == DISAS_NEXT) { - if (!is_same_page(&ctx->base, ctx->base.pc_next)) { + if (ctx->itrigger || !is_same_page(&ctx->base, ctx->base.pc_next)) { ctx->base.is_jmp = DISAS_TOO_MANY; } else { unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK; diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 34858eb..150aeec 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -163,13 +163,6 @@ static inline void gen_update_fprs_dirty(DisasContext *dc, int rd) /* floating point registers moves */ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) { -#if TCG_TARGET_REG_BITS == 32 - if (src & 1) { - return TCGV_LOW(cpu_fpr[src / 2]); - } else { - return TCGV_HIGH(cpu_fpr[src / 2]); - } -#else TCGv_i32 ret = get_temp_i32(dc); if (src & 1) { tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]); @@ -177,22 +170,16 @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]); } return ret; -#endif } static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v) { -#if TCG_TARGET_REG_BITS == 32 - if (dst & 1) { - tcg_gen_mov_i32(TCGV_LOW(cpu_fpr[dst / 2]), v); - } else { - tcg_gen_mov_i32(TCGV_HIGH(cpu_fpr[dst / 2]), v); - } -#else - TCGv_i64 t = (TCGv_i64)v; + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(t, v); tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t, (dst & 1 ? 0 : 32), 32); -#endif + tcg_temp_free_i64(t); gen_update_fprs_dirty(dc, dst); } diff --git a/tcg/README b/tcg/README deleted file mode 100644 index bc15cc3..0000000 --- a/tcg/README +++ /dev/null @@ -1,784 +0,0 @@ -Tiny Code Generator - Fabrice Bellard. - -1) Introduction - -TCG (Tiny Code Generator) began as a generic backend for a C -compiler. It was simplified to be used in QEMU. It also has its roots -in the QOP code generator written by Paul Brook. - -2) Definitions - -TCG receives RISC-like "TCG ops" and performs some optimizations on them, -including liveness analysis and trivial constant expression -evaluation. TCG ops are then implemented in the host CPU back end, -also known as the TCG "target". - -The TCG "target" is the architecture for which we generate the -code. It is of course not the same as the "target" of QEMU which is -the emulated architecture. As TCG started as a generic C backend used -for cross compiling, it is assumed that the TCG target is different -from the host, although it is never the case for QEMU. - -In this document, we use "guest" to specify what architecture we are -emulating; "target" always means the TCG target, the machine on which -we are running QEMU. - -A TCG "function" corresponds to a QEMU Translated Block (TB). - -A TCG "temporary" is a variable only live in a basic -block. Temporaries are allocated explicitly in each function. - -A TCG "local temporary" is a variable only live in a function. Local -temporaries are allocated explicitly in each function. - -A TCG "global" is a variable which is live in all the functions -(equivalent of a C global variable). They are defined before the -functions defined. A TCG global can be a memory location (e.g. a QEMU -CPU register), a fixed host register (e.g. the QEMU CPU state pointer) -or a memory location which is stored in a register outside QEMU TBs -(not implemented yet). - -A TCG "basic block" corresponds to a list of instructions terminated -by a branch instruction. - -An operation with "undefined behavior" may result in a crash. - -An operation with "unspecified behavior" shall not crash. 
However, -the result may be one of several possibilities so may be considered -an "undefined result". - -3) Intermediate representation - -3.1) Introduction - -TCG instructions operate on variables which are temporaries, local -temporaries or globals. TCG instructions and variables are strongly -typed. Two types are supported: 32 bit integers and 64 bit -integers. Pointers are defined as an alias to 32 bit or 64 bit -integers depending on the TCG target word size. - -Each instruction has a fixed number of output variable operands, input -variable operands and always constant operands. - -The notable exception is the call instruction which has a variable -number of outputs and inputs. - -In the textual form, output operands usually come first, followed by -input operands, followed by constant operands. The output type is -included in the instruction name. Constants are prefixed with a '$'. - -add_i32 t0, t1, t2 (t0 <- t1 + t2) - -3.2) Assumptions - -* Basic blocks - -- Basic blocks end after branches (e.g. brcond_i32 instruction), - goto_tb and exit_tb instructions. -- Basic blocks start after the end of a previous basic block, or at a - set_label instruction. - -After the end of a basic block, the content of temporaries is -destroyed, but local temporaries and globals are preserved. - -* Floating point types are not supported yet - -* Pointers: depending on the TCG target, pointer size is 32 bit or 64 - bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or - TCG_TYPE_I64. - -* Helpers: - -Using the tcg_gen_helper_x_y it is possible to call any function -taking i32, i64 or pointer types. By default, before calling a helper, -all globals are stored at their canonical location and it is assumed -that the function can modify them. By default, the helper is allowed to -modify the CPU state or raise an exception. - -This can be overridden using the following function modifiers: -- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals, - either directly or via an exception. They will not be saved to their - canonical locations before calling the helper. -- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals. - They will only be saved to their canonical location before calling helpers, - but they won't be reloaded afterwards. -- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if - the return value is not used. - -Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS. - -On some TCG targets (e.g. x86), several calling conventions are -supported. - -* Branches: - -Use the instruction 'br' to jump to a label. - -3.3) Code Optimizations - -When generating instructions, you can count on at least the following -optimizations: - -- Single instructions are simplified, e.g. - - and_i32 t0, t0, $0xffffffff - - is suppressed. - -- A liveness analysis is done at the basic block level. The - information is used to suppress moves from a dead variable to - another one. It is also used to remove instructions which compute - dead results. The later is especially useful for condition code - optimization in QEMU. - - In the following example: - - add_i32 t0, t1, t2 - add_i32 t0, t0, $1 - mov_i32 t0, $1 - - only the last instruction is kept. 
- -3.4) Instruction Reference - -********* Function call - -* call <ret> <params> ptr - -call function 'ptr' (pointer type) - -<ret> optional 32 bit or 64 bit return value -<params> optional 32 bit or 64 bit parameters - -********* Jumps/Labels - -* set_label $label - -Define label 'label' at the current program point. - -* br $label - -Jump to label. - -* brcond_i32/i64 t0, t1, cond, label - -Conditional jump if t0 cond t1 is true. cond can be: - TCG_COND_EQ - TCG_COND_NE - TCG_COND_LT /* signed */ - TCG_COND_GE /* signed */ - TCG_COND_LE /* signed */ - TCG_COND_GT /* signed */ - TCG_COND_LTU /* unsigned */ - TCG_COND_GEU /* unsigned */ - TCG_COND_LEU /* unsigned */ - TCG_COND_GTU /* unsigned */ - -********* Arithmetic - -* add_i32/i64 t0, t1, t2 - -t0=t1+t2 - -* sub_i32/i64 t0, t1, t2 - -t0=t1-t2 - -* neg_i32/i64 t0, t1 - -t0=-t1 (two's complement) - -* mul_i32/i64 t0, t1, t2 - -t0=t1*t2 - -* div_i32/i64 t0, t1, t2 - -t0=t1/t2 (signed). Undefined behavior if division by zero or overflow. - -* divu_i32/i64 t0, t1, t2 - -t0=t1/t2 (unsigned). Undefined behavior if division by zero. - -* rem_i32/i64 t0, t1, t2 - -t0=t1%t2 (signed). Undefined behavior if division by zero or overflow. - -* remu_i32/i64 t0, t1, t2 - -t0=t1%t2 (unsigned). Undefined behavior if division by zero. - -********* Logical - -* and_i32/i64 t0, t1, t2 - -t0=t1&t2 - -* or_i32/i64 t0, t1, t2 - -t0=t1|t2 - -* xor_i32/i64 t0, t1, t2 - -t0=t1^t2 - -* not_i32/i64 t0, t1 - -t0=~t1 - -* andc_i32/i64 t0, t1, t2 - -t0=t1&~t2 - -* eqv_i32/i64 t0, t1, t2 - -t0=~(t1^t2), or equivalently, t0=t1^~t2 - -* nand_i32/i64 t0, t1, t2 - -t0=~(t1&t2) - -* nor_i32/i64 t0, t1, t2 - -t0=~(t1|t2) - -* orc_i32/i64 t0, t1, t2 - -t0=t1|~t2 - -* clz_i32/i64 t0, t1, t2 - -t0 = t1 ? clz(t1) : t2 - -* ctz_i32/i64 t0, t1, t2 - -t0 = t1 ? ctz(t1) : t2 - -* ctpop_i32/i64 t0, t1 - -t0 = number of bits set in t1 -With "ctpop" short for "count population", matching -the function name used in include/qemu/host-utils.h. - -********* Shifts/Rotates - -* shl_i32/i64 t0, t1, t2 - -t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) - -* shr_i32/i64 t0, t1, t2 - -t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) - -* sar_i32/i64 t0, t1, t2 - -t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) - -* rotl_i32/i64 t0, t1, t2 - -Rotation of t2 bits to the left. -Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) - -* rotr_i32/i64 t0, t1, t2 - -Rotation of t2 bits to the right. -Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64) - -********* Misc - -* mov_i32/i64 t0, t1 - -t0 = t1 - -Move t1 to t0 (both operands must have the same type). - -* ext8s_i32/i64 t0, t1 -ext8u_i32/i64 t0, t1 -ext16s_i32/i64 t0, t1 -ext16u_i32/i64 t0, t1 -ext32s_i64 t0, t1 -ext32u_i64 t0, t1 - -8, 16 or 32 bit sign/zero extension (both operands must have the same type) - -* bswap16_i32/i64 t0, t1, flags - -16 bit byte swap on the low bits of a 32/64 bit input. -If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15. -If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15. -If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15. -If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of -t0 above bit 15 may contain any value. - -* bswap32_i64 t0, t1, flags - -32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, -except they apply from bit 31 instead of bit 15. - -* bswap32_i32 t0, t1, flags -* bswap64_i64 t0, t1, flags - -32/64 bit byte swap. 
The flags are ignored, but still present -for consistency with the other bswap opcodes. - -* discard_i32/i64 t0 - -Indicate that the value of t0 won't be used later. It is useful to -force dead code elimination. - -* deposit_i32/i64 dest, t1, t2, pos, len - -Deposit T2 as a bitfield into T1, placing the result in DEST. -The bitfield is described by POS/LEN, which are immediate values: - - LEN - the length of the bitfield - POS - the position of the first bit, counting from the LSB - -For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field -at bit 8. This operation would be equivalent to - - dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) - -* extract_i32/i64 dest, t1, pos, len -* sextract_i32/i64 dest, t1, pos, len - -Extract a bitfield from T1, placing the result in DEST. -The bitfield is described by POS/LEN, which are immediate values, -as above for deposit. For extract_*, the result will be extended -to the left with zeros; for sextract_*, the result will be extended -to the left with copies of the bitfield sign bit at pos + len - 1. - -For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field -at bit 8. This operation would be equivalent to - - dest = (t1 << 20) >> 28 - -(using an arithmetic right shift). - -* extract2_i32/i64 dest, t1, t2, pos - -For N = {32,64}, extract an N-bit quantity from the concatenation -of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander -accepts 0 <= pos <= N as inputs. The backend code generator will -not see either 0 or N as inputs for these opcodes. - -* extrl_i64_i32 t0, t1 - -For 64-bit hosts only, extract the low 32-bits of input T1 and place it -into 32-bit output T0. Depending on the host, this may be a simple move, -or may require additional canonicalization. - -* extrh_i64_i32 t0, t1 - -For 64-bit hosts only, extract the high 32-bits of input T1 and place it -into 32-bit output T0. Depending on the host, this may be a simple shift, -or may require additional canonicalization. - -********* Conditional moves - -* setcond_i32/i64 dest, t1, t2, cond - -dest = (t1 cond t2) - -Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. - -* movcond_i32/i64 dest, c1, c2, v1, v2, cond - -dest = (c1 cond c2 ? v1 : v2) - -Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2. - -********* Type conversions - -* ext_i32_i64 t0, t1 -Convert t1 (32 bit) to t0 (64 bit) and does sign extension - -* extu_i32_i64 t0, t1 -Convert t1 (32 bit) to t0 (64 bit) and does zero extension - -* trunc_i64_i32 t0, t1 -Truncate t1 (64 bit) to t0 (32 bit) - -* concat_i32_i64 t0, t1, t2 -Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half -from t2 (32 bit). - -* concat32_i64 t0, t1, t2 -Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half -from t2 (64 bit). - -********* Load/Store - -* ld_i32/i64 t0, t1, offset -ld8s_i32/i64 t0, t1, offset -ld8u_i32/i64 t0, t1, offset -ld16s_i32/i64 t0, t1, offset -ld16u_i32/i64 t0, t1, offset -ld32s_i64 t0, t1, offset -ld32u_i64 t0, t1, offset - -t0 = read(t1 + offset) -Load 8, 16, 32 or 64 bits with or without sign extension from host memory. -offset must be a constant. - -* st_i32/i64 t0, t1, offset -st8_i32/i64 t0, t1, offset -st16_i32/i64 t0, t1, offset -st32_i64 t0, t1, offset - -write(t0, t1 + offset) -Write 8, 16, 32 or 64 bits to host memory. - -All this opcodes assume that the pointed host memory doesn't correspond -to a global. In the latter case the behaviour is unpredictable. 
- -********* Multiword arithmetic support - -* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high -* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high - -Similar to add/sub, except that the double-word inputs T1 and T2 are -formed from two single-word arguments, and the double-word output T0 -is returned in two single-word outputs. - -* mulu2_i32/i64 t0_low, t0_high, t1, t2 - -Similar to mul, except two unsigned inputs T1 and T2 yielding the full -double-word product T0. The later is returned in two single-word outputs. - -* muls2_i32/i64 t0_low, t0_high, t1, t2 - -Similar to mulu2, except the two inputs T1 and T2 are signed. - -* mulsh_i32/i64 t0, t1, t2 -* muluh_i32/i64 t0, t1, t2 - -Provide the high part of a signed or unsigned multiply, respectively. -If mulu2/muls2 are not provided by the backend, the tcg-op generator -can obtain the same results can be obtained by emitting a pair of -opcodes, mul+muluh/mulsh. - -********* Memory Barrier support - -* mb <$arg> - -Generate a target memory barrier instruction to ensure memory ordering as being -enforced by a corresponding guest memory barrier instruction. The ordering -enforced by the backend may be stricter than the ordering required by the guest. -It cannot be weaker. This opcode takes a constant argument which is required to -generate the appropriate barrier instruction. The backend should take care to -emit the target barrier instruction only when necessary i.e., for SMP guests and -when MTTCG is enabled. - -The guest translators should generate this opcode for all guest instructions -which have ordering side effects. - -Please see docs/devel/atomics.rst for more information on memory barriers. - -********* 64-bit guest on 32-bit host support - -The following opcodes are internal to TCG. Thus they are to be implemented by -32-bit host code generators, but are not to be emitted by guest translators. -They are emitted as needed by inline functions within "tcg-op.h". - -* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label - -Similar to brcond, except that the 64-bit values T0 and T1 -are formed from two 32-bit arguments. - -* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond - -Similar to setcond, except that the 64-bit values T1 and T2 are -formed from two 32-bit arguments. The result is a 32-bit value. - -********* QEMU specific operations - -* exit_tb t0 - -Exit the current TB and return the value t0 (word type). - -* goto_tb index - -Exit the current TB and jump to the TB index 'index' (constant) if the -current TB was linked to this TB. Otherwise execute the next -instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued -at most once with each slot index per TB. - -* lookup_and_goto_ptr tb_addr - -Look up a TB address ('tb_addr') and jump to it if valid. If not valid, -jump to the TCG epilogue to go back to the exec loop. - -This operation is optional. If the TCG backend does not implement the -goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). - -* qemu_ld_i32/i64 t0, t1, flags, memidx -* qemu_st_i32/i64 t0, t1, flags, memidx -* qemu_st8_i32 t0, t1, flags, memidx - -Load data at the guest address t1 into t0, or store data in t0 at guest -address t1. The _i32/_i64 size applies to the size of the input/output -register t0 only. The address t1 is always sized according to the guest, -and the width of the memory operation is controlled by flags. 
- -Both t0 and t1 may be split into little-endian ordered pairs of registers -if dealing with 64-bit quantities on a 32-bit host. - -The memidx selects the qemu tlb index to use (e.g. user or kernel access). -The flags are the MemOp bits, selecting the sign, width, and endianness -of the memory access. - -For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a -64-bit memory access specified in flags. - -For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of -the memory operation is known to be 8-bit. This allows the backend to -provide a different set of register constraints. - -********* Host vector operations - -All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE. -The former specifies the length of the vector in log2 64-bit units; the -later specifies the length of the element (if applicable) in log2 8-bit units. -E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. - -* mov_vec v0, v1 -* ld_vec v0, t1 -* st_vec v0, t1 - - Move, load and store. - -* dup_vec v0, r1 - - Duplicate the low N bits of R1 into VECL/VECE copies across V0. - -* dupi_vec v0, c - - Similarly, for a constant. - Smaller values will be replicated to host register size by the expanders. - -* dup2_vec v0, r1, r2 - - Duplicate r2:r1 into VECL/64 copies across V0. This opcode is - only present for 32-bit hosts. - -* add_vec v0, v1, v2 - - v0 = v1 + v2, in elements across the vector. - -* sub_vec v0, v1, v2 - - Similarly, v0 = v1 - v2. - -* mul_vec v0, v1, v2 - - Similarly, v0 = v1 * v2. - -* neg_vec v0, v1 - - Similarly, v0 = -v1. - -* abs_vec v0, v1 - - Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector. - -* smin_vec: -* umin_vec: - - Similarly, v0 = MIN(v1, v2), for signed and unsigned element types. - -* smax_vec: -* umax_vec: - - Similarly, v0 = MAX(v1, v2), for signed and unsigned element types. - -* ssadd_vec: -* sssub_vec: -* usadd_vec: -* ussub_vec: - - Signed and unsigned saturating addition and subtraction. If the true - result is not representable within the element type, the element is - set to the minimum or maximum value for the type. - -* and_vec v0, v1, v2 -* or_vec v0, v1, v2 -* xor_vec v0, v1, v2 -* andc_vec v0, v1, v2 -* orc_vec v0, v1, v2 -* not_vec v0, v1 - - Similarly, logical operations with and without complement. - Note that VECE is unused. - -* shli_vec v0, v1, i2 -* shls_vec v0, v1, s2 - - Shift all elements from v1 by a scalar i2/s2. I.e. - - for (i = 0; i < VECL/VECE; ++i) { - v0[i] = v1[i] << s2; - } - -* shri_vec v0, v1, i2 -* sari_vec v0, v1, i2 -* rotli_vec v0, v1, i2 -* shrs_vec v0, v1, s2 -* sars_vec v0, v1, s2 - - Similarly for logical and arithmetic right shift, and left rotate. - -* shlv_vec v0, v1, v2 - - Shift elements from v1 by elements from v2. I.e. - - for (i = 0; i < VECL/VECE; ++i) { - v0[i] = v1[i] << v2[i]; - } - -* shrv_vec v0, v1, v2 -* sarv_vec v0, v1, v2 -* rotlv_vec v0, v1, v2 -* rotrv_vec v0, v1, v2 - - Similarly for logical and arithmetic right shift, and rotates. - -* cmp_vec v0, v1, v2, cond - - Compare vectors by element, storing -1 for true and 0 for false. - -* bitsel_vec v0, v1, v2, v3 - - Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector. - -* cmpsel_vec v0, c1, c2, v3, v4, cond - - Select elements based on comparison results: - for (i = 0; i < n; ++i) { - v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i]. - } - -********* - -Note 1: Some shortcuts are defined when the last operand is known to be -a constant (e.g. addi for add, movi for mov). 
- -Note 2: When using TCG, the opcodes must never be generated directly -as some of them may not be available as "real" opcodes. Always use the -function tcg_gen_xxx(args). - -4) Backend - -tcg-target.h contains the target-specific definitions. tcg-target.c.inc -contains the target-specific code; it is #included by tcg/tcg.c, rather -than being a standalone C file. - -4.1) Assumptions - -The target word size (TCG_TARGET_REG_BITS) is expected to be 32-bit or -64-bit. It is expected that the pointer has the same size as the word. - -On a 32-bit target, all 64-bit operations are converted to 32 bits. A -few specific operations must be implemented to allow it (see add2_i32, -sub2_i32, brcond2_i32). - -On a 64-bit target, the values are transferred between 32 and 64-bit -registers using the following ops: -- trunc_shr_i64_i32 -- ext_i32_i64 -- extu_i32_i64 - -They ensure that the values are correctly truncated or extended when -moved from a 32-bit to a 64-bit register or vice-versa. Note that -trunc_shr_i64_i32 is an optional op. It is not necessary to implement -it if all the following conditions are met: -- 64-bit registers can hold 32-bit values -- 32-bit values in a 64-bit register do not need to stay zero- or - sign-extended -- all 32-bit TCG ops ignore the high part of 64-bit registers - -Floating-point operations are not supported in this version. A -previous incarnation of the code generator had full support of them, -but it is better to concentrate on integer operations first. - -4.2) Constraints - -GCC-like constraints are used to define the constraints of every -instruction. Memory constraints are not supported in this -version. Aliases are specified in the input operands as for GCC. - -The same register may be used for both an input and an output, even when -they are not explicitly aliased. If an op expands to multiple target -instructions then care must be taken to avoid clobbering input values. -GCC-style "early clobber" outputs are supported, with '&'. - -A target can define specific register or constant constraints. If an -operation uses a constant input constraint which does not allow all -constants, it must also accept registers in order to have a fallback. -The constraint 'i' is defined generically to accept any constant. -The constraint 'r' is not defined generically, but is consistently -used by each backend to indicate all registers. - -The movi_i32 and movi_i64 operations must accept any constants. - -The mov_i32 and mov_i64 operations must accept any registers of the -same type. - -The ld/st/sti instructions must accept signed 32-bit constant offsets. -This can be implemented by reserving a specific register in which to -compute the address if the offset is too big. - -The ld/st instructions must accept any destination (ld) or source (st) -register. - -The sti instruction may fail if it cannot store the given constant. - -4.3) Function call assumptions - -- The only supported types for parameters and return value are: 32-bit - and 64-bit integers and pointers. -- The stack grows downwards. -- The first N parameters are passed in registers. -- The next parameters are passed on the stack by storing them as words. -- Some registers are clobbered during the call. -- The function can return 0 or 1 value in registers. On a 32-bit - target, functions must be able to return 2 values in registers for a - 64-bit return type. - -5) Recommended coding rules for best performance - -- Use globals to represent the parts of the QEMU CPU state which are - often modified, e.g.
the integer registers and the condition - codes. TCG will be able to use host registers to store them. - -- Avoid globals stored in fixed registers. They must be used only to - store the pointer to the CPU state and possibly to store a pointer - to a register window. - -- Use temporaries. Use local temporaries only when really needed, - e.g. when you need to use a value after a jump. Local temporaries - introduce a performance hit in the current TCG implementation: their - content is saved to memory at the end of each basic block. - -- Free temporaries and local temporaries when they are no longer used - (tcg_temp_free). Since tcg_const_x() also creates a temporary, you - should free it after it is used. Freeing temporaries does not yield - better generated code, but it reduces the memory usage of TCG and - speeds up translation. - -- Don't hesitate to use helpers for complicated or seldom-used guest - instructions. There is little performance advantage in using TCG to - implement guest instructions taking more than about twenty TCG - instructions. Note that this rule of thumb is more applicable to - helpers doing complex logic or arithmetic, where the C compiler has - scope to do a good job of optimisation; it is less relevant where - the instruction is mostly doing loads and stores, and in those cases - inline TCG may still be faster for longer sequences. - -- The hard limit on the number of TCG instructions you can generate - per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h -- - you cannot exceed this without risking a buffer overrun. - -- Use the 'discard' instruction if you know that TCG won't be able to - prove that a given global is "dead" at a given program point. The - x86 guest uses it to improve optimisation of the condition codes.
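A short sketch of the temporary-usage rules above; the temps 'dst' and 'src' are assumed to exist, so this is illustrative rather than code from this patch.

    /* Compute dst = (src << 1) + src via a scratch temporary, and
       free the temporary as soon as it is dead so TCG can reuse
       its slot. */
    TCGv_i32 t0 = tcg_temp_new_i32();
    tcg_gen_shli_i32(t0, src, 1);
    tcg_gen_add_i32(dst, t0, src);
    tcg_temp_free_i32(t0);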
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 344b63e..ad1816e 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1336,22 +1336,23 @@ static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target) } } -static inline void tcg_out_callr(TCGContext *s, TCGReg reg) -{ - tcg_out_insn(s, 3207, BLR, reg); -} - -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) { ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2; if (offset == sextract64(offset, 0, 26)) { tcg_out_insn(s, 3206, BL, offset); } else { tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); - tcg_out_callr(s, TCG_REG_TMP); + tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); } } +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, + const TCGHelperInfo *info) +{ + tcg_out_call_int(s, target); +} + void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, uintptr_t jmp_rw, uintptr_t addr) { @@ -1599,7 +1600,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi); tcg_out_adr(s, TCG_REG_X3, lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); if (opc & MO_SIGN) { tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0); } else { @@ -1625,7 +1626,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi); tcg_out_adr(s, TCG_REG_X4, lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]); tcg_out_goto(s, lb->raddr); return true; } diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 485f685..413a541 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -16,7 +16,6 @@ #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 #define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#undef TCG_TARGET_STACK_GROWSUP typedef enum { TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, @@ -52,8 +51,9 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_ALIGN_ARGS 1 #define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 2c6c353..9245ea8 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1131,7 +1131,7 @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr) * The call case is mostly used for helpers - so it's not unreasonable * for them to be beyond branch range. 
*/ -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *addr) { intptr_t addri = (intptr_t)addr; ptrdiff_t disp = tcg_pcrel_diff(s, addr); @@ -1150,6 +1150,12 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr) tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP); } +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr, + const TCGHelperInfo *info) +{ + tcg_out_call_int(s, addr); +} + static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) { if (l->has_value) { @@ -1515,7 +1521,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14); /* Use the canonical unsigned helpers and minimize icache usage. */ - tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); datalo = lb->datalo_reg; datahi = lb->datahi_reg; diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 7e96495..b7843d2 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -30,7 +30,6 @@ extern int arm_arch; #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) -#undef TCG_TARGET_STACK_GROWSUP #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 #define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX @@ -89,8 +88,9 @@ extern bool use_neon_instructions; /* used for function call generation */ #define TCG_TARGET_STACK_ALIGN 8 -#define TCG_TARGET_CALL_ALIGN_ARGS 1 #define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN /* optional instructions */ #define TCG_TARGET_HAS_ext8s_i32 1 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index cb04e4b..58bd587 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1661,7 +1661,8 @@ static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) } } -static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, + const TCGHelperInfo *info) { tcg_out_branch(s, 1, dest); } @@ -1885,7 +1886,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) (uintptr_t)l->raddr); } - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); data_reg = l->datalo_reg; switch (opc & MO_SSIZE) { diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 00fcbe2..7edb7f1 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -98,6 +98,8 @@ typedef enum { #else #define TCG_TARGET_CALL_STACK_OFFSET 0 #endif +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL extern bool have_bmi1; extern bool have_popcnt; diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index d326e28..c9e99e8 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -567,7 +567,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) } } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, + const TCGHelperInfo *info) { tcg_out_call_int(s, arg, false); } @@ -760,7 +761,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi); tcg_out_movi(s, TCG_TYPE_PTR, 
TCG_REG_A3, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_ld_helpers[size]); + tcg_out_call_int(s, qemu_ld_helpers[size], false); switch (opc & MO_SSIZE) { case MO_SB: @@ -821,7 +822,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_st_helpers[size]); + tcg_out_call_int(s, qemu_st_helpers[size], false); return tcg_out_goto(s, l->raddr); } diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index a659c8d..e5f7a1f 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -92,8 +92,9 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_ALIGN_ARGS 1 #define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index bd76f0c..292e490 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1020,7 +1020,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) } } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, + const TCGHelperInfo *info) { tcg_out_call_int(s, arg, false); tcg_out_nop(s); diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 7669213..15721c3 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -83,10 +83,12 @@ typedef enum { #define TCG_TARGET_STACK_ALIGN 16 #if _MIPS_SIM == _ABIO32 # define TCG_TARGET_CALL_STACK_OFFSET 16 +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN #else # define TCG_TARGET_CALL_STACK_OFFSET 0 +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL #endif -#define TCG_TARGET_CALL_ALIGN_ARGS 1 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL /* MOVN/MOVZ instructions detection */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ diff --git a/tcg/optimize.c b/tcg/optimize.c index ae081ab..763bca9 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -667,9 +667,7 @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) { for (int i = 0; i < nb_args; i++) { TCGTemp *ts = arg_temp(op->args[i]); - if (ts) { - init_ts_info(ctx, ts); - } + init_ts_info(ctx, ts); } } @@ -680,7 +678,7 @@ static void copy_propagate(OptContext *ctx, TCGOp *op, for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { TCGTemp *ts = arg_temp(op->args[i]); - if (ts && ts_is_copy(ts)) { + if (ts_is_copy(ts)) { op->args[i] = temp_arg(find_better_copy(s, ts)); } } @@ -962,7 +960,7 @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0); + op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, al); tcg_opt_gen_movi(ctx, op2, rh, ah); @@ -1613,7 +1611,7 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. 
*/ - op2 = tcg_op_insert_before(ctx->tcg, op, 0); + op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, l); tcg_opt_gen_movi(ctx, op2, rh, h); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e3dba47..e062146 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -42,10 +42,17 @@ # else # error "Unknown ABI" # endif -#endif +#endif +#if TCG_TARGET_REG_BITS == 64 +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND +#else +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#endif #ifdef _CALL_SYSV -# define TCG_TARGET_CALL_ALIGN_ARGS 1 +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN +#else +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL #endif /* For some memory operations, we need a scratch that isn't R0. For the AIX @@ -1995,7 +2002,8 @@ static void tcg_out_call_int(TCGContext *s, int lk, #endif } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, + const TCGHelperInfo *info) { tcg_out_call_int(s, LK, target); } @@ -2202,9 +2210,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) lo = lb->addrlo_reg; hi = lb->addrhi_reg; if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); } else { @@ -2216,7 +2222,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); tcg_out32(s, MFSPR | RT(arg) | LR); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); lo = lb->datalo_reg; hi = lb->datahi_reg; @@ -2250,9 +2256,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) lo = lb->addrlo_reg; hi = lb->addrhi_reg; if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); tcg_out_mov(s, TCG_TYPE_I32, arg++, lo); } else { @@ -2266,9 +2270,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) if (TCG_TARGET_REG_BITS == 32) { switch (s_bits) { case MO_64: -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); tcg_out_mov(s, TCG_TYPE_I32, arg++, hi); /* FALLTHRU */ case MO_32: @@ -2289,7 +2291,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) tcg_out_movi(s, TCG_TYPE_I32, arg++, oi); tcg_out32(s, MFSPR | RT(arg) | LR); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tcg_out_b(s, 0, lb->raddr); return true; @@ -2324,9 +2326,8 @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { TCGReg arg = TCG_REG_R4; -#ifdef TCG_TARGET_CALL_ALIGN_ARGS - arg |= 1; -#endif + + arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN); if (l->addrlo_reg != arg) { tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg); tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg); @@ -2525,7 +2526,6 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) /* Parameters for function call generation, used in tcg.c. 
*/ #define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_EXTEND_ARGS 1 #ifdef _CALL_AIX # define LINK_AREA_SIZE (6 * SZR) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 81a83e4..f741e05 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -154,13 +154,26 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) if ((ct & TCG_CT_CONST_ZERO) && val == 0) { return 1; } - if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) { + /* + * Sign extended from 12 bits: [-0x800, 0x7ff]. + * Used for most arithmetic, as this is the isa field. + */ + if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) { return 1; } - if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) { + /* + * Sign extended from 12 bits, negated: [-0x7ff, 0x800]. + * Used for subtraction, where a constant must be handled by ADDI. + */ + if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) { return 1; } - if ((ct & TCG_CT_CONST_M12) && val >= -0xfff && val <= 0xfff) { + /* + * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff]. + * Used by addsub2, which may need the negative operation, + * and requires the modified constant to be representable. + */ + if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { return 1; } return 0; @@ -687,9 +700,15 @@ static void tcg_out_addsub2(TCGContext *s, if (cbl) { tcg_out_opc_imm(s, opc_addi, rl, al, bl); tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl); - } else if (rl == al && rl == bl) { + } else if (al == bl) { + /* + * If the input regs overlap, this is a simple doubling + * and carry-out is the input msb. This special case is + * required when the output reg overlaps the input, + * but we might as well use it always. + */ tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0); - tcg_out_opc_reg(s, opc_addi, rl, al, bl); + tcg_out_opc_reg(s, opc_add, rl, al, al); } else { tcg_out_opc_reg(s, opc_add, rl, al, bl); tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, @@ -819,7 +838,8 @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) } } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, + const TCGHelperInfo *info) { tcg_out_call_int(s, arg, false); } @@ -904,9 +924,9 @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target) tcg_debug_assert(ok); } -static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, - TCGReg addrh, MemOpIdx oi, - tcg_insn_unit **label_ptr, bool is_load) +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl, + TCGReg addrh, MemOpIdx oi, + tcg_insn_unit **label_ptr, bool is_load) { MemOp opc = get_memop(oi); unsigned s_bits = opc & MO_SIZE; @@ -956,6 +976,7 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, addrl = TCG_REG_TMP0; } tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl); + return TCG_REG_TMP0; } static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi, @@ -1002,7 +1023,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, a2, oi); tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]); + tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false); tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0); tcg_out_goto(s, l->raddr); @@ -1047,7 +1068,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) tcg_out_movi(s, TCG_TYPE_PTR, a3, oi); tcg_out_movi(s, 
TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr); - tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]); + tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false); tcg_out_goto(s, l->raddr); return true; @@ -1158,7 +1179,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #else unsigned a_bits; #endif - TCGReg base = TCG_REG_TMP0; + TCGReg base; data_regl = *args++; data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); @@ -1168,23 +1189,25 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) opc = get_memop(oi); #if defined(CONFIG_SOFTMMU) - tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1); + base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1); tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); add_qemu_ldst_label(s, 1, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32), data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, base, addr_regl); - addr_regl = base; - } a_bits = get_alignment_bits(opc); if (a_bits) { tcg_out_test_alignment(s, true, addr_regl, a_bits); } + base = addr_regl; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP0, base); + base = TCG_REG_TMP0; + } if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); + base = TCG_REG_TMP0; } tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64); #endif @@ -1230,7 +1253,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #else unsigned a_bits; #endif - TCGReg base = TCG_REG_TMP0; + TCGReg base; data_regl = *args++; data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0); @@ -1240,23 +1263,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) opc = get_memop(oi); #if defined(CONFIG_SOFTMMU) - tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0); + base = tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0); tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); add_qemu_ldst_label(s, 0, oi, (is_64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32), data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { - tcg_out_ext32u(s, base, addr_regl); - addr_regl = base; - } a_bits = get_alignment_bits(opc); if (a_bits) { tcg_out_test_alignment(s, false, addr_regl, a_bits); } + base = addr_regl; + if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { + tcg_out_ext32u(s, TCG_REG_TMP0, base); + base = TCG_REG_TMP0; + } if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl); + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); + base = TCG_REG_TMP0; } tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc); #endif diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 11c9b3e..232537c 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -81,8 +81,13 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_SP #define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_ALIGN_ARGS 1 #define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#if TCG_TARGET_REG_BITS == 32 +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN +#else +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#endif /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 0 diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 426dd92..15f1c55 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -13,6 +13,7 @@ C_O0_I1(r) C_O0_I2(L, L) C_O0_I2(r, r) C_O0_I2(r, ri) +C_O0_I2(r, rA) C_O0_I2(v, r) C_O1_I1(r, L) C_O1_I1(r, r) @@ -22,15 +23,24 @@ C_O1_I1(v, vr) C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, 0, rJ) +C_O1_I2(r, r, r) C_O1_I2(r, r, ri) +C_O1_I2(r, r, rA) +C_O1_I2(r, r, rI) +C_O1_I2(r, r, rJ) +C_O1_I2(r, r, rK) +C_O1_I2(r, r, rKR) +C_O1_I2(r, r, rNK) +C_O1_I2(r, r, rNKR) C_O1_I2(r, rZ, r) C_O1_I2(v, v, r) C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) -C_O1_I4(r, r, ri, r, 0) -C_O1_I4(r, r, ri, rI, 0) -C_O2_I2(b, a, 0, r) -C_O2_I3(b, a, 0, 1, r) +C_O1_I4(r, r, ri, rI, r) +C_O1_I4(r, r, rA, rI, r) +C_O2_I2(o, m, 0, r) +C_O2_I2(o, m, r, r) +C_O2_I3(o, m, 0, 1, r) C_O2_I4(r, r, 0, 1, rA, r) C_O2_I4(r, r, 0, 1, ri, r) C_O2_I4(r, r, 0, 1, r, r) diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h index 8bb0358..6fa64a1 100644 --- a/tcg/s390x/tcg-target-con-str.h +++ b/tcg/s390x/tcg-target-con-str.h @@ -11,13 +11,7 @@ REGS('r', ALL_GENERAL_REGS) REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) REGS('v', ALL_VECTOR_REGS) -/* - * A (single) even/odd pair for division. - * TODO: Add something to the register allocator to allow - * this kind of regno+1 pairing to be done more generally. - */ -REGS('a', 1u << TCG_REG_R2) -REGS('b', 1u << TCG_REG_R3) +REGS('o', 0xaaaa) /* odd numbered general regs */ /* * Define constraint letters for constants: @@ -26,4 +20,7 @@ REGS('b', 1u << TCG_REG_R3) CONST('A', TCG_CT_CONST_S33) CONST('I', TCG_CT_CONST_S16) CONST('J', TCG_CT_CONST_S32) +CONST('K', TCG_CT_CONST_P32) +CONST('N', TCG_CT_CONST_INV) +CONST('R', TCG_CT_CONST_INVRISBG) CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 33becd7..2b38fd9 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -33,15 +33,13 @@ #include "../tcg-pool.c.inc" #include "elf.h" -/* ??? The translation blocks produced by TCG are generally small enough to - be entirely reachable with a 16-bit displacement. 
Leaving the option for - a 32-bit displacement here Just In Case. */ -#define USE_LONG_BRANCHES 0 - -#define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_S32 0x200 -#define TCG_CT_CONST_S33 0x400 -#define TCG_CT_CONST_ZERO 0x800 +#define TCG_CT_CONST_S16 (1 << 8) +#define TCG_CT_CONST_S32 (1 << 9) +#define TCG_CT_CONST_S33 (1 << 10) +#define TCG_CT_CONST_ZERO (1 << 11) +#define TCG_CT_CONST_P32 (1 << 12) +#define TCG_CT_CONST_INV (1 << 13) +#define TCG_CT_CONST_INVRISBG (1 << 14) #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -65,12 +63,6 @@ /* A scratch register that may be be used throughout the backend. */ #define TCG_TMP0 TCG_REG_R1 -/* A scratch register that holds a pointer to the beginning of the TB. - We don't need this when we have pc-relative loads with the general - instructions extension facility. */ -#define TCG_REG_TB TCG_REG_R12 -#define USE_REG_TB (!HAVE_FACILITY(GEN_INST_EXT)) - #ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R13 #endif @@ -139,16 +131,19 @@ typedef enum S390Opcode { RI_OILL = 0xa50b, RI_TMLL = 0xa701, - RIE_CGIJ = 0xec7c, - RIE_CGRJ = 0xec64, - RIE_CIJ = 0xec7e, - RIE_CLGRJ = 0xec65, - RIE_CLIJ = 0xec7f, - RIE_CLGIJ = 0xec7d, - RIE_CLRJ = 0xec77, - RIE_CRJ = 0xec76, - RIE_LOCGHI = 0xec46, - RIE_RISBG = 0xec55, + RIEb_CGRJ = 0xec64, + RIEb_CLGRJ = 0xec65, + RIEb_CLRJ = 0xec77, + RIEb_CRJ = 0xec76, + + RIEc_CGIJ = 0xec7c, + RIEc_CIJ = 0xec7e, + RIEc_CLGIJ = 0xec7d, + RIEc_CLIJ = 0xec7f, + + RIEf_RISBG = 0xec55, + + RIEg_LOCGHI = 0xec46, RRE_AGR = 0xb908, RRE_ALGR = 0xb90a, @@ -183,18 +178,35 @@ typedef enum S390Opcode { RRE_SLBGR = 0xb989, RRE_XGR = 0xb982, - RRF_LOCR = 0xb9f2, - RRF_LOCGR = 0xb9e2, - RRF_NRK = 0xb9f4, - RRF_NGRK = 0xb9e4, - RRF_ORK = 0xb9f6, - RRF_OGRK = 0xb9e6, - RRF_SRK = 0xb9f9, - RRF_SGRK = 0xb9e9, - RRF_SLRK = 0xb9fb, - RRF_SLGRK = 0xb9eb, - RRF_XRK = 0xb9f7, - RRF_XGRK = 0xb9e7, + RRFa_MGRK = 0xb9ec, + RRFa_MSRKC = 0xb9fd, + RRFa_MSGRKC = 0xb9ed, + RRFa_NCRK = 0xb9f5, + RRFa_NCGRK = 0xb9e5, + RRFa_NNRK = 0xb974, + RRFa_NNGRK = 0xb964, + RRFa_NORK = 0xb976, + RRFa_NOGRK = 0xb966, + RRFa_NRK = 0xb9f4, + RRFa_NGRK = 0xb9e4, + RRFa_NXRK = 0xb977, + RRFa_NXGRK = 0xb967, + RRFa_OCRK = 0xb975, + RRFa_OCGRK = 0xb965, + RRFa_ORK = 0xb9f6, + RRFa_OGRK = 0xb9e6, + RRFa_SRK = 0xb9f9, + RRFa_SGRK = 0xb9e9, + RRFa_SLRK = 0xb9fb, + RRFa_SLGRK = 0xb9eb, + RRFa_XRK = 0xb9f7, + RRFa_XGRK = 0xb9e7, + + RRFam_SELGR = 0xb9e3, + + RRFc_LOCR = 0xb9f2, + RRFc_LOCGR = 0xb9e2, + RRFc_POPCNT = 0xb9e1, RR_AR = 0x1a, RR_ALR = 0x1e, @@ -511,6 +523,60 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type, return false; } +static int is_const_p16(uint64_t val) +{ + for (int i = 0; i < 4; ++i) { + uint64_t mask = 0xffffull << (i * 16); + if ((val & ~mask) == 0) { + return i; + } + } + return -1; +} + +static int is_const_p32(uint64_t val) +{ + if ((val & 0xffffffff00000000ull) == 0) { + return 0; + } + if ((val & 0x00000000ffffffffull) == 0) { + return 1; + } + return -1; +} + +/* + * Accept bit patterns like these: + * 0....01....1 + * 1....10....0 + * 1..10..01..1 + * 0..01..10..0 + * Copied from gcc sources. + */ +static bool risbg_mask(uint64_t c) +{ + uint64_t lsb; + /* We don't change the number of transitions by inverting, + so make sure we start with the LSB zero. */ + if (c & 1) { + c = ~c; + } + /* Reject all zeros or all ones. */ + if (c == 0) { + return false; + } + /* Find the first transition. */ + lsb = c & -c; + /* Invert to look for a second transition. 
*/ + c = ~c; + /* Erase the first transition. */ + c &= -lsb; + /* Find the second transition, if any. */ + lsb = c & -c; + /* Match if all the bits are 1's, or if c is zero. */ + return c == -lsb; +} + /* Test if a constant matches the constraint. */ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) { @@ -533,6 +599,20 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) return val == 0; } + if (ct & TCG_CT_CONST_INV) { + val = ~val; + } + /* + * Note that is_const_p16 is a subset of is_const_p32, + * so we don't need both constraints. + */ + if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) { + return true; + } + if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) { + return true; + } + return 0; } @@ -549,8 +629,22 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, tcg_out32(s, (op << 16) | (r1 << 4) | r2); } -static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op, - TCGReg r1, TCGReg r2, int m3) +/* RRF-a without the m4 field */ +static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2, TCGReg r3) +{ + tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2); +} + +/* RRF-a with the m4 field */ +static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2, TCGReg r3, int m4) +{ + tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2, int m3) { tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2); } @@ -560,7 +654,7 @@ static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); } -static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1, +static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1, int i2, int m3) { tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3); @@ -780,14 +874,22 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) return true; } -static const S390Opcode lli_insns[4] = { +static const S390Opcode li_insns[4] = { RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH }; +static const S390Opcode oi_insns[4] = { + RI_OILL, RI_OILH, RI_OIHL, RI_OIHH +}; +static const S390Opcode lif_insns[2] = { + RIL_LLILF, RIL_LLIHF, +}; -static bool maybe_out_small_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long sval) +/* load a register with an immediate value */ +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long sval) { tcg_target_ulong uval = sval; + ptrdiff_t pc_off; int i; if (type == TCG_TYPE_I32) { @@ -798,100 +900,51 @@ static bool maybe_out_small_movi(TCGContext *s, TCGType type, /* Try all 32-bit insns that can load it in one go. */ if (sval >= -0x8000 && sval < 0x8000) { tcg_out_insn(s, RI, LGHI, ret, sval); - return true; - } - - for (i = 0; i < 4; i++) { - tcg_target_long mask = 0xffffull << i*16; - if ((uval & mask) == uval) { - tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16); - return true; - } - } - - return false; -} - -/* load a register with an immediate value */ -static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long sval, bool in_prologue) -{ - tcg_target_ulong uval; - - /* Try all 32-bit insns that can load it in one go. 
*/ - if (maybe_out_small_movi(s, type, ret, sval)) { return; } - uval = sval; - if (type == TCG_TYPE_I32) { - uval = (uint32_t)sval; - sval = (int32_t)sval; + i = is_const_p16(uval); + if (i >= 0) { + tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16)); + return; } /* Try all 48-bit insns that can load it in one go. */ - if (HAVE_FACILITY(EXT_IMM)) { - if (sval == (int32_t)sval) { - tcg_out_insn(s, RIL, LGFI, ret, sval); - return; - } - if (uval <= 0xffffffff) { - tcg_out_insn(s, RIL, LLILF, ret, uval); - return; - } - if ((uval & 0xffffffff) == 0) { - tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32); - return; - } + if (sval == (int32_t)sval) { + tcg_out_insn(s, RIL, LGFI, ret, sval); + return; } - /* Try for PC-relative address load. For odd addresses, - attempt to use an offset from the start of the TB. */ - if ((sval & 1) == 0) { - ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1; - if (off == (int32_t)off) { - tcg_out_insn(s, RIL, LARL, ret, off); - return; - } - } else if (USE_REG_TB && !in_prologue) { - ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval); - if (off == sextract64(off, 0, 20)) { - /* This is certain to be an address within TB, and therefore - OFF will be negative; don't try RX_LA. */ - tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off); - return; - } + i = is_const_p32(uval); + if (i >= 0) { + tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32)); + return; } - /* A 32-bit unsigned value can be loaded in 2 insns. And given - that LLILL, LLIHL, LLILF above did not succeed, we know that - both insns are required. */ - if (uval <= 0xffffffff) { - tcg_out_insn(s, RI, LLILL, ret, uval); - tcg_out_insn(s, RI, IILH, ret, uval >> 16); + /* Try for PC-relative address load. For odd addresses, add one. */ + pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1; + if (pc_off == (int32_t)pc_off) { + tcg_out_insn(s, RIL, LARL, ret, pc_off); + if (sval & 1) { + tcg_out_insn(s, RI, AGHI, ret, 1); + } return; } - /* Otherwise, stuff it in the constant pool. */ - if (HAVE_FACILITY(GEN_INST_EXT)) { - tcg_out_insn(s, RIL, LGRL, ret, 0); - new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); - } else if (USE_REG_TB && !in_prologue) { - tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, sval, R_390_20, s->code_ptr - 2, - tcg_tbrel_diff(s, NULL)); + /* Otherwise, load it by parts. */ + i = is_const_p16((uint32_t)uval); + if (i >= 0) { + tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16)); } else { - TCGReg base = ret ? ret : TCG_TMP0; - tcg_out_insn(s, RIL, LARL, base, 0); - new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); - tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0); + tcg_out_insn(s, RIL, LLILF, ret, uval); + } + uval >>= 32; + i = is_const_p16(uval); + if (i >= 0) { + tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16)); + } else { + tcg_out_insn(s, RIL, OIHF, ret, uval); } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long sval) -{ - tcg_out_movi_int(s, type, ret, sval, false); } /* Emit a load/store type instruction. 
Inputs are: @@ -1020,122 +1073,33 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -/* load data from an absolute host address */ -static void tcg_out_ld_abs(TCGContext *s, TCGType type, - TCGReg dest, const void *abs) -{ - intptr_t addr = (intptr_t)abs; - - if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) { - ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1; - if (disp == (int32_t)disp) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, LRL, dest, disp); - } else { - tcg_out_insn(s, RIL, LGRL, dest, disp); - } - return; - } - } - if (USE_REG_TB) { - ptrdiff_t disp = tcg_tbrel_diff(s, abs); - if (disp == sextract64(disp, 0, 20)) { - tcg_out_ld(s, type, dest, TCG_REG_TB, disp); - return; - } - } - - tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff); - tcg_out_ld(s, type, dest, dest, addr & 0xffff); -} - static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, int msb, int lsb, int ofs, int z) { /* Format RIE-f */ - tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src); + tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src); tcg_out16(s, (msb << 8) | (z << 7) | lsb); - tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff)); + tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff)); } static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { - if (HAVE_FACILITY(EXT_IMM)) { - tcg_out_insn(s, RRE, LGBR, dest, src); - return; - } - - if (type == TCG_TYPE_I32) { - if (dest == src) { - tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24); - } - tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56); - tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56); - } + tcg_out_insn(s, RRE, LGBR, dest, src); } static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { - if (HAVE_FACILITY(EXT_IMM)) { - tcg_out_insn(s, RRE, LLGCR, dest, src); - return; - } - - if (dest == src) { - tcg_out_movi(s, type, TCG_TMP0, 0xff); - src = TCG_TMP0; - } else { - tcg_out_movi(s, type, dest, 0xff); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, src); - } else { - tcg_out_insn(s, RRE, NGR, dest, src); - } + tcg_out_insn(s, RRE, LLGCR, dest, src); } static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { - if (HAVE_FACILITY(EXT_IMM)) { - tcg_out_insn(s, RRE, LGHR, dest, src); - return; - } - - if (type == TCG_TYPE_I32) { - if (dest == src) { - tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16); - } - tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16); - } else { - tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48); - tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48); - } + tcg_out_insn(s, RRE, LGHR, dest, src); } static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) { - if (HAVE_FACILITY(EXT_IMM)) { - tcg_out_insn(s, RRE, LLGHR, dest, src); - return; - } - - if (dest == src) { - tcg_out_movi(s, type, TCG_TMP0, 0xffff); - src = TCG_TMP0; - } else { - tcg_out_movi(s, type, dest, 0xffff); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, src); - } else { - tcg_out_insn(s, RRE, NGR, dest, src); - } + tcg_out_insn(s, RRE, LLGHR, dest, src); } static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src) @@ -1148,36 +1112,6 @@ static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) tcg_out_insn(s, RRE, LLGFR, dest, src); } -/* Accept bit 
patterns like these: - 0....01....1 - 1....10....0 - 1..10..01..1 - 0..01..10..0 - Copied from gcc sources. */ -static inline bool risbg_mask(uint64_t c) -{ - uint64_t lsb; - /* We don't change the number of transitions by inverting, - so make sure we start with the LSB zero. */ - if (c & 1) { - c = ~c; - } - /* Reject all zeros or all ones. */ - if (c == 0) { - return false; - } - /* Find the first transition. */ - lsb = c & -c; - /* Invert to look for a second transition. */ - c = ~c; - /* Erase the first transition. */ - c &= -lsb; - /* Find the second transition, if any. */ - lsb = c & -c; - /* Match if all the bits are 1's, or if c is zero. */ - return c == -lsb; -} - static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val) { int msb, lsb; @@ -1208,157 +1142,78 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) tgen_ext32u(s, dest, dest); return; } - if (HAVE_FACILITY(EXT_IMM)) { - if ((val & valid) == 0xff) { - tgen_ext8u(s, TCG_TYPE_I64, dest, dest); - return; - } - if ((val & valid) == 0xffff) { - tgen_ext16u(s, TCG_TYPE_I64, dest, dest); - return; - } + if ((val & valid) == 0xff) { + tgen_ext8u(s, TCG_TYPE_I64, dest, dest); + return; + } + if ((val & valid) == 0xffff) { + tgen_ext16u(s, TCG_TYPE_I64, dest, dest); + return; } - /* Try all 32-bit insns that can perform it in one go. */ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = ~(0xffffull << i*16); - if (((val | ~valid) & mask) == mask) { - tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16); - return; - } + i = is_const_p16(~val & valid); + if (i >= 0) { + tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16)); + return; } - /* Try all 48-bit insns that can perform it in one go. */ - if (HAVE_FACILITY(EXT_IMM)) { - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = ~(0xffffffffull << i*32); - if (((val | ~valid) & mask) == mask) { - tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32); - return; - } - } + i = is_const_p32(~val & valid); + tcg_debug_assert(i == 0 || type != TCG_TYPE_I32); + if (i >= 0) { + tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32)); + return; } - if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) { + + if (risbg_mask(val)) { tgen_andi_risbg(s, dest, dest, val); return; } - /* Use the constant pool if USE_REG_TB, but not for small constants. */ - if (USE_REG_TB) { - if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) { - tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2, - tcg_tbrel_diff(s, NULL)); - return; - } - } else { - tcg_out_movi(s, type, TCG_TMP0, val); - } - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, NR, dest, TCG_TMP0); - } else { - tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0); - } + g_assert_not_reached(); } -static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) +static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val) { - static const S390Opcode oi_insns[4] = { - RI_OILL, RI_OILH, RI_OIHL, RI_OIHH - }; static const S390Opcode oif_insns[2] = { RIL_OILF, RIL_OIHF }; int i; - /* Look for no-op. */ - if (unlikely(val == 0)) { + i = is_const_p16(val); + if (i >= 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16)); return; } - /* Try all 32-bit insns that can perform it in one go. 
*/ - for (i = 0; i < 4; i++) { - tcg_target_ulong mask = (0xffffull << i*16); - if ((val & mask) != 0 && (val & ~mask) == 0) { - tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); - return; - } - } - - /* Try all 48-bit insns that can perform it in one go. */ - if (HAVE_FACILITY(EXT_IMM)) { - for (i = 0; i < 2; i++) { - tcg_target_ulong mask = (0xffffffffull << i*32); - if ((val & mask) != 0 && (val & ~mask) == 0) { - tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32); - return; - } - } + i = is_const_p32(val); + if (i >= 0) { + tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32)); + return; } - /* Use the constant pool if USE_REG_TB, but not for small constants. */ - if (maybe_out_small_movi(s, type, TCG_TMP0, val)) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, OR, dest, TCG_TMP0); - } else { - tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0); - } - } else if (USE_REG_TB) { - tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, val, R_390_20, s->code_ptr - 2, - tcg_tbrel_diff(s, NULL)); - } else { - /* Perform the OR via sequential modifications to the high and - low parts. Do this via recursion to handle 16-bit vs 32-bit - masks in each half. */ - tcg_debug_assert(HAVE_FACILITY(EXT_IMM)); - tgen_ori(s, type, dest, val & 0x00000000ffffffffull); - tgen_ori(s, type, dest, val & 0xffffffff00000000ull); - } + g_assert_not_reached(); } -static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val) +static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val) { - /* Try all 48-bit insns that can perform it in one go. */ - if (HAVE_FACILITY(EXT_IMM)) { - if ((val & 0xffffffff00000000ull) == 0) { - tcg_out_insn(s, RIL, XILF, dest, val); - return; - } - if ((val & 0x00000000ffffffffull) == 0) { - tcg_out_insn(s, RIL, XIHF, dest, val >> 32); - return; - } - } - - /* Use the constant pool if USE_REG_TB, but not for small constants. */ - if (maybe_out_small_movi(s, type, TCG_TMP0, val)) { - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RR, XR, dest, TCG_TMP0); - } else { - tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0); - } - } else if (USE_REG_TB) { - tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, val, R_390_20, s->code_ptr - 2, - tcg_tbrel_diff(s, NULL)); - } else { - /* Perform the xor by parts. */ - tcg_debug_assert(HAVE_FACILITY(EXT_IMM)); - if (val & 0xffffffff) { - tcg_out_insn(s, RIL, XILF, dest, val); - } - if (val > 0xffffffff) { - tcg_out_insn(s, RIL, XIHF, dest, val >> 32); - } + switch (is_const_p32(val)) { + case 0: + tcg_out_insn(s, RIL, XILF, dest, val); + break; + case 1: + tcg_out_insn(s, RIL, XIHF, dest, val >> 32); + break; + default: + g_assert_not_reached(); } } -static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, - TCGArg c2, bool c2const, bool need_carry) +static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, + TCGArg c2, bool c2const, bool need_carry, int *inv_cc) { bool is_unsigned = is_unsigned_cond(c); + TCGCond inv_c = tcg_invert_cond(c); S390Opcode op; if (c2const) { @@ -1369,6 +1224,7 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, } else { tcg_out_insn(s, RRE, LTGR, r1, r1); } + *inv_cc = tcg_cond_to_ltr_cond[inv_c]; return tcg_cond_to_ltr_cond[c]; } } @@ -1379,48 +1235,25 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, goto exit; } - if (HAVE_FACILITY(EXT_IMM)) { - if (type == TCG_TYPE_I32) { - op = (is_unsigned ? 
RIL_CLFI : RIL_CFI); - tcg_out_insn_RIL(s, op, r1, c2); - goto exit; - } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) { - op = (is_unsigned ? RIL_CLGFI : RIL_CGFI); - tcg_out_insn_RIL(s, op, r1, c2); - goto exit; - } + if (type == TCG_TYPE_I32) { + op = (is_unsigned ? RIL_CLFI : RIL_CFI); + tcg_out_insn_RIL(s, op, r1, c2); + goto exit; } - /* Use the constant pool, but not for small constants. */ - if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) { - c2 = TCG_TMP0; - /* fall through to reg-reg */ - } else if (USE_REG_TB) { - if (type == TCG_TYPE_I32) { - op = (is_unsigned ? RXY_CLY : RXY_CY); - tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2, - 4 - tcg_tbrel_diff(s, NULL)); - } else { - op = (is_unsigned ? RXY_CLG : RXY_CG); - tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0); - new_pool_label(s, c2, R_390_20, s->code_ptr - 2, - tcg_tbrel_diff(s, NULL)); - } - goto exit; - } else { - if (type == TCG_TYPE_I32) { - op = (is_unsigned ? RIL_CLRL : RIL_CRL); - tcg_out_insn_RIL(s, op, r1, 0); - new_pool_label(s, (uint32_t)c2, R_390_PC32DBL, - s->code_ptr - 2, 2 + 4); - } else { - op = (is_unsigned ? RIL_CLGRL : RIL_CGRL); - tcg_out_insn_RIL(s, op, r1, 0); - new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2); - } + /* + * Constraints are for a signed 33-bit operand, which is a + * convenient superset of this signed/unsigned test. + */ + if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) { + op = (is_unsigned ? RIL_CLGFI : RIL_CGFI); + tcg_out_insn_RIL(s, op, r1, c2); goto exit; } + + /* Load everything else into a register. */ + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2); + c2 = TCG_TMP0; } if (type == TCG_TYPE_I32) { @@ -1432,27 +1265,31 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, } exit: + *inv_cc = tcg_cond_to_s390_cond[inv_c]; return tcg_cond_to_s390_cond[c]; } +static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, + TCGArg c2, bool c2const, bool need_carry) +{ + int inv_cc; + return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc); +} + static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int c2const) { int cc; - bool have_loc; /* With LOC2, we can always emit the minimum 3 insns. */ if (HAVE_FACILITY(LOAD_ON_COND2)) { /* Emit: d = 0, d = (cc ? 1 : d). */ cc = tgen_cmp(s, type, cond, c1, c2, c2const, false); tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc); + tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc); return; } - have_loc = HAVE_FACILITY(LOAD_ON_COND); - - /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller. */ restart: switch (cond) { case TCG_COND_NE: @@ -1497,60 +1334,74 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, case TCG_COND_LT: case TCG_COND_GE: /* Swap operands so that we can use LEU/GTU/GT/LE. */ - if (c2const) { - if (have_loc) { - break; - } - tcg_out_movi(s, type, TCG_TMP0, c2); - c2 = c1; - c2const = 0; - c1 = TCG_TMP0; - } else { + if (!c2const) { TCGReg t = c1; c1 = c2; c2 = t; + cond = tcg_swap_cond(cond); + goto restart; } - cond = tcg_swap_cond(cond); - goto restart; + break; default: g_assert_not_reached(); } cc = tgen_cmp(s, type, cond, c1, c2, c2const, false); - if (have_loc) { - /* Emit: d = 0, t = 1, d = (cc ? t : d). 
*/ - tcg_out_movi(s, TCG_TYPE_I64, dest, 0); - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); - tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc); + /* Emit: d = 0, t = 1, d = (cc ? t : d). */ + tcg_out_movi(s, TCG_TYPE_I64, dest, 0); + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1); + tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc); +} + +static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest, + TCGArg v3, int v3const, TCGReg v4, + int cc, int inv_cc) +{ + TCGReg src; + + if (v3const) { + if (dest == v4) { + if (HAVE_FACILITY(LOAD_ON_COND2)) { + /* Emit: if (cc) dest = v3. */ + tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc); + return; + } + tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3); + src = TCG_TMP0; + } else { + /* LGR+LOCGHI is larger than LGHI+LOCGR. */ + tcg_out_insn(s, RI, LGHI, dest, v3); + cc = inv_cc; + src = v4; + } } else { - /* Emit: d = 1; if (cc) goto over; d = 0; over: */ - tcg_out_movi(s, type, dest, 1); - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_movi(s, type, dest, 0); + if (HAVE_FACILITY(MISC_INSN_EXT3)) { + /* Emit: dest = cc ? v3 : v4. */ + tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc); + return; + } + if (dest == v4) { + src = v3; + } else { + tcg_out_mov(s, type, dest, v3); + cc = inv_cc; + src = v4; + } } + + /* Emit: if (cc) dest = src. */ + tcg_out_insn(s, RRFc, LOCGR, dest, src, cc); } static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, TCGReg c1, TCGArg c2, int c2const, - TCGArg v3, int v3const) + TCGArg v3, int v3const, TCGReg v4) { - int cc; - if (HAVE_FACILITY(LOAD_ON_COND)) { - cc = tgen_cmp(s, type, c, c1, c2, c2const, false); - if (v3const) { - tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc); - } else { - tcg_out_insn(s, RRF, LOCGR, dest, v3, cc); - } - } else { - c = tcg_invert_cond(c); - cc = tgen_cmp(s, type, c, c1, c2, c2const, false); + int cc, inv_cc; - /* Emit: if (cc) goto over; dest = r3; over: */ - tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1); - tcg_out_insn(s, RRE, LGR, dest, v3); - } + cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc); + tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc); } static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1, @@ -1563,20 +1414,40 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1, if (a2const && a2 == 64) { tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0); - } else { - if (a2const) { - tcg_out_movi(s, TCG_TYPE_I64, dest, a2); - } else { - tcg_out_mov(s, TCG_TYPE_I64, dest, a2); - } - if (HAVE_FACILITY(LOAD_ON_COND)) { - /* Emit: if (one bit found) dest = r0. */ - tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2); - } else { - /* Emit: if (no one bit found) goto over; dest = r0; over: */ - tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1); - tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0); + return; + } + + /* + * Conditions from FLOGR are: + * 2 -> one bit found + * 8 -> no one bit found + */ + tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2); +} + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + /* With MIE3, and bit 0 of m4 set, we get the complete result. */ + if (HAVE_FACILITY(MISC_INSN_EXT3)) { + if (type == TCG_TYPE_I32) { + tgen_ext32u(s, dest, src); + src = dest; } + tcg_out_insn(s, RRFc, POPCNT, dest, src, 8); + return; + } + + /* Without MIE3, each byte gets the count of bits for the byte. */ + tcg_out_insn(s, RRFc, POPCNT, dest, src, 0); + + /* Multiply to sum each byte at the top of the word. 
*/ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, MSFI, dest, 0x01010101); + tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull); + tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0); + tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56); } } @@ -1611,10 +1482,6 @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) { if (l->has_value) { tgen_gotoi(s, cc, l->u.value_ptr); - } else if (USE_LONG_BRANCHES) { - tcg_out16(s, RIL_BRCL | (cc << 4)); - tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2); - s->code_ptr += 2; } else { tcg_out16(s, RI_BRC | (cc << 4)); tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2); @@ -1626,6 +1493,7 @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, TCGReg r2, TCGLabel *l) { tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); + /* Format RIE-b */ tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2); tcg_out16(s, 0); tcg_out16(s, cc << 12 | (opc & 0xff)); @@ -1635,6 +1503,7 @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, int i2, TCGLabel *l) { tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); + /* Format RIE-c */ tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc); tcg_out16(s, 0); tcg_out16(s, (i2 << 8) | (opc & 0xff)); @@ -1644,54 +1513,53 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, TCGArg c2, int c2const, TCGLabel *l) { int cc; + bool is_unsigned = is_unsigned_cond(c); + bool in_range; + S390Opcode opc; - if (HAVE_FACILITY(GEN_INST_EXT)) { - bool is_unsigned = is_unsigned_cond(c); - bool in_range; - S390Opcode opc; - - cc = tcg_cond_to_s390_cond[c]; + cc = tcg_cond_to_s390_cond[c]; - if (!c2const) { - opc = (type == TCG_TYPE_I32 - ? (is_unsigned ? RIE_CLRJ : RIE_CRJ) - : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ)); - tgen_compare_branch(s, opc, cc, r1, c2, l); - return; - } + if (!c2const) { + opc = (type == TCG_TYPE_I32 + ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ) + : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ)); + tgen_compare_branch(s, opc, cc, r1, c2, l); + return; + } - /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. - If the immediate we've been given does not fit that range, we'll - fall back to separate compare and branch instructions using the - larger comparison range afforded by COMPARE IMMEDIATE. */ - if (type == TCG_TYPE_I32) { - if (is_unsigned) { - opc = RIE_CLIJ; - in_range = (uint32_t)c2 == (uint8_t)c2; - } else { - opc = RIE_CIJ; - in_range = (int32_t)c2 == (int8_t)c2; - } + /* + * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field. + * If the immediate we've been given does not fit that range, we'll + * fall back to separate compare and branch instructions using the + * larger comparison range afforded by COMPARE IMMEDIATE. 
+ */ + if (type == TCG_TYPE_I32) { + if (is_unsigned) { + opc = RIEc_CLIJ; + in_range = (uint32_t)c2 == (uint8_t)c2; } else { - if (is_unsigned) { - opc = RIE_CLGIJ; - in_range = (uint64_t)c2 == (uint8_t)c2; - } else { - opc = RIE_CGIJ; - in_range = (int64_t)c2 == (int8_t)c2; - } + opc = RIEc_CIJ; + in_range = (int32_t)c2 == (int8_t)c2; } - if (in_range) { - tgen_compare_imm_branch(s, opc, cc, r1, c2, l); - return; + } else { + if (is_unsigned) { + opc = RIEc_CLGIJ; + in_range = (uint64_t)c2 == (uint8_t)c2; + } else { + opc = RIEc_CGIJ; + in_range = (int64_t)c2 == (int8_t)c2; } } + if (in_range) { + tgen_compare_imm_branch(s, opc, cc, r1, c2, l); + return; + } cc = tgen_cmp(s, type, c, r1, c2, c2const, false); tgen_branch(s, cc, l); } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) +static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest) { ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; if (off == (int32_t)off) { @@ -1702,6 +1570,12 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) } } +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, + const TCGHelperInfo *info) +{ + tcg_out_call_int(s, dest); +} + static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data, TCGReg base, TCGReg index, int disp) { @@ -1837,7 +1711,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc, cross pages using the address of the last byte of the access. */ a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask); tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; - if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) { + if (a_off == 0) { tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); } else { tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); @@ -1897,7 +1771,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]); tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); @@ -1938,7 +1812,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) } tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi); tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr); - tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr); return true; @@ -2095,43 +1969,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: a0 = args[0]; - if (s->tb_jmp_insn_offset) { - /* - * branch displacement must be aligned for atomic patching; - * see if we need to add extra nop before branch - */ - if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { - tcg_out16(s, NOP); - } - tcg_debug_assert(!USE_REG_TB); - tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - s->code_ptr += 2; - } else { - /* load address stored at s->tb_jmp_target_addr + a0 */ - tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB, - tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0)); - /* and go there */ - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB); - } + /* + * branch displacement must be aligned for atomic patching; + * see if we need to add extra nop before branch + */ + if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { + tcg_out16(s, NOP); + } + 
tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + s->code_ptr += 2; set_jmp_reset_offset(s, a0); - - /* For the unlinked path of goto_tb, we need to reset - TCG_REG_TB to the beginning of this TB. */ - if (USE_REG_TB) { - int ofs = -tcg_current_code_size(s); - /* All TB are restricted to 64KiB by unwind info. */ - tcg_debug_assert(ofs == sextract64(ofs, 0, 20)); - tcg_out_insn(s, RXY, LAY, TCG_REG_TB, - TCG_REG_TB, TCG_REG_NONE, ofs); - } break; case INDEX_op_goto_ptr: a0 = args[0]; - if (USE_REG_TB) { - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0); - } tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); break; @@ -2183,10 +2035,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RI, AHI, a0, a2); break; } - if (HAVE_FACILITY(EXT_IMM)) { - tcg_out_insn(s, RIL, AFI, a0, a2); - break; - } + tcg_out_insn(s, RIL, AFI, a0, a2); + break; } tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); } else if (a0 == a1) { @@ -2203,7 +2053,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } else if (a0 == a1) { tcg_out_insn(s, RR, SR, a0, a2); } else { - tcg_out_insn(s, RRF, SRK, a0, a1, a2); + tcg_out_insn(s, RRFa, SRK, a0, a1, a2); } break; @@ -2215,53 +2065,102 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } else if (a0 == a1) { tcg_out_insn(s, RR, NR, a0, a2); } else { - tcg_out_insn(s, RRF, NRK, a0, a1, a2); + tcg_out_insn(s, RRFa, NRK, a0, a1, a2); } break; case INDEX_op_or_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_ori(s, TCG_TYPE_I32, a0, a2); + tgen_ori(s, a0, a2); } else if (a0 == a1) { tcg_out_insn(s, RR, OR, a0, a2); } else { - tcg_out_insn(s, RRF, ORK, a0, a1, a2); + tcg_out_insn(s, RRFa, ORK, a0, a1, a2); } break; case INDEX_op_xor_i32: a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; if (const_args[2]) { tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_xori(s, TCG_TYPE_I32, a0, a2); + tcg_out_insn(s, RIL, XILF, a0, a2); } else if (a0 == a1) { tcg_out_insn(s, RR, XR, args[0], args[2]); } else { - tcg_out_insn(s, RRF, XRK, a0, a1, a2); + tcg_out_insn(s, RRFa, XRK, a0, a1, a2); + } + break; + + case INDEX_op_andc_i32: + a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; + if (const_args[2]) { + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2); + } else { + tcg_out_insn(s, RRFa, NCRK, a0, a1, a2); + } + break; + case INDEX_op_orc_i32: + a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; + if (const_args[2]) { + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + tgen_ori(s, a0, (uint32_t)~a2); + } else { + tcg_out_insn(s, RRFa, OCRK, a0, a1, a2); } break; + case INDEX_op_eqv_i32: + a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; + if (const_args[2]) { + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + tcg_out_insn(s, RIL, XILF, a0, ~a2); + } else { + tcg_out_insn(s, RRFa, NXRK, a0, a1, a2); + } + break; + case INDEX_op_nand_i32: + tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]); + break; + case INDEX_op_nor_i32: + tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]); + break; case INDEX_op_neg_i32: tcg_out_insn(s, RR, LCR, args[0], args[1]); break; + case INDEX_op_not_i32: + tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]); + break; case INDEX_op_mul_i32: + a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; if (const_args[2]) { - if ((int32_t)args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MHI, args[0], args[2]); + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + if (a2 
== (int16_t)a2) { + tcg_out_insn(s, RI, MHI, a0, a2); } else { - tcg_out_insn(s, RIL, MSFI, args[0], args[2]); + tcg_out_insn(s, RIL, MSFI, a0, a2); } + } else if (a0 == a1) { + tcg_out_insn(s, RRE, MSR, a0, a2); } else { - tcg_out_insn(s, RRE, MSR, args[0], args[2]); + tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2); } break; case INDEX_op_div2_i32: - tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); + tcg_debug_assert(args[0] == args[2]); + tcg_debug_assert(args[1] == args[3]); + tcg_debug_assert((args[1] & 1) == 0); + tcg_debug_assert(args[0] == args[1] + 1); + tcg_out_insn(s, RR, DR, args[1], args[4]); break; case INDEX_op_divu2_i32: - tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); + tcg_debug_assert(args[0] == args[2]); + tcg_debug_assert(args[1] == args[3]); + tcg_debug_assert((args[1] & 1) == 0); + tcg_debug_assert(args[0] == args[1] + 1); + tcg_out_insn(s, RRE, DLR, args[1], args[4]); break; case INDEX_op_shl_i32: @@ -2387,7 +2286,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_movcond_i32: tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3]); + args[2], const_args[2], args[3], const_args[3], args[4]); break; case INDEX_op_qemu_ld_i32: @@ -2429,17 +2328,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RI, AGHI, a0, a2); break; } - if (HAVE_FACILITY(EXT_IMM)) { - if (a2 == (int32_t)a2) { - tcg_out_insn(s, RIL, AGFI, a0, a2); - break; - } else if (a2 == (uint32_t)a2) { - tcg_out_insn(s, RIL, ALGFI, a0, a2); - break; - } else if (-a2 == (uint32_t)-a2) { - tcg_out_insn(s, RIL, SLGFI, a0, -a2); - break; - } + if (a2 == (int32_t)a2) { + tcg_out_insn(s, RIL, AGFI, a0, a2); + break; + } + if (a2 == (uint32_t)a2) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + break; + } + if (-a2 == (uint32_t)-a2) { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + break; } } tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); @@ -2454,10 +2353,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, if (const_args[2]) { a2 = -a2; goto do_addi_64; - } else if (a0 == a1) { - tcg_out_insn(s, RRE, SGR, a0, a2); } else { - tcg_out_insn(s, RRF, SGRK, a0, a1, a2); + tcg_out_insn(s, RRFa, SGRK, a0, a1, a2); } break; @@ -2466,66 +2363,119 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, if (const_args[2]) { tcg_out_mov(s, TCG_TYPE_I64, a0, a1); tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, NGR, args[0], args[2]); } else { - tcg_out_insn(s, RRF, NGRK, a0, a1, a2); + tcg_out_insn(s, RRFa, NGRK, a0, a1, a2); } break; case INDEX_op_or_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_ori(s, TCG_TYPE_I64, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, OGR, a0, a2); + tgen_ori(s, a0, a2); } else { - tcg_out_insn(s, RRF, OGRK, a0, a1, a2); + tcg_out_insn(s, RRFa, OGRK, a0, a1, a2); } break; case INDEX_op_xor_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_xori(s, TCG_TYPE_I64, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, XGR, a0, a2); + tgen_xori(s, a0, a2); } else { - tcg_out_insn(s, RRF, XGRK, a0, a1, a2); + tcg_out_insn(s, RRFa, XGRK, a0, a1, a2); } break; + case INDEX_op_andc_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_mov(s, TCG_TYPE_I64, a0, a1); + tgen_andi(s, TCG_TYPE_I64, a0, ~a2); + } else { + tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2); + } + break; + 
case INDEX_op_orc_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_mov(s, TCG_TYPE_I64, a0, a1); + tgen_ori(s, a0, ~a2); + } else { + tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2); + } + break; + case INDEX_op_eqv_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; + if (const_args[2]) { + tcg_out_mov(s, TCG_TYPE_I64, a0, a1); + tgen_xori(s, a0, ~a2); + } else { + tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2); + } + break; + case INDEX_op_nand_i64: + tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]); + break; + case INDEX_op_nor_i64: + tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]); + break; + case INDEX_op_neg_i64: tcg_out_insn(s, RRE, LCGR, args[0], args[1]); break; + case INDEX_op_not_i64: + tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]); + break; case INDEX_op_bswap64_i64: tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); break; case INDEX_op_mul_i64: + a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - if (args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MGHI, args[0], args[2]); + tcg_out_mov(s, TCG_TYPE_I64, a0, a1); + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MGHI, a0, a2); } else { - tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); + tcg_out_insn(s, RIL, MSGFI, a0, a2); } + } else if (a0 == a1) { + tcg_out_insn(s, RRE, MSGR, a0, a2); } else { - tcg_out_insn(s, RRE, MSGR, args[0], args[2]); + tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); } break; case INDEX_op_div2_i64: - /* ??? We get an unnecessary sign-extension of the dividend - into R3 with this definition, but as we do in fact always - produce both quotient and remainder using INDEX_op_div_i64 - instead requires jumping through even more hoops. */ - tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); + /* + * ??? We get an unnecessary sign-extension of the dividend + * into op0 with this definition, but as we do in fact always + * produce both quotient and remainder using INDEX_op_div_i64 + * instead requires jumping through even more hoops. 
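+         * The asserts below spell out DSGR's register-pair requirement:
+         * the remainder (args[1]) lands in an even register and the
+         * quotient (args[0]) in the odd register directly above it,
+         * which the register-pair output constraints arrange.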
+ */ + tcg_debug_assert(args[0] == args[2]); + tcg_debug_assert(args[1] == args[3]); + tcg_debug_assert((args[1] & 1) == 0); + tcg_debug_assert(args[0] == args[1] + 1); + tcg_out_insn(s, RRE, DSGR, args[1], args[4]); break; case INDEX_op_divu2_i64: - tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); + tcg_debug_assert(args[0] == args[2]); + tcg_debug_assert(args[1] == args[3]); + tcg_debug_assert((args[1] & 1) == 0); + tcg_debug_assert(args[0] == args[1] + 1); + tcg_out_insn(s, RRE, DLGR, args[1], args[4]); break; case INDEX_op_mulu2_i64: - tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]); + tcg_debug_assert(args[0] == args[2]); + tcg_debug_assert((args[1] & 1) == 0); + tcg_debug_assert(args[0] == args[1] + 1); + tcg_out_insn(s, RRE, MLGR, args[1], args[3]); + break; + case INDEX_op_muls2_i64: + tcg_debug_assert((args[1] & 1) == 0); + tcg_debug_assert(args[0] == args[1] + 1); + tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]); break; case INDEX_op_shl_i64: @@ -2620,7 +2570,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_movcond_i64: tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3]); + args[2], const_args[2], args[3], const_args[3], args[4]); break; OP_32_64(deposit): @@ -2650,11 +2600,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_clz(s, args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_ctpop_i32: + tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ctpop_i64: + tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_mb: /* The host memory model is quite strong, we simply need to serialize the instruction stream. */ if (args[0] & TCG_MO_ST_LD) { - tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0); + /* fast-bcr-serialization facility (45) is present */ + tcg_out_insn(s, RR, BCR, 14, 0); } break; @@ -3135,46 +3093,60 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_rotl_i64: case INDEX_op_rotr_i32: case INDEX_op_rotr_i64: - case INDEX_op_clz_i64: case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: return C_O1_I2(r, r, ri); + case INDEX_op_setcond_i64: + return C_O1_I2(r, r, rA); + + case INDEX_op_clz_i64: + return C_O1_I2(r, r, rI); case INDEX_op_sub_i32: case INDEX_op_sub_i64: case INDEX_op_and_i32: - case INDEX_op_and_i64: case INDEX_op_or_i32: - case INDEX_op_or_i64: case INDEX_op_xor_i32: + return C_O1_I2(r, r, ri); + case INDEX_op_and_i64: + return C_O1_I2(r, r, rNKR); + case INDEX_op_or_i64: case INDEX_op_xor_i64: - return (HAVE_FACILITY(DISTINCT_OPS) - ? C_O1_I2(r, r, ri) - : C_O1_I2(r, 0, ri)); + return C_O1_I2(r, r, rK); - case INDEX_op_mul_i32: - /* If we have the general-instruction-extensions, then we have - MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we - have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */ - return (HAVE_FACILITY(GEN_INST_EXT) - ? C_O1_I2(r, 0, ri) - : C_O1_I2(r, 0, rI)); + case INDEX_op_andc_i32: + case INDEX_op_orc_i32: + case INDEX_op_eqv_i32: + return C_O1_I2(r, r, ri); + case INDEX_op_andc_i64: + return C_O1_I2(r, r, rKR); + case INDEX_op_orc_i64: + case INDEX_op_eqv_i64: + return C_O1_I2(r, r, rNK); + + case INDEX_op_nand_i32: + case INDEX_op_nand_i64: + case INDEX_op_nor_i32: + case INDEX_op_nor_i64: + return C_O1_I2(r, r, r); + case INDEX_op_mul_i32: + return (HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O1_I2(r, r, ri) + : C_O1_I2(r, 0, ri)); case INDEX_op_mul_i64: - return (HAVE_FACILITY(GEN_INST_EXT) - ? 
C_O1_I2(r, 0, rJ) - : C_O1_I2(r, 0, rI)); + return (HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O1_I2(r, r, rJ) + : C_O1_I2(r, 0, rJ)); case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: - return (HAVE_FACILITY(DISTINCT_OPS) - ? C_O1_I2(r, r, ri) - : C_O1_I2(r, 0, ri)); + return C_O1_I2(r, r, ri); case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: return C_O0_I2(r, ri); + case INDEX_op_brcond_i64: + return C_O0_I2(r, rA); case INDEX_op_bswap16_i32: case INDEX_op_bswap16_i64: @@ -3183,6 +3155,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_bswap64_i64: case INDEX_op_neg_i32: case INDEX_op_neg_i64: + case INDEX_op_not_i32: + case INDEX_op_not_i64: case INDEX_op_ext8s_i32: case INDEX_op_ext8s_i64: case INDEX_op_ext8u_i32: @@ -3197,6 +3171,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: case INDEX_op_extract_i64: + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i32: @@ -3211,31 +3187,28 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) return C_O1_I2(r, rZ, r); case INDEX_op_movcond_i32: + return C_O1_I4(r, r, ri, rI, r); case INDEX_op_movcond_i64: - return (HAVE_FACILITY(LOAD_ON_COND2) - ? C_O1_I4(r, r, ri, rI, 0) - : C_O1_I4(r, r, ri, r, 0)); + return C_O1_I4(r, r, rA, rI, r); case INDEX_op_div2_i32: case INDEX_op_div2_i64: case INDEX_op_divu2_i32: case INDEX_op_divu2_i64: - return C_O2_I3(b, a, 0, 1, r); + return C_O2_I3(o, m, 0, 1, r); case INDEX_op_mulu2_i64: - return C_O2_I2(b, a, 0, r); + return C_O2_I2(o, m, 0, r); + case INDEX_op_muls2_i64: + return C_O2_I2(o, m, r, r); case INDEX_op_add2_i32: case INDEX_op_sub2_i32: - return (HAVE_FACILITY(EXT_IMM) - ? C_O2_I4(r, r, 0, 1, ri, r) - : C_O2_I4(r, r, 0, 1, r, r)); + return C_O2_I4(r, r, 0, 1, ri, r); case INDEX_op_add2_i64: case INDEX_op_sub2_i64: - return (HAVE_FACILITY(EXT_IMM) - ? C_O2_I4(r, r, 0, 1, rA, r) - : C_O2_I4(r, r, 0, 1, r, r)); + return C_O2_I4(r, r, 0, 1, rA, r); case INDEX_op_st_vec: return C_O0_I2(v, r); @@ -3301,6 +3274,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) static void query_s390_facilities(void) { unsigned long hwcap = qemu_getauxval(AT_HWCAP); + const char *which; /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this is present on all 64-bit systems, but let's check for it anyway. */ @@ -3322,6 +3296,38 @@ static void query_s390_facilities(void) if (!(hwcap & HWCAP_S390_VXRS)) { s390_facilities[2] = 0; } + + /* + * Minimum supported cpu revision is z196. + * Check for all required facilities. + * ZARCH_ACTIVE is done via preprocessor check for 64-bit. 
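+     * A missing facility is reported via error_report() below and QEMU
+     * exits, rather than silently generating instructions the host
+     * cannot execute.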
+ */ + if (!HAVE_FACILITY(LONG_DISP)) { + which = "long-displacement"; + goto fail; + } + if (!HAVE_FACILITY(EXT_IMM)) { + which = "extended-immediate"; + goto fail; + } + if (!HAVE_FACILITY(GEN_INST_EXT)) { + which = "general-instructions-extension"; + goto fail; + } + /* + * Facility 45 is a big bin that contains: distinct-operands, + * fast-BCR-serialization, high-word, population-count, + * interlocked-access-1, and load/store-on-condition-1 + */ + if (!HAVE_FACILITY(45)) { + which = "45"; + goto fail; + } + return; + + fail: + error_report("%s: missing required facility %s", __func__, which); + exit(EXIT_FAILURE); } static void tcg_target_init(TCGContext *s) @@ -3378,9 +3384,6 @@ static void tcg_target_init(TCGContext *s) /* XXX many insns can't be used with R0, so we better avoid it for now */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); - if (USE_REG_TB) { - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); - } } #define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \ @@ -3401,16 +3404,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) #ifndef CONFIG_SOFTMMU if (guest_base >= 0x80000) { - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); - if (USE_REG_TB) { - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, - tcg_target_call_iarg_regs[1]); - } /* br %r3 (go to TB) */ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]); diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 23e2063..68dcbc6 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -52,17 +52,19 @@ typedef enum TCGReg { #define TCG_TARGET_NB_REGS 64 -/* A list of relevant facilities used by this translator. Some of these - are required for proper operation, and these are checked at startup. */ +/* Facilities required for proper operation; checked at startup. */ #define FACILITY_ZARCH_ACTIVE 2 #define FACILITY_LONG_DISP 18 #define FACILITY_EXT_IMM 21 #define FACILITY_GEN_INST_EXT 34 -#define FACILITY_LOAD_ON_COND 45 -#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND -#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND +#define FACILITY_45 45 + +/* Facilities that are checked at runtime. 
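+   These are probed with HAVE_FACILITY() and gate only optional
+   instruction forms; the code generator falls back to longer
+   sequences when a facility is absent.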
*/ + #define FACILITY_LOAD_ON_COND2 53 +#define FACILITY_MISC_INSN_EXT2 58 +#define FACILITY_MISC_INSN_EXT3 61 #define FACILITY_VECTOR 129 #define FACILITY_VECTOR_ENH1 135 @@ -80,18 +82,18 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_ext16u_i32 1 #define TCG_TARGET_HAS_bswap16_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 0 +#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_neg_i32 1 -#define TCG_TARGET_HAS_andc_i32 0 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT) -#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT) +#define TCG_TARGET_HAS_ctpop_i32 1 +#define TCG_TARGET_HAS_deposit_i32 1 +#define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 #define TCG_TARGET_HAS_extract2_i32 0 #define TCG_TARGET_HAS_movcond_i32 1 @@ -103,7 +105,7 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_direct_jump HAVE_FACILITY(GEN_INST_EXT) +#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div2_i64 1 @@ -117,25 +119,25 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 0 +#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_neg_i64 1 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM) +#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) +#define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT) -#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT) +#define TCG_TARGET_HAS_ctpop_i64 1 +#define TCG_TARGET_HAS_deposit_i64 1 +#define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 #define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2) #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0 @@ -166,8 +168,9 @@ extern uint64_t s390_facilities[3]; /* used for function call generation */ #define TCG_TARGET_STACK_ALIGN 8 #define TCG_TARGET_CALL_STACK_OFFSET 160 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND +#define 
TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL -#define TCG_TARGET_EXTEND_ARGS 1 #define TCG_TARGET_HAS_MEMORY_BSWAP 1 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index cb9453e..eb913f3 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -859,7 +859,8 @@ static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, } } -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, + const TCGHelperInfo *info) { tcg_out_call_nodelay(s, dest, false); tcg_out_nop(s); diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 8655acd..0044ac8 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -71,7 +71,8 @@ typedef enum { #define TCG_TARGET_STACK_BIAS 2047 #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) -#define TCG_TARGET_EXTEND_ARGS 1 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL #if defined(__VIS__) && __VIS__ >= 0x300 #define use_vis3_instructions 1 diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index cc82088..6e50aeb 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -25,13 +25,53 @@ #ifndef TCG_INTERNAL_H #define TCG_INTERNAL_H +#ifdef CONFIG_TCG_INTERPRETER +#include <ffi.h> +#endif + #define TCG_HIGHWATER 1024 +/* + * Describe the calling convention of a given argument type. + */ +typedef enum { + TCG_CALL_RET_NORMAL, /* by registers */ +} TCGCallReturnKind; + +typedef enum { + TCG_CALL_ARG_NORMAL, /* by registers (continuing onto stack) */ + TCG_CALL_ARG_EVEN, /* like normal, but skipping odd slots */ + TCG_CALL_ARG_EXTEND, /* for i32, as a sign/zero-extended i64 */ + TCG_CALL_ARG_EXTEND_U, /* ... as a zero-extended i64 */ + TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */ +} TCGCallArgumentKind; + +typedef struct TCGCallArgumentLoc { + TCGCallArgumentKind kind : 8; + unsigned arg_slot : 8; + unsigned ref_slot : 8; + unsigned arg_idx : 4; + unsigned tmp_subindex : 2; +} TCGCallArgumentLoc; + +/* Avoid "unsigned < 0 is always false" Werror, when iarg_regs is empty. */ +#define REG_P(L) \ + ((int)(L)->arg_slot < (int)ARRAY_SIZE(tcg_target_call_iarg_regs)) + typedef struct TCGHelperInfo { void *func; const char *name; - unsigned flags; - unsigned typemask; +#ifdef CONFIG_TCG_INTERPRETER + ffi_cif *cif; +#endif + unsigned typemask : 32; + unsigned flags : 8; + unsigned nr_in : 8; + unsigned nr_out : 8; + TCGCallReturnKind out_kind : 8; + + /* Maximum physical arguments are constrained by TCG_TYPE_I128. 
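+     * e.g. with 64-bit registers an I128 argument needs 128/64 = 2
+     * slots and with 32-bit registers 4, so in[] is sized for every
+     * argument requiring the maximum split.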
*/ + TCGCallArgumentLoc in[MAX_CALL_IARGS * (128 / TCG_TARGET_REG_BITS)]; } TCGHelperInfo; extern TCGContext tcg_init_ctx; @@ -59,4 +99,18 @@ static inline unsigned tcg_call_flags(TCGOp *op) return tcg_call_info(op)->flags; } +#if TCG_TARGET_REG_BITS == 32 +static inline TCGv_i32 TCGV_LOW(TCGv_i64 t) +{ + return temp_tcgv_i32(tcgv_i64_temp(t) + HOST_BIG_ENDIAN); +} +static inline TCGv_i32 TCGV_HIGH(TCGv_i64 t) +{ + return temp_tcgv_i32(tcgv_i64_temp(t) + !HOST_BIG_ENDIAN); +} +#else +extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); +extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); +#endif + #endif /* TCG_INTERNAL_H */ diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c index 463dabf..966d41d 100644 --- a/tcg/tcg-op-vec.c +++ b/tcg/tcg-op-vec.c @@ -21,6 +21,8 @@ #include "tcg/tcg.h" #include "tcg/tcg-op.h" #include "tcg/tcg-mo.h" +#include "tcg-internal.h" + /* Reduce the number of ifdefs below. This assumes that all uses of TCGV_HIGH and TCGV_LOW are properly protected by a conditional that @@ -150,7 +152,7 @@ bool tcg_can_emit_vecop_list(const TCGOpcode *list, void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 2); TCGOP_VECL(op) = type - TCG_TYPE_V64; TCGOP_VECE(op) = vece; op->args[0] = r; @@ -160,7 +162,7 @@ void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a) void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 3); TCGOP_VECL(op) = type - TCG_TYPE_V64; TCGOP_VECE(op) = vece; op->args[0] = r; @@ -171,7 +173,7 @@ void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece, void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 4); TCGOP_VECL(op) = type - TCG_TYPE_V64; TCGOP_VECE(op) = vece; op->args[0] = r; @@ -183,7 +185,7 @@ void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece, static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 6); TCGOP_VECL(op) = type - TCG_TYPE_V64; TCGOP_VECE(op) = vece; op->args[0] = r; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 019fab0..cd1cd4e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -28,33 +28,25 @@ #include "tcg/tcg-op.h" #include "tcg/tcg-mo.h" #include "exec/plugin-gen.h" +#include "tcg-internal.h" -/* Reduce the number of ifdefs below. This assumes that all uses of - TCGV_HIGH and TCGV_LOW are properly protected by a conditional that - the compiler can eliminate. 
*/ -#if TCG_TARGET_REG_BITS == 64 -extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64); -extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); -#define TCGV_LOW TCGV_LOW_link_error -#define TCGV_HIGH TCGV_HIGH_link_error -#endif void tcg_gen_op1(TCGOpcode opc, TCGArg a1) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 1); op->args[0] = a1; } void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 2); op->args[0] = a1; op->args[1] = a2; } void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 3); op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; @@ -62,7 +54,7 @@ void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 4); op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; @@ -72,7 +64,7 @@ void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 5); op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; @@ -83,7 +75,7 @@ void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6) { - TCGOp *op = tcg_emit_op(opc); + TCGOp *op = tcg_emit_op(opc, 6); op->args[0] = a1; op->args[1] = a2; op->args[2] = a3; @@ -1171,6 +1163,21 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) #endif } +void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); +} + +void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); +} + +void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); +} + void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { #if HOST_BIG_ENDIAN @@ -1182,6 +1189,18 @@ void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) #endif } +void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); +} + +void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), + TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); +} + void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); @@ -62,10 +62,6 @@ #include "tcg/tcg-ldst.h" #include "tcg-internal.h" -#ifdef CONFIG_TCG_INTERPRETER -#include <ffi.h> -#endif - /* Forward declarations for functions declared in tcg-target.c.inc and used here. 
*/ static void tcg_target_init(TCGContext *s); @@ -149,12 +145,8 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, intptr_t arg2); static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, TCGReg base, intptr_t ofs); -#ifdef CONFIG_TCG_INTERPRETER static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, - ffi_cif *cif); -#else -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target); -#endif + const TCGHelperInfo *info); static bool tcg_target_const_match(int64_t val, TCGType type, int ct); #ifdef TCG_TARGET_NEED_LDST_LABELS static int tcg_out_ldst_finalize(TCGContext *s); @@ -496,7 +488,7 @@ void *tcg_malloc_internal(TCGContext *s, int size) { TCGPool *p; int pool_size; - + if (size > TCG_POOL_CHUNK_SIZE) { /* big malloc: insert a new pool (XXX: could optimize) */ p = g_malloc(sizeof(TCGPool) + size); @@ -517,10 +509,11 @@ void *tcg_malloc_internal(TCGContext *s, int size) p = g_malloc(sizeof(TCGPool) + pool_size); p->size = pool_size; p->next = NULL; - if (s->pool_current) + if (s->pool_current) { s->pool_current->next = p; - else + } else { s->pool_first = p; + } } else { p = p->next; } @@ -546,23 +539,230 @@ void tcg_pool_reset(TCGContext *s) #include "exec/helper-proto.h" -static const TCGHelperInfo all_helpers[] = { +static TCGHelperInfo all_helpers[] = { #include "exec/helper-tcg.h" }; static GHashTable *helper_table; #ifdef CONFIG_TCG_INTERPRETER -static GHashTable *ffi_table; - -static ffi_type * const typecode_to_ffi[8] = { - [dh_typecode_void] = &ffi_type_void, - [dh_typecode_i32] = &ffi_type_uint32, - [dh_typecode_s32] = &ffi_type_sint32, - [dh_typecode_i64] = &ffi_type_uint64, - [dh_typecode_s64] = &ffi_type_sint64, - [dh_typecode_ptr] = &ffi_type_pointer, -}; -#endif +static ffi_type *typecode_to_ffi(int argmask) +{ + switch (argmask) { + case dh_typecode_void: + return &ffi_type_void; + case dh_typecode_i32: + return &ffi_type_uint32; + case dh_typecode_s32: + return &ffi_type_sint32; + case dh_typecode_i64: + return &ffi_type_uint64; + case dh_typecode_s64: + return &ffi_type_sint64; + case dh_typecode_ptr: + return &ffi_type_pointer; + } + g_assert_not_reached(); +} + +static void init_ffi_layouts(void) +{ + /* g_direct_hash/equal for direct comparisons on uint32_t. */ + GHashTable *ffi_table = g_hash_table_new(NULL, NULL); + + for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) { + TCGHelperInfo *info = &all_helpers[i]; + unsigned typemask = info->typemask; + gpointer hash = (gpointer)(uintptr_t)typemask; + struct { + ffi_cif cif; + ffi_type *args[]; + } *ca; + ffi_status status; + int nargs; + ffi_cif *cif; + + cif = g_hash_table_lookup(ffi_table, hash); + if (cif) { + info->cif = cif; + continue; + } + + /* Ignoring the return type, find the last non-zero field. 
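+     * Each argument occupies 3 bits of typemask above the 3-bit
+     * return type; e.g. a helper with two arguments has its highest
+     * set bit within bits 8:3, so 32 - clz32(typemask >> 3) is at
+     * most 6 and DIV_ROUND_UP(..., 3) yields 2.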
*/ + nargs = 32 - clz32(typemask >> 3); + nargs = DIV_ROUND_UP(nargs, 3); + + ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); + ca->cif.rtype = typecode_to_ffi(typemask & 7); + ca->cif.nargs = nargs; + + if (nargs != 0) { + ca->cif.arg_types = ca->args; + for (int j = 0; j < nargs; ++j) { + int typecode = extract32(typemask, (j + 1) * 3, 3); + ca->args[j] = typecode_to_ffi(typecode); + } + } + + status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, + ca->cif.rtype, ca->cif.arg_types); + assert(status == FFI_OK); + + cif = &ca->cif; + info->cif = cif; + g_hash_table_insert(ffi_table, hash, (gpointer)cif); + } + + g_hash_table_destroy(ffi_table); +} +#endif /* CONFIG_TCG_INTERPRETER */ + +typedef struct TCGCumulativeArgs { + int arg_idx; /* tcg_gen_callN args[] */ + int info_in_idx; /* TCGHelperInfo in[] */ + int arg_slot; /* regs+stack slot */ + int ref_slot; /* stack slots for references */ +} TCGCumulativeArgs; + +static void layout_arg_even(TCGCumulativeArgs *cum) +{ + cum->arg_slot += cum->arg_slot & 1; +} + +static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, + TCGCallArgumentKind kind) +{ + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; + + *loc = (TCGCallArgumentLoc){ + .kind = kind, + .arg_idx = cum->arg_idx, + .arg_slot = cum->arg_slot, + }; + cum->info_in_idx++; + cum->arg_slot++; +} + +static void layout_arg_normal_n(TCGCumulativeArgs *cum, + TCGHelperInfo *info, int n) +{ + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; + + for (int i = 0; i < n; ++i) { + /* Layout all using the same arg_idx, adjusting the subindex. */ + loc[i] = (TCGCallArgumentLoc){ + .kind = TCG_CALL_ARG_NORMAL, + .arg_idx = cum->arg_idx, + .tmp_subindex = i, + .arg_slot = cum->arg_slot + i, + }; + } + cum->info_in_idx += n; + cum->arg_slot += n; +} + +static void init_call_layout(TCGHelperInfo *info) +{ + int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); + int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); + unsigned typemask = info->typemask; + unsigned typecode; + TCGCumulativeArgs cum = { }; + + /* + * Parse and place any function return value. + */ + typecode = typemask & 7; + switch (typecode) { + case dh_typecode_void: + info->nr_out = 0; + break; + case dh_typecode_i32: + case dh_typecode_s32: + case dh_typecode_ptr: + info->nr_out = 1; + info->out_kind = TCG_CALL_RET_NORMAL; + break; + case dh_typecode_i64: + case dh_typecode_s64: + info->nr_out = 64 / TCG_TARGET_REG_BITS; + info->out_kind = TCG_CALL_RET_NORMAL; + break; + default: + g_assert_not_reached(); + } + assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); + + /* + * Parse and place function arguments. 
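+     * Each 3-bit typecode is mapped to a TCGType and routed through
+     * the per-target TCG_TARGET_CALL_ARG_I32/I64 policy below; e.g.
+     * hosts defining TCG_CALL_ARG_EXTEND get their i32 arguments
+     * widened to i64, and 32-bit hosts split i64 arguments in two.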
+ */ + for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { + TCGCallArgumentKind kind; + TCGType type; + + typecode = typemask & 7; + switch (typecode) { + case dh_typecode_i32: + case dh_typecode_s32: + type = TCG_TYPE_I32; + break; + case dh_typecode_i64: + case dh_typecode_s64: + type = TCG_TYPE_I64; + break; + case dh_typecode_ptr: + type = TCG_TYPE_PTR; + break; + default: + g_assert_not_reached(); + } + + switch (type) { + case TCG_TYPE_I32: + switch (TCG_TARGET_CALL_ARG_I32) { + case TCG_CALL_ARG_EVEN: + layout_arg_even(&cum); + /* fall through */ + case TCG_CALL_ARG_NORMAL: + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); + break; + case TCG_CALL_ARG_EXTEND: + kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); + layout_arg_1(&cum, info, kind); + break; + default: + qemu_build_not_reached(); + } + break; + + case TCG_TYPE_I64: + switch (TCG_TARGET_CALL_ARG_I64) { + case TCG_CALL_ARG_EVEN: + layout_arg_even(&cum); + /* fall through */ + case TCG_CALL_ARG_NORMAL: + if (TCG_TARGET_REG_BITS == 32) { + layout_arg_normal_n(&cum, info, 2); + } else { + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); + } + break; + default: + qemu_build_not_reached(); + } + break; + + default: + g_assert_not_reached(); + } + } + info->nr_in = cum.info_in_idx; + + /* Validate that we didn't overrun the input array. */ + assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); + /* Validate the backend has enough argument space. */ + assert(cum.arg_slot <= max_reg_slots + max_stk_slots); + assert(cum.ref_slot <= max_stk_slots); +} static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; static void process_op_defs(TCGContext *s); @@ -603,49 +803,13 @@ static void tcg_context_init(unsigned max_cpus) helper_table = g_hash_table_new(NULL, NULL); for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { + init_call_layout(&all_helpers[i]); g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func, (gpointer)&all_helpers[i]); } #ifdef CONFIG_TCG_INTERPRETER - /* g_direct_hash/equal for direct comparisons on uint32_t. */ - ffi_table = g_hash_table_new(NULL, NULL); - for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) { - struct { - ffi_cif cif; - ffi_type *args[]; - } *ca; - uint32_t typemask = all_helpers[i].typemask; - gpointer hash = (gpointer)(uintptr_t)typemask; - ffi_status status; - int nargs; - - if (g_hash_table_lookup(ffi_table, hash)) { - continue; - } - - /* Ignoring the return type, find the last non-zero field. 
*/ - nargs = 32 - clz32(typemask >> 3); - nargs = DIV_ROUND_UP(nargs, 3); - - ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); - ca->cif.rtype = typecode_to_ffi[typemask & 7]; - ca->cif.nargs = nargs; - - if (nargs != 0) { - ca->cif.arg_types = ca->args; - for (int j = 0; j < nargs; ++j) { - int typecode = extract32(typemask, (j + 1) * 3, 3); - ca->args[j] = typecode_to_ffi[typecode]; - } - } - - status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs, - ca->cif.rtype, ca->cif.arg_types); - assert(status == FFI_OK); - - g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif); - } + init_ffi_layouts(); #endif tcg_target_init(s); @@ -886,10 +1050,7 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, TCGContext *s = tcg_ctx; TCGTemp *base_ts = tcgv_ptr_temp(base); TCGTemp *ts = tcg_global_alloc(s); - int indirect_reg = 0, bigendian = 0; -#if HOST_BIG_ENDIAN - bigendian = 1; -#endif + int indirect_reg = 0; switch (base_ts->kind) { case TEMP_FIXED: @@ -915,7 +1076,7 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, ts->indirect_reg = indirect_reg; ts->mem_allocated = 1; ts->mem_base = base_ts; - ts->mem_offset = offset + bigendian * 4; + ts->mem_offset = offset; pstrcpy(buf, sizeof(buf), name); pstrcat(buf, sizeof(buf), "_0"); ts->name = strdup(buf); @@ -926,7 +1087,8 @@ TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base, ts2->indirect_reg = indirect_reg; ts2->mem_allocated = 1; ts2->mem_base = base_ts; - ts2->mem_offset = offset + (1 - bigendian) * 4; + ts2->mem_offset = offset + 4; + ts2->temp_subindex = 1; pstrcpy(buf, sizeof(buf), name); pstrcat(buf, sizeof(buf), "_1"); ts2->name = strdup(buf); @@ -973,6 +1135,7 @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) ts2->base_type = TCG_TYPE_I64; ts2->type = TCG_TYPE_I32; ts2->temp_allocated = 1; + ts2->temp_subindex = 1; ts2->kind = kind; } else { ts->base_type = type; @@ -1070,36 +1233,43 @@ TCGTemp *tcg_constant_internal(TCGType type, int64_t val) ts = g_hash_table_lookup(h, &val); if (ts == NULL) { + int64_t *val_ptr; + ts = tcg_temp_alloc(s); if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { TCGTemp *ts2 = tcg_temp_alloc(s); + tcg_debug_assert(ts2 == ts + 1); + ts->base_type = TCG_TYPE_I64; ts->type = TCG_TYPE_I32; ts->kind = TEMP_CONST; ts->temp_allocated = 1; - /* - * Retain the full value of the 64-bit constant in the low - * part, so that the hash table works. Actual uses will - * truncate the value to the low part. - */ - ts->val = val; - tcg_debug_assert(ts2 == ts + 1); ts2->base_type = TCG_TYPE_I64; ts2->type = TCG_TYPE_I32; ts2->kind = TEMP_CONST; ts2->temp_allocated = 1; - ts2->val = val >> 32; + ts2->temp_subindex = 1; + + /* + * Retain the full value of the 64-bit constant in the low + * part, so that the hash table works. Actual uses will + * truncate the value to the low part. + */ + ts[HOST_BIG_ENDIAN].val = val; + ts[!HOST_BIG_ENDIAN].val = val >> 32; + val_ptr = &ts[HOST_BIG_ENDIAN].val; } else { ts->base_type = type; ts->type = type; ts->kind = TEMP_CONST; ts->temp_allocated = 1; ts->val = val; + val_ptr = &ts->val; } - g_hash_table_insert(h, &ts->val, ts); + g_hash_table_insert(h, val_ptr, ts); } return ts; @@ -1467,18 +1637,19 @@ bool tcg_op_supported(TCGOpcode op) } } -/* Note: we convert the 64 bit args to 32 bit and do some alignment - and endian swap. Maybe it would be better to do the alignment - and endian swap in tcg_reg_alloc_call(). 
*/ +static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs); + void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) { - int i, real_args, nb_rets, pi; - unsigned typemask; const TCGHelperInfo *info; + TCGv_i64 extend_free[MAX_CALL_IARGS]; + int n_extend = 0; TCGOp *op; + int i, n, pi = 0, total_args; info = g_hash_table_lookup(helper_table, (gpointer)func); - typemask = info->typemask; + total_args = info->nr_out + info->nr_in + 2; + op = tcg_op_alloc(INDEX_op_call, total_args); #ifdef CONFIG_PLUGIN /* detect non-plugin helpers */ @@ -1487,114 +1658,66 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) } #endif -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 - for (i = 0; i < nargs; ++i) { - int argtype = extract32(typemask, (i + 1) * 3, 3); - bool is_32bit = (argtype & ~1) == dh_typecode_i32; - bool is_signed = argtype & 1; - - if (is_32bit) { - TCGv_i64 temp = tcg_temp_new_i64(); - TCGv_i32 orig = temp_tcgv_i32(args[i]); - if (is_signed) { - tcg_gen_ext_i32_i64(temp, orig); - } else { - tcg_gen_extu_i32_i64(temp, orig); - } - args[i] = tcgv_i64_temp(temp); - } + TCGOP_CALLO(op) = n = info->nr_out; + switch (n) { + case 0: + tcg_debug_assert(ret == NULL); + break; + case 1: + tcg_debug_assert(ret != NULL); + op->args[pi++] = temp_arg(ret); + break; + case 2: + tcg_debug_assert(ret != NULL); + tcg_debug_assert(ret->base_type == ret->type + 1); + tcg_debug_assert(ret->temp_subindex == 0); + op->args[pi++] = temp_arg(ret); + op->args[pi++] = temp_arg(ret + 1); + break; + default: + g_assert_not_reached(); } -#endif /* TCG_TARGET_EXTEND_ARGS */ - op = tcg_emit_op(INDEX_op_call); - - pi = 0; - if (ret != NULL) { - if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { -#if HOST_BIG_ENDIAN - op->args[pi++] = temp_arg(ret + 1); - op->args[pi++] = temp_arg(ret); -#else - op->args[pi++] = temp_arg(ret); - op->args[pi++] = temp_arg(ret + 1); -#endif - nb_rets = 2; - } else { - op->args[pi++] = temp_arg(ret); - nb_rets = 1; - } - } else { - nb_rets = 0; - } - TCGOP_CALLO(op) = nb_rets; + TCGOP_CALLI(op) = n = info->nr_in; + for (i = 0; i < n; i++) { + const TCGCallArgumentLoc *loc = &info->in[i]; + TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex; - real_args = 0; - for (i = 0; i < nargs; i++) { - int argtype = extract32(typemask, (i + 1) * 3, 3); - bool is_64bit = (argtype & ~1) == dh_typecode_i64; - bool want_align = false; + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + op->args[pi++] = temp_arg(ts); + break; -#if defined(CONFIG_TCG_INTERPRETER) - /* - * Align all arguments, so that they land in predictable places - * for passing off to ffi_call. 
- */ - want_align = true; -#elif defined(TCG_TARGET_CALL_ALIGN_ARGS) - /* Some targets want aligned 64 bit args */ - want_align = is_64bit; -#endif + case TCG_CALL_ARG_EXTEND_U: + case TCG_CALL_ARG_EXTEND_S: + { + TCGv_i64 temp = tcg_temp_new_i64(); + TCGv_i32 orig = temp_tcgv_i32(ts); - if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) { - op->args[pi++] = TCG_CALL_DUMMY_ARG; - real_args++; - } + if (loc->kind == TCG_CALL_ARG_EXTEND_S) { + tcg_gen_ext_i32_i64(temp, orig); + } else { + tcg_gen_extu_i32_i64(temp, orig); + } + op->args[pi++] = tcgv_i64_arg(temp); + extend_free[n_extend++] = temp; + } + break; - if (TCG_TARGET_REG_BITS < 64 && is_64bit) { - /* - * If stack grows up, then we will be placing successive - * arguments at lower addresses, which means we need to - * reverse the order compared to how we would normally - * treat either big or little-endian. For those arguments - * that will wind up in registers, this still works for - * HPPA (the only current STACK_GROWSUP target) since the - * argument registers are *also* allocated in decreasing - * order. If another such target is added, this logic may - * have to get more complicated to differentiate between - * stack arguments and register arguments. - */ -#if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP) - op->args[pi++] = temp_arg(args[i] + 1); - op->args[pi++] = temp_arg(args[i]); -#else - op->args[pi++] = temp_arg(args[i]); - op->args[pi++] = temp_arg(args[i] + 1); -#endif - real_args += 2; - continue; + default: + g_assert_not_reached(); } - - op->args[pi++] = temp_arg(args[i]); - real_args++; } op->args[pi++] = (uintptr_t)func; op->args[pi++] = (uintptr_t)info; - TCGOP_CALLI(op) = real_args; + tcg_debug_assert(pi == total_args); - /* Make sure the fields didn't overflow. 
*/ - tcg_debug_assert(TCGOP_CALLI(op) == real_args); - tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); - -#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 - for (i = 0; i < nargs; ++i) { - int argtype = extract32(typemask, (i + 1) * 3, 3); - bool is_32bit = (argtype & ~1) == dh_typecode_i32; + QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); - if (is_32bit) { - tcg_temp_free_internal(args[i]); - } + tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free)); + for (i = 0; i < n_extend; ++i) { + tcg_temp_free_i64(extend_free[i]); } -#endif /* TCG_TARGET_EXTEND_ARGS */ } static void tcg_reg_alloc_start(TCGContext *s) @@ -1809,10 +1932,7 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } for (i = 0; i < nb_iargs; i++) { TCGArg arg = op->args[nb_oargs + i]; - const char *t = "<dummy>"; - if (arg != TCG_CALL_DUMMY_ARG) { - t = tcg_get_arg_str(s, buf, sizeof(buf), arg); - } + const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg); col += ne_fprintf(f, ",%s", t); } } else { @@ -1953,7 +2073,7 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) if (have_prefs) { for (i = 0; i < nb_oargs; ++i) { - TCGRegSet set = op->output_pref[i]; + TCGRegSet set = output_pref(op, i); if (i == 0) { ne_fprintf(f, " pref="); @@ -1985,15 +2105,32 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) static int get_constraint_priority(const TCGOpDef *def, int k) { const TCGArgConstraint *arg_ct = &def->args_ct[k]; - int n; + int n = ctpop64(arg_ct->regs); - if (arg_ct->oalias) { - /* an alias is equivalent to a single register */ - n = 1; - } else { - n = ctpop64(arg_ct->regs); + /* + * Sort constraints of a single register first, which includes output + * aliases (which must exactly match the input already allocated). + */ + if (n == 1 || arg_ct->oalias) { + return INT_MAX; + } + + /* + * Sort register pairs next, first then second immediately after. + * Arbitrarily sort multiple pairs by the index of the first reg; + * there shouldn't be many pairs. + */ + switch (arg_ct->pair) { + case 1: + case 3: + return (k + 1) * 2; + case 2: + return (arg_ct->pair_index + 1) * 2 - 1; } - return TCG_TARGET_NB_REGS - n + 1; + + /* Finally, sort by decreasing register count. */ + assert(n > 1); + return -n; } /* sort from highest priority to lowest */ @@ -2028,7 +2165,8 @@ static void process_op_defs(TCGContext *s) for (op = 0; op < NB_OPS; op++) { TCGOpDef *def = &tcg_op_defs[op]; const TCGTargetOpDef *tdefs; - int i, nb_args; + bool saw_alias_pair = false; + int i, o, i2, o2, nb_args; if (def->flags & TCG_OPF_NOT_PRESENT) { continue; @@ -2050,58 +2188,175 @@ static void process_op_defs(TCGContext *s) for (i = 0; i < nb_args; i++) { const char *ct_str = tdefs->args_ct_str[i]; + bool input_p = i >= def->nb_oargs; + /* Incomplete TCGTargetOpDef entry. */ tcg_debug_assert(ct_str != NULL); - while (*ct_str != '\0') { - switch(*ct_str) { - case '0' ... '9': - { - int oarg = *ct_str - '0'; - tcg_debug_assert(ct_str == tdefs->args_ct_str[i]); - tcg_debug_assert(oarg < def->nb_oargs); - tcg_debug_assert(def->args_ct[oarg].regs != 0); - def->args_ct[i] = def->args_ct[oarg]; - /* The output sets oalias. */ - def->args_ct[oarg].oalias = true; - def->args_ct[oarg].alias_index = i; - /* The input sets ialias. */ - def->args_ct[i].ialias = true; - def->args_ct[i].alias_index = oarg; - } - ct_str++; - break; - case '&': - def->args_ct[i].newreg = true; - ct_str++; - break; + switch (*ct_str) { + case '0' ... 
'9': + o = *ct_str - '0'; + tcg_debug_assert(input_p); + tcg_debug_assert(o < def->nb_oargs); + tcg_debug_assert(def->args_ct[o].regs != 0); + tcg_debug_assert(!def->args_ct[o].oalias); + def->args_ct[i] = def->args_ct[o]; + /* The output sets oalias. */ + def->args_ct[o].oalias = 1; + def->args_ct[o].alias_index = i; + /* The input sets ialias. */ + def->args_ct[i].ialias = 1; + def->args_ct[i].alias_index = o; + if (def->args_ct[i].pair) { + saw_alias_pair = true; + } + tcg_debug_assert(ct_str[1] == '\0'); + continue; + + case '&': + tcg_debug_assert(!input_p); + def->args_ct[i].newreg = true; + ct_str++; + break; + + case 'p': /* plus */ + /* Allocate to the register after the previous. */ + tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); + o = i - 1; + tcg_debug_assert(!def->args_ct[o].pair); + tcg_debug_assert(!def->args_ct[o].ct); + def->args_ct[i] = (TCGArgConstraint){ + .pair = 2, + .pair_index = o, + .regs = def->args_ct[o].regs << 1, + }; + def->args_ct[o].pair = 1; + def->args_ct[o].pair_index = i; + tcg_debug_assert(ct_str[1] == '\0'); + continue; + + case 'm': /* minus */ + /* Allocate to the register before the previous. */ + tcg_debug_assert(i > (input_p ? def->nb_oargs : 0)); + o = i - 1; + tcg_debug_assert(!def->args_ct[o].pair); + tcg_debug_assert(!def->args_ct[o].ct); + def->args_ct[i] = (TCGArgConstraint){ + .pair = 1, + .pair_index = o, + .regs = def->args_ct[o].regs >> 1, + }; + def->args_ct[o].pair = 2; + def->args_ct[o].pair_index = i; + tcg_debug_assert(ct_str[1] == '\0'); + continue; + } + + do { + switch (*ct_str) { case 'i': def->args_ct[i].ct |= TCG_CT_CONST; - ct_str++; break; /* Include all of the target-specific constraints. */ #undef CONST #define CONST(CASE, MASK) \ - case CASE: def->args_ct[i].ct |= MASK; ct_str++; break; + case CASE: def->args_ct[i].ct |= MASK; break; #define REGS(CASE, MASK) \ - case CASE: def->args_ct[i].regs |= MASK; ct_str++; break; + case CASE: def->args_ct[i].regs |= MASK; break; #include "tcg-target-con-str.h" #undef REGS #undef CONST default: + case '0' ... '9': + case '&': + case 'p': + case 'm': /* Typo in TCGTargetOpDef constraint. */ g_assert_not_reached(); } - } + } while (*++ct_str != '\0'); } /* TCGTargetOpDef entry with too much information? */ tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); + /* + * Fix up output pairs that are aliased with inputs. + * When we created the alias, we copied pair from the output. + * There are three cases: + * (1a) Pairs of inputs alias pairs of outputs. + * (1b) One input aliases the first of a pair of outputs. + * (2) One input aliases the second of a pair of outputs. + * + * Case 1a is handled by making sure that the pair_index'es are + * properly updated so that they appear the same as a pair of inputs. + * + * Case 1b is handled by setting the pair_index of the input to + * itself, simply so it doesn't point to an unrelated argument. + * Since we don't encounter the "second" during the input allocation + * phase, nothing happens with the second half of the input pair. + * + * Case 2 is handled by setting the second input to pair=3, the + * first output to pair=3, and the pair_index'es to match. + */ + if (saw_alias_pair) { + for (i = def->nb_oargs; i < nb_args; i++) { + /* + * Since [0-9pm] must be alone in the constraint string, + * the only way they can both be set is if the pair comes + * from the output alias. 
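+                 * For example, an input "0" that aliases the first
+                 * register of an output pair created by 'p' arrives
+                 * here with pair == 1 copied from that output, and is
+                 * repaired by one of the cases below.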
+ */ + if (!def->args_ct[i].ialias) { + continue; + } + switch (def->args_ct[i].pair) { + case 0: + break; + case 1: + o = def->args_ct[i].alias_index; + o2 = def->args_ct[o].pair_index; + tcg_debug_assert(def->args_ct[o].pair == 1); + tcg_debug_assert(def->args_ct[o2].pair == 2); + if (def->args_ct[o2].oalias) { + /* Case 1a */ + i2 = def->args_ct[o2].alias_index; + tcg_debug_assert(def->args_ct[i2].pair == 2); + def->args_ct[i2].pair_index = i; + def->args_ct[i].pair_index = i2; + } else { + /* Case 1b */ + def->args_ct[i].pair_index = i; + } + break; + case 2: + o = def->args_ct[i].alias_index; + o2 = def->args_ct[o].pair_index; + tcg_debug_assert(def->args_ct[o].pair == 2); + tcg_debug_assert(def->args_ct[o2].pair == 1); + if (def->args_ct[o2].oalias) { + /* Case 1a */ + i2 = def->args_ct[o2].alias_index; + tcg_debug_assert(def->args_ct[i2].pair == 1); + def->args_ct[i2].pair_index = i; + def->args_ct[i].pair_index = i2; + } else { + /* Case 2 */ + def->args_ct[i].pair = 3; + def->args_ct[o2].pair = 3; + def->args_ct[i].pair_index = o2; + def->args_ct[o2].pair_index = i; + } + break; + default: + g_assert_not_reached(); + } + } + } + /* sort the constraints (XXX: this is just an heuristic) */ sort_constraints(def, 0, def->nb_oargs); sort_constraints(def, def->nb_oargs, def->nb_iargs); @@ -2152,41 +2407,56 @@ void tcg_remove_ops_after(TCGOp *op) } } -static TCGOp *tcg_op_alloc(TCGOpcode opc) +static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs) { TCGContext *s = tcg_ctx; - TCGOp *op; - - if (likely(QTAILQ_EMPTY(&s->free_ops))) { - op = tcg_malloc(sizeof(TCGOp)); - } else { - op = QTAILQ_FIRST(&s->free_ops); - QTAILQ_REMOVE(&s->free_ops, op, link); + TCGOp *op = NULL; + + if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) { + QTAILQ_FOREACH(op, &s->free_ops, link) { + if (nargs <= op->nargs) { + QTAILQ_REMOVE(&s->free_ops, op, link); + nargs = op->nargs; + goto found; + } + } } + + /* Most opcodes have 3 or 4 operands: reduce fragmentation. */ + nargs = MAX(4, nargs); + op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs); + + found: memset(op, 0, offsetof(TCGOp, link)); op->opc = opc; - s->nb_ops++; + op->nargs = nargs; + + /* Check for bitfield overflow. 
*/ + tcg_debug_assert(op->nargs == nargs); + s->nb_ops++; return op; } -TCGOp *tcg_emit_op(TCGOpcode opc) +TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) { - TCGOp *op = tcg_op_alloc(opc); + TCGOp *op = tcg_op_alloc(opc, nargs); QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); return op; } -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, unsigned nargs) { - TCGOp *new_op = tcg_op_alloc(opc); + TCGOp *new_op = tcg_op_alloc(opc, nargs); QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, unsigned nargs) { - TCGOp *new_op = tcg_op_alloc(opc); + TCGOp *new_op = tcg_op_alloc(opc, nargs); QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); return new_op; } @@ -2443,12 +2713,11 @@ static void liveness_pass_1(TCGContext *s) switch (opc) { case INDEX_op_call: { - int call_flags; - int nb_call_regs; + const TCGHelperInfo *info = tcg_call_info(op); + int call_flags = tcg_call_flags(op); nb_oargs = TCGOP_CALLO(op); nb_iargs = TCGOP_CALLI(op); - call_flags = tcg_call_flags(op); /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { @@ -2473,11 +2742,11 @@ static void liveness_pass_1(TCGContext *s) } ts->state = TS_DEAD; la_reset_pref(ts); - - /* Not used -- it will be tcg_target_call_oarg_regs[i]. */ - op->output_pref[i] = 0; } + /* Not used -- it will be tcg_target_call_oarg_reg(). */ + memset(op->output_pref, 0, sizeof(op->output_pref)); + if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | TCG_CALL_NO_READ_GLOBALS))) { la_global_kill(s, nb_globals); @@ -2488,7 +2757,7 @@ static void liveness_pass_1(TCGContext *s) /* Record arguments that die in this helper. */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { ts = arg_temp(op->args[i]); - if (ts && ts->state & TS_DEAD) { + if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; } } @@ -2496,31 +2765,59 @@ static void liveness_pass_1(TCGContext *s) /* For all live registers, remove call-clobbered prefs. */ la_cross_call(s, nb_temps); - nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); + /* + * Input arguments are live for preceding opcodes. + * + * For those arguments that die, and will be allocated in + * registers, clear the register set for that arg, to be + * filled in below. For args that will be on the stack, + * reset to any available reg. Process arguments in reverse + * order so that if a temp is used more than once, the stack + * reset to max happens before the register reset to 0. + */ + for (i = nb_iargs - 1; i >= 0; i--) { + const TCGCallArgumentLoc *loc = &info->in[i]; + ts = arg_temp(op->args[nb_oargs + i]); - /* Input arguments are live for preceding opcodes. */ - for (i = 0; i < nb_iargs; i++) { - ts = arg_temp(op->args[i + nb_oargs]); - if (ts && ts->state & TS_DEAD) { - /* For those arguments that die, and will be allocated - * in registers, clear the register set for that arg, - * to be filled in below. For args that will be on - * the stack, reset to any available reg. - */ - *la_temp_pref(ts) - = (i < nb_call_regs ? 
0 : - tcg_target_available_regs[ts->type]); + if (ts->state & TS_DEAD) { + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + case TCG_CALL_ARG_EXTEND_U: + case TCG_CALL_ARG_EXTEND_S: + if (REG_P(loc)) { + *la_temp_pref(ts) = 0; + break; + } + /* fall through */ + default: + *la_temp_pref(ts) = + tcg_target_available_regs[ts->type]; + break; + } ts->state &= ~TS_DEAD; } } - /* For each input argument, add its input register to prefs. - If a temp is used once, this produces a single set bit. */ - for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) { - ts = arg_temp(op->args[i + nb_oargs]); - if (ts) { - tcg_regset_set_reg(*la_temp_pref(ts), - tcg_target_call_iarg_regs[i]); + /* + * For each input argument, add its input register to prefs. + * If a temp is used once, this produces a single set bit; + * if a temp is used multiple times, this produces a set. + */ + for (i = 0; i < nb_iargs; i++) { + const TCGCallArgumentLoc *loc = &info->in[i]; + ts = arg_temp(op->args[nb_oargs + i]); + + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + case TCG_CALL_ARG_EXTEND_U: + case TCG_CALL_ARG_EXTEND_S: + if (REG_P(loc)) { + tcg_regset_set_reg(*la_temp_pref(ts), + tcg_target_call_iarg_regs[loc->arg_slot]); + } + break; + default: + break; } } } @@ -2639,7 +2936,9 @@ static void liveness_pass_1(TCGContext *s) ts = arg_temp(op->args[i]); /* Remember the preference of the uses that followed. */ - op->output_pref[i] = *la_temp_pref(ts); + if (i < ARRAY_SIZE(op->output_pref)) { + op->output_pref[i] = *la_temp_pref(ts); + } /* Output args are dead. */ if (ts->state & TS_DEAD) { @@ -2709,7 +3008,7 @@ static void liveness_pass_1(TCGContext *s) set &= ct->regs; if (ct->ialias) { - set &= op->output_pref[ct->alias_index]; + set &= output_pref(op, ct->alias_index); } /* If the combination is not possible, restart. */ if (set == 0) { @@ -2789,21 +3088,19 @@ static bool liveness_pass_2(TCGContext *s) /* Make sure that input arguments are available. */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg_ts = arg_temp(op->args[i]); - if (arg_ts) { - dir_ts = arg_ts->state_ptr; - if (dir_ts && arg_ts->state == TS_DEAD) { - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_ld_i32 - : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc); - - lop->args[0] = temp_arg(dir_ts); - lop->args[1] = temp_arg(arg_ts->mem_base); - lop->args[2] = arg_ts->mem_offset; - - /* Loaded, but synced with memory. */ - arg_ts->state = TS_MEM; - } + dir_ts = arg_ts->state_ptr; + if (dir_ts && arg_ts->state == TS_DEAD) { + TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 + ? INDEX_op_ld_i32 + : INDEX_op_ld_i64); + TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + + lop->args[0] = temp_arg(dir_ts); + lop->args[1] = temp_arg(arg_ts->mem_base); + lop->args[2] = arg_ts->mem_offset; + + /* Loaded, but synced with memory. */ + arg_ts->state = TS_MEM; } } @@ -2812,14 +3109,12 @@ static bool liveness_pass_2(TCGContext *s) so that we reload when needed. */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { arg_ts = arg_temp(op->args[i]); - if (arg_ts) { - dir_ts = arg_ts->state_ptr; - if (dir_ts) { - op->args[i] = temp_arg(dir_ts); - changes = true; - if (IS_DEAD_ARG(i)) { - arg_ts->state = TS_DEAD; - } + dir_ts = arg_ts->state_ptr; + if (dir_ts) { + op->args[i] = temp_arg(dir_ts); + changes = true; + if (IS_DEAD_ARG(i)) { + arg_ts->state = TS_DEAD; } } } @@ -2861,7 +3156,7 @@ static bool liveness_pass_2(TCGContext *s) TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? 
INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc); + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); TCGTemp *out_ts = dir_ts; if (IS_DEAD_ARG(0)) { @@ -2897,7 +3192,7 @@ static bool liveness_pass_2(TCGContext *s) TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 ? INDEX_op_st_i32 : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc); + TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); @@ -2916,97 +3211,24 @@ static bool liveness_pass_2(TCGContext *s) return changes; } -#ifdef CONFIG_DEBUG_TCG -static void dump_regs(TCGContext *s) -{ - TCGTemp *ts; - int i; - char buf[64]; - - for(i = 0; i < s->nb_temps; i++) { - ts = &s->temps[i]; - printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); - switch(ts->val_type) { - case TEMP_VAL_REG: - printf("%s", tcg_target_reg_names[ts->reg]); - break; - case TEMP_VAL_MEM: - printf("%d(%s)", (int)ts->mem_offset, - tcg_target_reg_names[ts->mem_base->reg]); - break; - case TEMP_VAL_CONST: - printf("$0x%" PRIx64, ts->val); - break; - case TEMP_VAL_DEAD: - printf("D"); - break; - default: - printf("???"); - break; - } - printf("\n"); - } - - for(i = 0; i < TCG_TARGET_NB_REGS; i++) { - if (s->reg_to_temp[i] != NULL) { - printf("%s: %s\n", - tcg_target_reg_names[i], - tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i])); - } - } -} - -static void check_regs(TCGContext *s) -{ - int reg; - int k; - TCGTemp *ts; - char buf[64]; - - for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { - ts = s->reg_to_temp[reg]; - if (ts != NULL) { - if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) { - printf("Inconsistency for register %s:\n", - tcg_target_reg_names[reg]); - goto fail; - } - } - } - for (k = 0; k < s->nb_temps; k++) { - ts = &s->temps[k]; - if (ts->val_type == TEMP_VAL_REG - && ts->kind != TEMP_FIXED - && s->reg_to_temp[ts->reg] != ts) { - printf("Inconsistency for temp %s:\n", - tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts)); - fail: - printf("reg state:\n"); - dump_regs(s); - tcg_abort(); - } - } -} -#endif - static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) { - intptr_t off, size, align; + int size = tcg_type_size(ts->type); + int align; + intptr_t off; switch (ts->type) { case TCG_TYPE_I32: - size = align = 4; + align = 4; break; case TCG_TYPE_I64: case TCG_TYPE_V64: - size = align = 8; + align = 8; break; case TCG_TYPE_V128: - size = align = 16; - break; case TCG_TYPE_V256: /* Note that we do not require aligned storage for V256. */ - size = 32, align = 16; + align = 16; break; default: g_assert_not_reached(); @@ -3035,6 +3257,35 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) ts->mem_allocated = 1; } +/* Assign @reg to @ts, and update reg_to_temp[]. */ +static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg) +{ + if (ts->val_type == TEMP_VAL_REG) { + TCGReg old = ts->reg; + tcg_debug_assert(s->reg_to_temp[old] == ts); + if (old == reg) { + return; + } + s->reg_to_temp[old] = NULL; + } + tcg_debug_assert(s->reg_to_temp[reg] == NULL); + s->reg_to_temp[reg] = ts; + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; +} + +/* Assign a non-register value type to @ts, and update reg_to_temp[]. 
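/*
 * Aside: the invariant that set_temp_val_reg() (above) and
 * set_temp_val_nonreg() (below) maintain, as a minimal runnable model:
 * a temp in the REG state and reg_to_temp[] always name each other,
 * which is what lets this patch retire the old check_regs() pass.
 * Types and names are simplified stand-ins, not QEMU API.
 */
#include <assert.h>
#include <stddef.h>

enum { NREGS = 8 };
typedef enum { VAL_DEAD, VAL_MEM, VAL_CONST, VAL_REG } ValType;
typedef struct Temp { ValType val_type; int reg; } Temp;

static Temp *reg_to_temp[NREGS];

static void set_reg(Temp *ts, int reg)        /* cf. set_temp_val_reg */
{
    if (ts->val_type == VAL_REG) {
        assert(reg_to_temp[ts->reg] == ts);
        if (ts->reg == reg) {
            return;
        }
        reg_to_temp[ts->reg] = NULL;          /* leave the old slot */
    }
    assert(reg_to_temp[reg] == NULL);         /* target must be free */
    reg_to_temp[reg] = ts;
    ts->val_type = VAL_REG;
    ts->reg = reg;
}

static void set_nonreg(Temp *ts, ValType t)   /* cf. set_temp_val_nonreg */
{
    assert(t != VAL_REG);
    if (ts->val_type == VAL_REG) {
        reg_to_temp[ts->reg] = NULL;
    }
    ts->val_type = t;
}

int main(void)
{
    Temp a = { VAL_MEM, -1 };
    set_reg(&a, 3);
    set_reg(&a, 5);                     /* slot 3 is released first */
    assert(!reg_to_temp[3] && reg_to_temp[5] == &a);
    set_nonreg(&a, VAL_CONST);
    assert(!reg_to_temp[5]);
    return 0;
}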
*/ +static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type) +{ + tcg_debug_assert(type != TEMP_VAL_REG); + if (ts->val_type == TEMP_VAL_REG) { + TCGReg reg = ts->reg; + tcg_debug_assert(s->reg_to_temp[reg] == ts); + s->reg_to_temp[reg] = NULL; + } + ts->val_type = type; +} + static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet); /* Mark a temporary as free or dead. If 'free_or_dead' is negative, @@ -3060,10 +3311,7 @@ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead) default: g_assert_not_reached(); } - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = new_type; + set_temp_val_nonreg(s, ts, new_type); } /* Mark a temporary as dead. */ @@ -3197,6 +3445,52 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs, tcg_abort(); } +static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs, + TCGRegSet allocated_regs, + TCGRegSet preferred_regs, bool rev) +{ + int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order); + TCGRegSet reg_ct[2]; + const int *order; + + /* Ensure that if I is not in allocated_regs, I+1 is not either. */ + reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1)); + tcg_debug_assert(reg_ct[1] != 0); + reg_ct[0] = reg_ct[1] & preferred_regs; + + order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order; + + /* + * Skip the preferred_regs option if it cannot be satisfied, + * or if the preference made no difference. + */ + k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1]; + + /* + * Minimize the number of flushes by looking for 2 free registers first, + * then a single flush, then two flushes. + */ + for (fmin = 2; fmin >= 0; fmin--) { + for (j = k; j < 2; j++) { + TCGRegSet set = reg_ct[j]; + + for (i = 0; i < n; i++) { + TCGReg reg = order[i]; + + if (tcg_regset_test_reg(set, reg)) { + int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1]; + if (f >= fmin) { + tcg_reg_free(s, reg, allocated_regs); + tcg_reg_free(s, reg + 1, allocated_regs); + return reg; + } + } + } + } + } + tcg_abort(); +} + /* Make sure the temporary is in a register. If needed, allocate the register from DESIRED while avoiding ALLOCATED. */ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, @@ -3243,9 +3537,7 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs, default: tcg_abort(); } - ts->reg = reg; - ts->val_type = TEMP_VAL_REG; - s->reg_to_temp[reg] = ts; + set_temp_val_reg(s, ts, reg); } /* Save a temporary to memory. 'allocated_regs' is used in case a @@ -3357,10 +3649,7 @@ static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, tcg_debug_assert(!temp_readonly(ots)); /* The movi is not explicitly generated here. 
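/*
 * Aside: the pair-search mask in tcg_reg_alloc_pair() above, checked in
 * isolation. Clearing the allocated bits and their right-shifted copies
 * leaves bit R set exactly when both R and R+1 are free, so the
 * allocator can keep scanning single bits even for pairs. The helper
 * below is illustrative only.
 */
#include <assert.h>
#include <stdint.h>

static int first_free_pair(uint32_t required, uint32_t allocated)
{
    uint32_t ok = required & ~(allocated | (allocated >> 1));
    return ok ? __builtin_ctz(ok) : -1;  /* lowest usable pair base */
}

int main(void)
{
    /* Regs 0..7 allowed, reg 2 taken: pairs (1,2) and (2,3) are out,
     * so the first usable pair is (0,1). */
    assert(first_free_pair(0xff, 1u << 2) == 0);

    /* Also take reg 0: reg 1 is free but reg 2 vetoes (1,2),
     * leaving (3,4) as the first candidate. */
    assert(first_free_pair(0xff, (1u << 2) | 1u) == 3);
    return 0;
}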
*/ - if (ots->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ots->reg] = NULL; - } - ots->val_type = TEMP_VAL_CONST; + set_temp_val_nonreg(s, ots, TEMP_VAL_CONST); ots->val = val; ots->mem_coherent = 0; if (NEED_SYNC_ARG(0)) { @@ -3379,9 +3668,10 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) TCGRegSet allocated_regs, preferred_regs; TCGTemp *ts, *ots; TCGType otype, itype; + TCGReg oreg, ireg; allocated_regs = s->reserved_regs; - preferred_regs = op->output_pref[0]; + preferred_regs = output_pref(op, 0); ots = arg_temp(op->args[0]); ts = arg_temp(op->args[1]); @@ -3410,8 +3700,9 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs, preferred_regs); } - tcg_debug_assert(ts->val_type == TEMP_VAL_REG); + ireg = ts->reg; + if (IS_DEAD_ARG(0)) { /* mov to a non-saved dead register makes no sense (even with liveness analysis disabled). */ @@ -3419,52 +3710,53 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op) if (!ots->mem_allocated) { temp_allocate_frame(s, ots); } - tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset); + tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset); if (IS_DEAD_ARG(1)) { temp_dead(s, ts); } temp_dead(s, ots); + return; + } + + if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { + /* + * The mov can be suppressed. Kill input first, so that it + * is unlinked from reg_to_temp, then set the output to the + * reg that we saved from the input. + */ + temp_dead(s, ts); + oreg = ireg; } else { - if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) { - /* the mov can be suppressed */ - if (ots->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ots->reg] = NULL; - } - ots->reg = ts->reg; - temp_dead(s, ts); + if (ots->val_type == TEMP_VAL_REG) { + oreg = ots->reg; } else { - if (ots->val_type != TEMP_VAL_REG) { - /* When allocating a new register, make sure to not spill the - input one. */ - tcg_regset_set_reg(allocated_regs, ts->reg); - ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype], - allocated_regs, preferred_regs, - ots->indirect_base); - } - if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) { - /* - * Cross register class move not supported. - * Store the source register into the destination slot - * and leave the destination temp as TEMP_VAL_MEM. - */ - assert(!temp_readonly(ots)); - if (!ts->mem_allocated) { - temp_allocate_frame(s, ots); - } - tcg_out_st(s, ts->type, ts->reg, - ots->mem_base->reg, ots->mem_offset); - ots->mem_coherent = 1; - temp_free_or_dead(s, ots, -1); - return; - } + /* Make sure to not spill the input register during allocation. */ + oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype], + allocated_regs | ((TCGRegSet)1 << ireg), + preferred_regs, ots->indirect_base); } - ots->val_type = TEMP_VAL_REG; - ots->mem_coherent = 0; - s->reg_to_temp[ots->reg] = ots; - if (NEED_SYNC_ARG(0)) { - temp_sync(s, ots, allocated_regs, 0, 0); + if (!tcg_out_mov(s, otype, oreg, ireg)) { + /* + * Cross register class move not supported. + * Store the source register into the destination slot + * and leave the destination temp as TEMP_VAL_MEM. 
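/*
 * Aside: the restructured tcg_reg_alloc_mov() above reads as three
 * outcomes, summarized by this little stand-in. Reusing the input's
 * register is only legal when the input dies here and is not a fixed
 * temp; and killing the input *before* claiming its register is what
 * keeps the reg_to_temp[] invariant intact.
 */
#include <assert.h>

enum outcome { REUSE_INPUT_REG, COPY_TO_NEW_REG, SPILL_TO_MEMORY };

static enum outcome classify_mov(int input_dies, int input_is_fixed,
                                 int cross_class_mov_ok)
{
    if (input_dies && !input_is_fixed) {
        return REUSE_INPUT_REG;             /* the mov is suppressed */
    }
    return cross_class_mov_ok ? COPY_TO_NEW_REG : SPILL_TO_MEMORY;
}

int main(void)
{
    assert(classify_mov(1, 0, 1) == REUSE_INPUT_REG);
    assert(classify_mov(0, 0, 1) == COPY_TO_NEW_REG);
    assert(classify_mov(1, 1, 0) == SPILL_TO_MEMORY);
    return 0;
}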
+ */ + assert(!temp_readonly(ots)); + if (!ts->mem_allocated) { + temp_allocate_frame(s, ots); + } + tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset); + set_temp_val_nonreg(s, ts, TEMP_VAL_MEM); + ots->mem_coherent = 1; + return; } } + set_temp_val_reg(s, ots, oreg); + ots->mem_coherent = 0; + + if (NEED_SYNC_ARG(0)) { + temp_sync(s, ots, allocated_regs, 0, 0); + } } /* @@ -3476,8 +3768,8 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) TCGRegSet dup_out_regs, dup_in_regs; TCGTemp *its, *ots; TCGType itype, vtype; - intptr_t endian_fixup; unsigned vece; + int lowpart_ofs; bool ok; ots = arg_temp(op->args[0]); @@ -3496,7 +3788,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) if (IS_DEAD_ARG(1)) { temp_dead(s, its); } - tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); + tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0)); return; } @@ -3506,16 +3798,15 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) /* Allocate the output register now. */ if (ots->val_type != TEMP_VAL_REG) { TCGRegSet allocated_regs = s->reserved_regs; + TCGReg oreg; if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { /* Make sure to not spill the input register. */ tcg_regset_set_reg(allocated_regs, its->reg); } - ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, - op->output_pref[0], ots->indirect_base); - ots->val_type = TEMP_VAL_REG; - ots->mem_coherent = 0; - s->reg_to_temp[ots->reg] = ots; + oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, + output_pref(op, 0), ots->indirect_base); + set_temp_val_reg(s, ots, oreg); } switch (its->val_type) { @@ -3546,16 +3837,15 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) /* fall through */ case TEMP_VAL_MEM: -#if HOST_BIG_ENDIAN - endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8; - endian_fixup -= 1 << vece; -#else - endian_fixup = 0; -#endif + lowpart_ofs = 0; + if (HOST_BIG_ENDIAN) { + lowpart_ofs = tcg_type_size(itype) - (1 << vece); + } if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg, - its->mem_offset + endian_fixup)) { + its->mem_offset + lowpart_ofs)) { goto done; } + /* Load the input into the destination vector register. 
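/*
 * Aside: the single lowpart_ofs expression above subsumes the old
 * HOST_BIG_ENDIAN ifdef. A standalone check of the arithmetic, with
 * vece as log2 of the element size as in TCG:
 */
#include <assert.h>

static int lowpart_ofs(int type_size, int vece, int big_endian)
{
    /* The least-significant element sits at the end on big-endian hosts. */
    return big_endian ? type_size - (1 << vece) : 0;
}

int main(void)
{
    assert(lowpart_ofs(8, 0, 1) == 7);  /* I64, 8-bit lane, BE */
    assert(lowpart_ofs(4, 1, 1) == 2);  /* I32, 16-bit lane, BE */
    assert(lowpart_ofs(8, 3, 0) == 0);  /* LE: always offset 0 */
    return 0;
}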
*/ tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset); break; @@ -3568,6 +3858,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) tcg_debug_assert(ok); done: + ots->mem_coherent = 0; if (IS_DEAD_ARG(1)) { temp_dead(s, its); } @@ -3597,16 +3888,19 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) nb_iargs = def->nb_iargs; /* copy constants */ - memcpy(new_args + nb_oargs + nb_iargs, + memcpy(new_args + nb_oargs + nb_iargs, op->args + nb_oargs + nb_iargs, sizeof(TCGArg) * def->nb_cargs); i_allocated_regs = s->reserved_regs; o_allocated_regs = s->reserved_regs; - /* satisfy input constraints */ + /* satisfy input constraints */ for (k = 0; k < nb_iargs; k++) { - TCGRegSet i_preferred_regs, o_preferred_regs; + TCGRegSet i_preferred_regs, i_required_regs; + bool allocate_new_reg, copyto_new_reg; + TCGTemp *ts2; + int i1, i2; i = def->args_ct[nb_oargs + k].sort_index; arg = op->args[i]; @@ -3621,49 +3915,166 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) continue; } - i_preferred_regs = o_preferred_regs = 0; - if (arg_ct->ialias) { - o_preferred_regs = op->output_pref[arg_ct->alias_index]; + reg = ts->reg; + i_preferred_regs = 0; + i_required_regs = arg_ct->regs; + allocate_new_reg = false; + copyto_new_reg = false; + + switch (arg_ct->pair) { + case 0: /* not paired */ + if (arg_ct->ialias) { + i_preferred_regs = output_pref(op, arg_ct->alias_index); + + /* + * If the input is readonly, then it cannot also be an + * output and aliased to itself. If the input is not + * dead after the instruction, we must allocate a new + * register and move it. + */ + if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { + allocate_new_reg = true; + } else if (ts->val_type == TEMP_VAL_REG) { + /* + * Check if the current register has already been + * allocated for another input. + */ + allocate_new_reg = + tcg_regset_test_reg(i_allocated_regs, reg); + } + } + if (!allocate_new_reg) { + temp_load(s, ts, i_required_regs, i_allocated_regs, + i_preferred_regs); + reg = ts->reg; + allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg); + } + if (allocate_new_reg) { + /* + * Allocate a new register matching the constraint + * and move the temporary register into it. + */ + temp_load(s, ts, tcg_target_available_regs[ts->type], + i_allocated_regs, 0); + reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs, + i_preferred_regs, ts->indirect_base); + copyto_new_reg = true; + } + break; + + case 1: + /* First of an input pair; if i1 == i2, the second is an output. */ + i1 = i; + i2 = arg_ct->pair_index; + ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL; /* - * If the input is readonly, then it cannot also be an - * output and aliased to itself. If the input is not - * dead after the instruction, we must allocate a new - * register and move it. + * It is easier to default to allocating a new pair + * and to identify a few cases where it's not required. */ - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { - goto allocate_in_reg; + if (arg_ct->ialias) { + i_preferred_regs = output_pref(op, arg_ct->alias_index); + if (IS_DEAD_ARG(i1) && + IS_DEAD_ARG(i2) && + !temp_readonly(ts) && + ts->val_type == TEMP_VAL_REG && + ts->reg < TCG_TARGET_NB_REGS - 1 && + tcg_regset_test_reg(i_required_regs, reg) && + !tcg_regset_test_reg(i_allocated_regs, reg) && + !tcg_regset_test_reg(i_allocated_regs, reg + 1) && + (ts2 + ? 
ts2->val_type == TEMP_VAL_REG && + ts2->reg == reg + 1 && + !temp_readonly(ts2) + : s->reg_to_temp[reg + 1] == NULL)) { + break; + } + } else { + /* Without aliasing, the pair must also be an input. */ + tcg_debug_assert(ts2); + if (ts->val_type == TEMP_VAL_REG && + ts2->val_type == TEMP_VAL_REG && + ts2->reg == reg + 1 && + tcg_regset_test_reg(i_required_regs, reg)) { + break; + } } - + reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs, + 0, ts->indirect_base); + goto do_pair; + + case 2: /* pair second */ + reg = new_args[arg_ct->pair_index] + 1; + goto do_pair; + + case 3: /* ialias with second output, no first input */ + tcg_debug_assert(arg_ct->ialias); + i_preferred_regs = output_pref(op, arg_ct->alias_index); + + if (IS_DEAD_ARG(i) && + !temp_readonly(ts) && + ts->val_type == TEMP_VAL_REG && + reg > 0 && + s->reg_to_temp[reg - 1] == NULL && + tcg_regset_test_reg(i_required_regs, reg) && + !tcg_regset_test_reg(i_allocated_regs, reg) && + !tcg_regset_test_reg(i_allocated_regs, reg - 1)) { + tcg_regset_set_reg(i_allocated_regs, reg - 1); + break; + } + reg = tcg_reg_alloc_pair(s, i_required_regs >> 1, + i_allocated_regs, 0, + ts->indirect_base); + tcg_regset_set_reg(i_allocated_regs, reg); + reg += 1; + goto do_pair; + + do_pair: /* - * Check if the current register has already been allocated - * for another input aliased to an output. + * If an aliased input is not dead after the instruction, + * we must allocate a new register and move it. */ - if (ts->val_type == TEMP_VAL_REG) { - reg = ts->reg; - for (int k2 = 0; k2 < k; k2++) { - int i2 = def->args_ct[nb_oargs + k2].sort_index; - if (def->args_ct[i2].ialias && reg == new_args[i2]) { - goto allocate_in_reg; - } + if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) { + TCGRegSet t_allocated_regs = i_allocated_regs; + + /* + * Because of the alias, and the continued life, make sure + * that the temp is somewhere *other* than the reg pair, + * and we get a copy in reg. + */ + tcg_regset_set_reg(t_allocated_regs, reg); + tcg_regset_set_reg(t_allocated_regs, reg + 1); + if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) { + /* If ts was already in reg, copy it somewhere else. */ + TCGReg nr; + bool ok; + + tcg_debug_assert(ts->kind != TEMP_FIXED); + nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + t_allocated_regs, 0, ts->indirect_base); + ok = tcg_out_mov(s, ts->type, nr, reg); + tcg_debug_assert(ok); + + set_temp_val_reg(s, ts, nr); + } else { + temp_load(s, ts, tcg_target_available_regs[ts->type], + t_allocated_regs, 0); + copyto_new_reg = true; } + } else { + /* Preferably allocate to reg, otherwise copy. */ + i_required_regs = (TCGRegSet)1 << reg; + temp_load(s, ts, i_required_regs, i_allocated_regs, + i_preferred_regs); + copyto_new_reg = ts->reg != reg; } - i_preferred_regs = o_preferred_regs; - } + break; - temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs); - reg = ts->reg; + default: + g_assert_not_reached(); + } - if (!tcg_regset_test_reg(arg_ct->regs, reg)) { - allocate_in_reg: - /* - * Allocate a new register matching the constraint - * and move the temporary register into it. - */ - temp_load(s, ts, tcg_target_available_regs[ts->type], - i_allocated_regs, 0); - reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs, - o_preferred_regs, ts->indirect_base); + if (copyto_new_reg) { if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { /* * Cross register class move not supported. 
Sync the @@ -3678,7 +4089,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) const_args[i] = 0; tcg_regset_set_reg(i_allocated_regs, reg); } - + /* mark dead temporaries and free the associated registers */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { if (IS_DEAD_ARG(i)) { @@ -3692,7 +4103,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) tcg_reg_alloc_bb_end(s, i_allocated_regs); } else { if (def->flags & TCG_OPF_CALL_CLOBBER) { - /* XXX: permit generic clobber register list ? */ + /* XXX: permit generic clobber register list ? */ for (i = 0; i < TCG_TARGET_NB_REGS; i++) { if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { tcg_reg_free(s, i, i_allocated_regs); @@ -3704,7 +4115,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) an exception. */ sync_globals(s, i_allocated_regs); } - + /* satisfy the output constraints */ for(k = 0; k < nb_oargs; k++) { i = def->args_ct[k].sort_index; @@ -3715,28 +4126,50 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) /* ENV should not be modified. */ tcg_debug_assert(!temp_readonly(ts)); - if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { - reg = new_args[arg_ct->alias_index]; - } else if (arg_ct->newreg) { - reg = tcg_reg_alloc(s, arg_ct->regs, - i_allocated_regs | o_allocated_regs, - op->output_pref[k], ts->indirect_base); - } else { - reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, - op->output_pref[k], ts->indirect_base); + switch (arg_ct->pair) { + case 0: /* not paired */ + if (arg_ct->oalias && !const_args[arg_ct->alias_index]) { + reg = new_args[arg_ct->alias_index]; + } else if (arg_ct->newreg) { + reg = tcg_reg_alloc(s, arg_ct->regs, + i_allocated_regs | o_allocated_regs, + output_pref(op, k), ts->indirect_base); + } else { + reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs, + output_pref(op, k), ts->indirect_base); + } + break; + + case 1: /* first of pair */ + tcg_debug_assert(!arg_ct->newreg); + if (arg_ct->oalias) { + reg = new_args[arg_ct->alias_index]; + break; + } + reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, + output_pref(op, k), ts->indirect_base); + break; + + case 2: /* second of pair */ + tcg_debug_assert(!arg_ct->newreg); + if (arg_ct->oalias) { + reg = new_args[arg_ct->alias_index]; + } else { + reg = new_args[arg_ct->pair_index] + 1; + } + break; + + case 3: /* first of pair, aliasing with a second input */ + tcg_debug_assert(!arg_ct->newreg); + reg = new_args[arg_ct->pair_index] - 1; + break; + + default: + g_assert_not_reached(); } tcg_regset_set_reg(o_allocated_regs, reg); - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; - } - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - /* - * Temp value is modified, so the value kept in memory is - * potentially not the same. - */ + set_temp_val_reg(s, ts, reg); ts->mem_coherent = 0; - s->reg_to_temp[reg] = ts; new_args[i] = reg; } } @@ -3786,6 +4219,7 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) TCGRegSet allocated_regs = s->reserved_regs; TCGRegSet dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs; + TCGReg oreg; /* Make sure to not spill the input registers. 
*/ if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) { @@ -3795,11 +4229,9 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) tcg_regset_set_reg(allocated_regs, itsh->reg); } - ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, - op->output_pref[0], ots->indirect_base); - ots->val_type = TEMP_VAL_REG; - ots->mem_coherent = 0; - s->reg_to_temp[ots->reg] = ots; + oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, + output_pref(op, 0), ots->indirect_base); + set_temp_val_reg(s, ots, oreg); } /* Promote dup2 of immediates to dupi_vec. */ @@ -3820,18 +4252,14 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) } /* If the two inputs form one 64-bit value, try dupm_vec. */ - if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) { - if (!itsl->mem_coherent) { - temp_sync(s, itsl, s->reserved_regs, 0, 0); - } - if (!itsh->mem_coherent) { - temp_sync(s, itsh, s->reserved_regs, 0, 0); - } -#if HOST_BIG_ENDIAN - TCGTemp *its = itsh; -#else - TCGTemp *its = itsl; -#endif + if (itsl->temp_subindex == HOST_BIG_ENDIAN && + itsh->temp_subindex == !HOST_BIG_ENDIAN && + itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) { + TCGTemp *its = itsl - HOST_BIG_ENDIAN; + + temp_sync(s, its + 0, s->reserved_regs, 0, 0); + temp_sync(s, its + 1, s->reserved_regs, 0, 0); + if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg, its->mem_base->reg, its->mem_offset)) { goto done; @@ -3842,6 +4270,7 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) return false; done: + ots->mem_coherent = 0; if (IS_DEAD_ARG(1)) { temp_dead(s, itsl); } @@ -3856,152 +4285,137 @@ static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op) return true; } -#ifdef TCG_TARGET_STACK_GROWSUP -#define STACK_DIR(x) (-(x)) -#else -#define STACK_DIR(x) (x) -#endif +static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts, + TCGRegSet allocated_regs) +{ + if (ts->val_type == TEMP_VAL_REG) { + if (ts->reg != reg) { + tcg_reg_free(s, reg, allocated_regs); + if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { + /* + * Cross register class move not supported. Sync the + * temp back to its slot and load from there. + */ + temp_sync(s, ts, allocated_regs, 0, 0); + tcg_out_ld(s, ts->type, reg, + ts->mem_base->reg, ts->mem_offset); + } + } + } else { + TCGRegSet arg_set = 0; + + tcg_reg_free(s, reg, allocated_regs); + tcg_regset_set_reg(arg_set, reg); + temp_load(s, ts, arg_set, allocated_regs, 0); + } +} + +static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts, + TCGRegSet allocated_regs) +{ + /* + * When the destination is on the stack, load up the temp and store. + * If there are many call-saved registers, the temp might live to + * see another use; otherwise it'll be discarded. 
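/*
 * Aside: these helpers split one flat argument-slot number between the
 * register file and the stack (cf. load_arg_normal just below). A model
 * of that split, assuming six integer argument registers as on an
 * x86-64 host; in the patch the real count is
 * ARRAY_SIZE(tcg_target_call_iarg_regs) and the base offset is
 * TCG_TARGET_CALL_STACK_OFFSET.
 */
#include <assert.h>

enum { N_IARG_REGS = 6, STACK_BASE = 0, SLOT_SIZE = 8 };

static int slot_in_reg(int arg_slot)
{
    return arg_slot < N_IARG_REGS;      /* cf. REG_P(loc) */
}

static long slot_stack_ofs(int arg_slot)
{
    assert(!slot_in_reg(arg_slot));
    return STACK_BASE + (long)(arg_slot - N_IARG_REGS) * SLOT_SIZE;
}

int main(void)
{
    assert(slot_in_reg(0) && slot_in_reg(5) && !slot_in_reg(6));
    assert(slot_stack_ofs(6) == 0);     /* first stacked argument */
    assert(slot_stack_ofs(8) == 16);
    return 0;
}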
+ */ + temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0); + tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET + + stk_slot * sizeof(tcg_target_long)); +} + +static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, + TCGTemp *ts, TCGRegSet *allocated_regs) +{ + if (REG_P(l)) { + TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot]; + load_arg_reg(s, reg, ts, *allocated_regs); + tcg_regset_set_reg(*allocated_regs, reg); + } else { + load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs), + ts, *allocated_regs); + } +} static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) { const int nb_oargs = TCGOP_CALLO(op); const int nb_iargs = TCGOP_CALLI(op); const TCGLifeData arg_life = op->life; - const TCGHelperInfo *info; - int flags, nb_regs, i; - TCGReg reg; - TCGArg arg; - TCGTemp *ts; - intptr_t stack_offset; - size_t call_stack_size; - tcg_insn_unit *func_addr; - int allocate_args; - TCGRegSet allocated_regs; - - func_addr = tcg_call_func(op); - info = tcg_call_info(op); - flags = info->flags; - - nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); - if (nb_regs > nb_iargs) { - nb_regs = nb_iargs; - } - - /* assign stack slots first */ - call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); - call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & - ~(TCG_TARGET_STACK_ALIGN - 1); - allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); - if (allocate_args) { - /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed, - preallocate call stack */ - tcg_abort(); - } + const TCGHelperInfo *info = tcg_call_info(op); + TCGRegSet allocated_regs = s->reserved_regs; + int i; - stack_offset = TCG_TARGET_CALL_STACK_OFFSET; - for (i = nb_regs; i < nb_iargs; i++) { - arg = op->args[nb_oargs + i]; -#ifdef TCG_TARGET_STACK_GROWSUP - stack_offset -= sizeof(tcg_target_long); -#endif - if (arg != TCG_CALL_DUMMY_ARG) { - ts = arg_temp(arg); - temp_load(s, ts, tcg_target_available_regs[ts->type], - s->reserved_regs, 0); - tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); + /* + * Move inputs into place in reverse order, + * so that we place stacked arguments first. + */ + for (i = nb_iargs - 1; i >= 0; --i) { + const TCGCallArgumentLoc *loc = &info->in[i]; + TCGTemp *ts = arg_temp(op->args[nb_oargs + i]); + + switch (loc->kind) { + case TCG_CALL_ARG_NORMAL: + case TCG_CALL_ARG_EXTEND_U: + case TCG_CALL_ARG_EXTEND_S: + load_arg_normal(s, loc, ts, &allocated_regs); + break; + default: + g_assert_not_reached(); } -#ifndef TCG_TARGET_STACK_GROWSUP - stack_offset += sizeof(tcg_target_long); -#endif } - - /* assign input registers */ - allocated_regs = s->reserved_regs; - for (i = 0; i < nb_regs; i++) { - arg = op->args[nb_oargs + i]; - if (arg != TCG_CALL_DUMMY_ARG) { - ts = arg_temp(arg); - reg = tcg_target_call_iarg_regs[i]; - if (ts->val_type == TEMP_VAL_REG) { - if (ts->reg != reg) { - tcg_reg_free(s, reg, allocated_regs); - if (!tcg_out_mov(s, ts->type, reg, ts->reg)) { - /* - * Cross register class move not supported. Sync the - * temp back to its slot and load from there. 
- */ - temp_sync(s, ts, allocated_regs, 0, 0); - tcg_out_ld(s, ts->type, reg, - ts->mem_base->reg, ts->mem_offset); - } - } - } else { - TCGRegSet arg_set = 0; - - tcg_reg_free(s, reg, allocated_regs); - tcg_regset_set_reg(arg_set, reg); - temp_load(s, ts, arg_set, allocated_regs, 0); - } - - tcg_regset_set_reg(allocated_regs, reg); - } - } - - /* mark dead temporaries and free the associated registers */ + /* Mark dead temporaries and free the associated registers. */ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { if (IS_DEAD_ARG(i)) { temp_dead(s, arg_temp(op->args[i])); } } - - /* clobber call registers */ + + /* Clobber call registers. */ for (i = 0; i < TCG_TARGET_NB_REGS; i++) { if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { tcg_reg_free(s, i, allocated_regs); } } - /* Save globals if they might be written by the helper, sync them if - they might be read. */ - if (flags & TCG_CALL_NO_READ_GLOBALS) { + /* + * Save globals if they might be written by the helper, + * sync them if they might be read. + */ + if (info->flags & TCG_CALL_NO_READ_GLOBALS) { /* Nothing to do */ - } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { + } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) { sync_globals(s, allocated_regs); } else { save_globals(s, allocated_regs); } -#ifdef CONFIG_TCG_INTERPRETER - { - gpointer hash = (gpointer)(uintptr_t)info->typemask; - ffi_cif *cif = g_hash_table_lookup(ffi_table, hash); - assert(cif != NULL); - tcg_out_call(s, func_addr, cif); - } -#else - tcg_out_call(s, func_addr); -#endif + tcg_out_call(s, tcg_call_func(op), info); - /* assign output registers and emit moves if needed */ - for(i = 0; i < nb_oargs; i++) { - arg = op->args[i]; - ts = arg_temp(arg); + /* Assign output registers and emit moves if needed. */ + switch (info->out_kind) { + case TCG_CALL_RET_NORMAL: + for (i = 0; i < nb_oargs; i++) { + TCGTemp *ts = arg_temp(op->args[i]); + TCGReg reg = tcg_target_call_oarg_regs[i]; - /* ENV should not be modified. */ - tcg_debug_assert(!temp_readonly(ts)); + /* ENV should not be modified. */ + tcg_debug_assert(!temp_readonly(ts)); - reg = tcg_target_call_oarg_regs[i]; - tcg_debug_assert(s->reg_to_temp[reg] == NULL); - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = NULL; + set_temp_val_reg(s, ts, reg); + ts->mem_coherent = 0; } - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - ts->mem_coherent = 0; - s->reg_to_temp[reg] = ts; + break; + default: + g_assert_not_reached(); + } + + /* Flush or discard output registers as needed. */ + for (i = 0; i < nb_oargs; i++) { + TCGTemp *ts = arg_temp(op->args[i]); if (NEED_SYNC_ARG(i)) { - temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i)); + temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i)); } else if (IS_DEAD_ARG(i)) { temp_dead(s, ts); } @@ -4316,9 +4730,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) tcg_reg_alloc_op(s, op); break; } -#ifdef CONFIG_DEBUG_TCG - check_regs(s); -#endif /* Test for (pending) buffer overflow. The assumption is that any one operation beginning below the high water mark cannot overrun the buffer completely. Thus we can test for overflow after @@ -4395,7 +4806,7 @@ void tcg_dump_info(GString *buf) (double)s->code_out_len / tb_div_count); g_string_append_printf(buf, "avg search data/TB %0.1f\n", (double)s->search_out_len / tb_div_count); - + g_string_append_printf(buf, "cycles/op %0.1f\n", s->op_count ? 
(double)tot / s->op_count : 0); g_string_append_printf(buf, "cycles/in byte %0.1f\n", @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */ #include "exec/cpu_ldst.h" #include "tcg/tcg-op.h" #include "tcg/tcg-ldst.h" diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index f3d7441..d36a7eb 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -197,10 +197,6 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R0, }; -#if MAX_OPC_PARAM_IARGS != 7 -# error Fix needed, number of supported input arguments changed! -#endif - /* No call arguments via registers. All will be stored on the "stack". */ static const int tcg_target_call_iarg_regs[] = { }; @@ -562,8 +558,9 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, - ffi_cif *cif) + const TCGHelperInfo *info) { + ffi_cif *cif = info->cif; tcg_insn_unit insn = 0; uint8_t which; diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index ceb36c4..94ec541 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -158,6 +158,13 @@ typedef enum { /* Used for function call generation. */ #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_STACK_ALIGN 8 +#if TCG_TARGET_REG_BITS == 32 +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN +#else +# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#endif #define HAVE_TCG_QEMU_TB_EXEC #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tests/tcg/Makefile.target b/tests/tcg/Makefile.target index 75257f2..14bc013 100644 --- a/tests/tcg/Makefile.target +++ b/tests/tcg/Makefile.target @@ -117,6 +117,8 @@ endif %: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) +%: %.S + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) else # For softmmu targets we include a different Makefile fragement as the # build options for bare programs are usually pretty different. They diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index fc8d90e..db122ab 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -23,7 +23,8 @@ config-cc.mak: Makefile $(call cc-option,-march=armv8.1-a+sve2, CROSS_CC_HAS_SVE2); \ $(call cc-option,-march=armv8.3-a, CROSS_CC_HAS_ARMV8_3); \ $(call cc-option,-mbranch-protection=standard, CROSS_CC_HAS_ARMV8_BTI); \ - $(call cc-option,-march=armv8.5-a+memtag, CROSS_CC_HAS_ARMV8_MTE)) 3> config-cc.mak + $(call cc-option,-march=armv8.5-a+memtag, CROSS_CC_HAS_ARMV8_MTE); \ + $(call cc-option,-march=armv9-a+sme, CROSS_CC_HAS_ARMV9_SME)) 3> config-cc.mak -include config-cc.mak # Pauth Tests @@ -53,7 +54,11 @@ endif ifneq ($(CROSS_CC_HAS_SVE),) # System Registers Tests AARCH64_TESTS += sysregs +ifneq ($(CROSS_CC_HAS_ARMV9_SME),) +sysregs: CFLAGS+=-march=armv9-a+sme -DHAS_ARMV9_SME +else sysregs: CFLAGS+=-march=armv8.1-a+sve +endif # SVE ioctl test AARCH64_TESTS += sve-ioctls diff --git a/tests/tcg/aarch64/sysregs.c b/tests/tcg/aarch64/sysregs.c index 40cf8d2..46b931f 100644 --- a/tests/tcg/aarch64/sysregs.c +++ b/tests/tcg/aarch64/sysregs.c @@ -22,6 +22,13 @@ #define HWCAP_CPUID (1 << 11) #endif +/* + * Older assemblers don't recognize newer system register names, + * but we can still access them by the Sn_n_Cn_Cn_n syntax. 
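/*
 * Aside: what the Sn_n_Cn_Cn_n spelling used just below looks like in
 * practice. Any AArch64 assembler accepts the generic encoding even
 * when it predates the register's mnemonic; on Linux the read from EL0
 * works because the kernel traps and emulates ID-register accesses (the
 * HWCAP_CPUID feature this test already depends on). AArch64-only,
 * illustrative:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t read_id_aa64isar2_el1(void)
{
    uint64_t v;
    /* S3_0_C0_C6_2 == op0=3, op1=0, CRn=0, CRm=6, op2=2 */
    asm("mrs %0, S3_0_C0_C6_2" : "=r"(v));
    return v;
}

int main(void)
{
    printf("ID_AA64ISAR2_EL1 = 0x%016" PRIx64 "\n",
           read_id_aa64isar2_el1());
    return 0;
}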
+ */ +#define SYS_ID_AA64ISAR2_EL1 S3_0_C0_C6_2 +#define SYS_ID_AA64MMFR2_EL1 S3_0_C0_C7_2 + int failed_bit_count; /* Read and print system register `id' value */ @@ -112,18 +119,23 @@ int main(void) * minimum valid fields - for the purposes of this check allowed * to have non-zero values. */ - get_cpu_reg_check_mask(id_aa64isar0_el1, _m(00ff,ffff,f0ff,fff0)); - get_cpu_reg_check_mask(id_aa64isar1_el1, _m(0000,00f0,ffff,ffff)); + get_cpu_reg_check_mask(id_aa64isar0_el1, _m(f0ff,ffff,f0ff,fff0)); + get_cpu_reg_check_mask(id_aa64isar1_el1, _m(00ff,f0ff,ffff,ffff)); + get_cpu_reg_check_mask(SYS_ID_AA64ISAR2_EL1, _m(0000,0000,0000,ffff)); /* TGran4 & TGran64 as pegged to -1 */ - get_cpu_reg_check_mask(id_aa64mmfr0_el1, _m(0000,0000,ff00,0000)); - get_cpu_reg_check_zero(id_aa64mmfr1_el1); + get_cpu_reg_check_mask(id_aa64mmfr0_el1, _m(f000,0000,ff00,0000)); + get_cpu_reg_check_mask(id_aa64mmfr1_el1, _m(0000,f000,0000,0000)); + get_cpu_reg_check_mask(SYS_ID_AA64MMFR2_EL1, _m(0000,000f,0000,0000)); /* EL1/EL0 reported as AA64 only */ get_cpu_reg_check_mask(id_aa64pfr0_el1, _m(000f,000f,00ff,0011)); - get_cpu_reg_check_mask(id_aa64pfr1_el1, _m(0000,0000,0000,00f0)); + get_cpu_reg_check_mask(id_aa64pfr1_el1, _m(0000,0000,0f00,0fff)); /* all hidden, DebugVer fixed to 0x6 (ARMv8 debug architecture) */ get_cpu_reg_check_mask(id_aa64dfr0_el1, _m(0000,0000,0000,0006)); get_cpu_reg_check_zero(id_aa64dfr1_el1); - get_cpu_reg_check_zero(id_aa64zfr0_el1); + get_cpu_reg_check_mask(id_aa64zfr0_el1, _m(0ff0,ff0f,00ff,00ff)); +#ifdef HAS_ARMV9_SME + get_cpu_reg_check_mask(id_aa64smfr0_el1, _m(80f1,00fd,0000,0000)); +#endif get_cpu_reg_check_zero(id_aa64afr0_el1); get_cpu_reg_check_zero(id_aa64afr1_el1); diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 9ee1faa..18e6a59 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -43,6 +43,8 @@ HEX_TESTS += load_align HEX_TESTS += atomics HEX_TESTS += fpstuff HEX_TESTS += overflow +HEX_TESTS += signal_context +HEX_TESTS += reg_mut HEX_TESTS += test_abs HEX_TESTS += test_bitcnt diff --git a/tests/tcg/hexagon/mem_noshuf.c b/tests/tcg/hexagon/mem_noshuf.c index 0f4064e..210b2f1 100644 --- a/tests/tcg/hexagon/mem_noshuf.c +++ b/tests/tcg/hexagon/mem_noshuf.c @@ -144,7 +144,7 @@ static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q, "}:mem_noshuf\n" : "=&r"(ret) : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred) - : "p0", "memory"); + : "r7", "p0", "memory"); return ret; } diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index f0b1947..e73ab57 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -186,10 +186,10 @@ static int L2_ploadrifnew_pi(void *p, int pred) int result; asm volatile("%0 = #31\n\t" "{\n\t" - " p0 = cmp.eq(%1, #1)\n\t" - " if (!p0.new) %0 = memw(%2++#4)\n\t" + " p0 = cmp.eq(%2, #1)\n\t" + " if (!p0.new) %0 = memw(%1++#4)\n\t" "}\n\t" - : "=r"(result) : "r"(pred), "r"(p) + : "=&r"(result), "+r"(p) : "r"(pred) : "p0"); return result; } diff --git a/tests/tcg/hexagon/reg_mut.c b/tests/tcg/hexagon/reg_mut.c new file mode 100644 index 0000000..910e663 --- /dev/null +++ b/tests/tcg/hexagon/reg_mut.c @@ -0,0 +1,152 @@ + +/* + * Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdint.h> + +static int err; + +#define check(N, EXPECT) \ + do { \ + uint64_t value = N; \ + uint64_t expect = EXPECT; \ + if (value != EXPECT) { \ + printf("ERROR: \"%s\" 0x%04llx != 0x%04llx at %s:%d\n", #N, value, \ + expect, __FILE__, __LINE__); \ + err++; \ + } \ + } while (0) + +#define check_ne(N, EXPECT) \ + do { \ + uint64_t value = N; \ + uint64_t expect = EXPECT; \ + if (value == EXPECT) { \ + printf("ERROR: \"%s\" 0x%04llx == 0x%04llx at %s:%d\n", #N, value, \ + expect, __FILE__, __LINE__); \ + err++; \ + } \ + } while (0) + +#define WRITE_REG_NOCLOBBER(output, reg_name, input) \ + asm volatile(reg_name " = %1\n\t" \ + "%0 = " reg_name "\n\t" \ + : "=r"(output) \ + : "r"(input) \ + : ); + +#define WRITE_REG_ENCODED(output, reg_name, input, encoding) \ + asm volatile("r0 = %1\n\t" \ + encoding "\n\t" \ + "%0 = " reg_name "\n\t" \ + : "=r"(output) \ + : "r"(input) \ + : "r0"); + +#define WRITE_REG_PAIR_ENCODED(output, reg_name, input, encoding) \ + asm volatile("r1:0 = %1\n\t" \ + encoding "\n\t" \ + "%0 = " reg_name "\n\t" \ + : "=r"(output) \ + : "r"(input) \ + : "r1:0"); + +/* + * Instruction word: { pc = r0 } + * + * This instruction is barred by the assembler. + * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Opc[A2_tfrrcr] | Src[R0] |P P| | C9/PC | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +#define PC_EQ_R0 ".word 0x6220c009" +#define C9_8_EQ_R1_0 ".word 0x6320c008" + +static inline void write_control_registers(void) +{ + uint32_t result = 0; + + WRITE_REG_NOCLOBBER(result, "usr", 0xffffffff); + check(result, 0x3ecfff3f); + + WRITE_REG_NOCLOBBER(result, "gp", 0xffffffff); + check(result, 0xffffffc0); + + WRITE_REG_NOCLOBBER(result, "upcyclelo", 0xffffffff); + check(result, 0x00000000); + + WRITE_REG_NOCLOBBER(result, "upcyclehi", 0xffffffff); + check(result, 0x00000000); + + WRITE_REG_NOCLOBBER(result, "utimerlo", 0xffffffff); + check(result, 0x00000000); + + WRITE_REG_NOCLOBBER(result, "utimerhi", 0xffffffff); + check(result, 0x00000000); + + /* + * PC is special. Setting it to these values + * should cause a catastrophic failure. 
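/*
 * Aside: two inline-asm details this file and the misc.c fix above lean
 * on. "=&r" (earlyclobber) tells the compiler an output is written
 * before the inputs are dead, so it must not share their registers;
 * "+r" declares an operand the asm both reads and updates. The empty
 * template below only demonstrates the constraint shapes (generic
 * GNU C, no real instructions):
 */
#include <stdio.h>

static long demo(long *p, long n)
{
    long res = 0;
    asm volatile(""                /* instructions would go here */
                 : "+&r"(res),     /* earlyclobber read-write output */
                   "+r"(p)         /* pointer the asm may advance */
                 : "r"(n)
                 : "memory");
    return res;
}

int main(void)
{
    long buf[2] = { 1, 2 };
    printf("%ld\n", demo(buf, 2));
    return 0;
}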
+ */ + WRITE_REG_ENCODED(result, "pc", 0x00000000, PC_EQ_R0); + check_ne(result, 0x00000000); + + WRITE_REG_ENCODED(result, "pc", 0x00000001, PC_EQ_R0); + check_ne(result, 0x00000001); + + WRITE_REG_ENCODED(result, "pc", 0xffffffff, PC_EQ_R0); + check_ne(result, 0xffffffff); +} + +static inline void write_control_register_pairs(void) +{ + uint64_t result = 0; + + WRITE_REG_NOCLOBBER(result, "c11:10", 0xffffffffffffffff); + check(result, 0xffffffc0ffffffff); + + WRITE_REG_NOCLOBBER(result, "c15:14", 0xffffffffffffffff); + check(result, 0x0000000000000000); + + WRITE_REG_NOCLOBBER(result, "c31:30", 0xffffffffffffffff); + check(result, 0x0000000000000000); + + WRITE_REG_PAIR_ENCODED(result, "c9:8", (uint64_t) 0x0000000000000000, + C9_8_EQ_R1_0); + check_ne(result, 0x000000000000000); + + WRITE_REG_PAIR_ENCODED(result, "c9:8", 0x0000000100000000, C9_8_EQ_R1_0); + check_ne(result, 0x0000000100000000); + + WRITE_REG_PAIR_ENCODED(result, "c9:8", 0xffffffffffffffff, C9_8_EQ_R1_0); + check_ne(result, 0xffffffffffffffff); +} + +int main() +{ + err = 0; + + write_control_registers(); + write_control_register_pairs(); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/signal_context.c b/tests/tcg/hexagon/signal_context.c new file mode 100644 index 0000000..7202fa6 --- /dev/null +++ b/tests/tcg/hexagon/signal_context.c @@ -0,0 +1,84 @@ +/* + * Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <signal.h> +#include <time.h> + +void sig_user(int sig, siginfo_t *info, void *puc) +{ + asm("r7 = #0\n\t" + "p0 = r7\n\t" + "p1 = r7\n\t" + "p2 = r7\n\t" + "p3 = r7\n\t" + : : : "r7", "p0", "p1", "p2", "p3"); +} + +int main() +{ + int err = 0; + unsigned int i = 100000; + struct sigaction act; + struct itimerspec it; + timer_t tid; + struct sigevent sev; + + act.sa_sigaction = sig_user; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGUSR1, &act, NULL); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tid; + timer_create(CLOCK_REALTIME, &sev, &tid); + it.it_interval.tv_sec = 0; + it.it_interval.tv_nsec = 100000; + it.it_value.tv_sec = 0; + it.it_value.tv_nsec = 100000; + timer_settime(tid, 0, &it, NULL); + + asm("loop0(1f, %1)\n\t" + "1: r8 = #0xff\n\t" + " p0 = r8\n\t" + " p1 = r8\n\t" + " p2 = r8\n\t" + " p3 = r8\n\t" + " jump 3f\n\t" + "2: memb(%0) = #1\n\t" + " jump 4f\n\t" + "3:\n\t" + " r8 = p0\n\t" + " p0 = cmp.eq(r8, #0xff)\n\t" + " if (!p0) jump 2b\n\t" + " r8 = p1\n\t" + " p0 = cmp.eq(r8, #0xff)\n\t" + " if (!p0) jump 2b\n\t" + " r8 = p2\n\t" + " p0 = cmp.eq(r8, #0xff)\n\t" + " if (!p0) jump 2b\n\t" + " r8 = p3\n\t" + " p0 = cmp.eq(r8, #0xff)\n\t" + " if (!p0) jump 2b\n\t" + "4: {}: endloop0\n\t" + : + : "r"(&err), "r"(i) + : "memory", "r8", "p0", "p1", "p2", "p3"); + + puts(err ? 
"FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 5f0fee1..e7213af 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -39,6 +39,9 @@ signals: LDFLAGS+=-lrt -lpthread munmap-pthread: CFLAGS+=-pthread munmap-pthread: LDFLAGS+=-pthread +vma-pthread: CFLAGS+=-pthread +vma-pthread: LDFLAGS+=-pthread + # We define the runner for test-mmap after the individual # architectures have defined their supported pages sizes. If no # additional page sizes are defined we only run the default test. diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c index d7143b0..1c79005 100644 --- a/tests/tcg/multiarch/munmap-pthread.c +++ b/tests/tcg/multiarch/munmap-pthread.c @@ -7,21 +7,7 @@ #include <sys/mman.h> #include <unistd.h> -static const char nop_func[] = { -#if defined(__aarch64__) - 0xc0, 0x03, 0x5f, 0xd6, /* ret */ -#elif defined(__alpha__) - 0x01, 0x80, 0xFA, 0x6B, /* ret */ -#elif defined(__arm__) - 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ -#elif defined(__riscv) - 0x67, 0x80, 0x00, 0x00, /* ret */ -#elif defined(__s390__) - 0x07, 0xfe, /* br %r14 */ -#elif defined(__i386__) || defined(__x86_64__) - 0xc3, /* ret */ -#endif -}; +#include "nop_func.h" static void *thread_mmap_munmap(void *arg) { diff --git a/tests/tcg/multiarch/nop_func.h b/tests/tcg/multiarch/nop_func.h new file mode 100644 index 0000000..f714d21 --- /dev/null +++ b/tests/tcg/multiarch/nop_func.h @@ -0,0 +1,25 @@ +/* + * No-op functions that can be safely copied. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef NOP_FUNC_H +#define NOP_FUNC_H + +static const char nop_func[] = { +#if defined(__aarch64__) + 0xc0, 0x03, 0x5f, 0xd6, /* ret */ +#elif defined(__alpha__) + 0x01, 0x80, 0xFA, 0x6B, /* ret */ +#elif defined(__arm__) + 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ +#elif defined(__riscv) + 0x67, 0x80, 0x00, 0x00, /* ret */ +#elif defined(__s390__) + 0x07, 0xfe, /* br %r14 */ +#elif defined(__i386__) || defined(__x86_64__) + 0xc3, /* ret */ +#endif +}; + +#endif diff --git a/tests/tcg/multiarch/vma-pthread.c b/tests/tcg/multiarch/vma-pthread.c new file mode 100644 index 0000000..7045da0 --- /dev/null +++ b/tests/tcg/multiarch/vma-pthread.c @@ -0,0 +1,207 @@ +/* + * Test that VMA updates do not race. + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Map a contiguous chunk of RWX memory. Split it into 8 equally sized + * regions, each of which is guaranteed to have a certain combination of + * protection bits set. + * + * Reader, writer and executor threads perform the respective operations on + * pages, which are guaranteed to have the respective protection bit set. + * Two mutator threads change the non-fixed protection bits randomly. 
+ */ +#include <assert.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> + +#include "nop_func.h" + +#define PAGE_IDX_BITS 10 +#define PAGE_COUNT (1 << PAGE_IDX_BITS) +#define PAGE_IDX_MASK (PAGE_COUNT - 1) +#define REGION_IDX_BITS 3 +#define PAGE_IDX_R_MASK (1 << 7) +#define PAGE_IDX_W_MASK (1 << 8) +#define PAGE_IDX_X_MASK (1 << 9) +#define REGION_MASK (PAGE_IDX_R_MASK | PAGE_IDX_W_MASK | PAGE_IDX_X_MASK) +#define PAGES_PER_REGION (1 << (PAGE_IDX_BITS - REGION_IDX_BITS)) + +struct context { + int pagesize; + char *ptr; + int dev_null_fd; + volatile int mutator_count; +}; + +static void *thread_read(void *arg) +{ + struct context *ctx = arg; + ssize_t sret; + size_t i, j; + int ret; + + for (i = 0; ctx->mutator_count; i++) { + char *p; + + j = (i & PAGE_IDX_MASK) | PAGE_IDX_R_MASK; + p = &ctx->ptr[j * ctx->pagesize]; + + /* Read directly. */ + ret = memcmp(p, nop_func, sizeof(nop_func)); + if (ret != 0) { + fprintf(stderr, "fail direct read %p\n", p); + abort(); + } + + /* Read indirectly. */ + sret = write(ctx->dev_null_fd, p, 1); + if (sret != 1) { + if (sret < 0) { + fprintf(stderr, "fail indirect read %p (%m)\n", p); + } else { + fprintf(stderr, "fail indirect read %p (%zd)\n", p, sret); + } + abort(); + } + } + + return NULL; +} + +static void *thread_write(void *arg) +{ + struct context *ctx = arg; + struct timespec *ts; + size_t i, j; + int ret; + + for (i = 0; ctx->mutator_count; i++) { + j = (i & PAGE_IDX_MASK) | PAGE_IDX_W_MASK; + + /* Write directly. */ + memcpy(&ctx->ptr[j * ctx->pagesize], nop_func, sizeof(nop_func)); + + /* Write using a syscall. */ + ts = (struct timespec *)(&ctx->ptr[(j + 1) * ctx->pagesize] - + sizeof(struct timespec)); + ret = clock_gettime(CLOCK_REALTIME, ts); + if (ret != 0) { + fprintf(stderr, "fail indirect write %p (%m)\n", ts); + abort(); + } + } + + return NULL; +} + +static void *thread_execute(void *arg) +{ + struct context *ctx = arg; + size_t i, j; + + for (i = 0; ctx->mutator_count; i++) { + j = (i & PAGE_IDX_MASK) | PAGE_IDX_X_MASK; + ((void(*)(void))&ctx->ptr[j * ctx->pagesize])(); + } + + return NULL; +} + +static void *thread_mutate(void *arg) +{ + size_t i, start_idx, end_idx, page_idx, tmp; + struct context *ctx = arg; + unsigned int seed; + int prot, ret; + + seed = (unsigned int)time(NULL); + for (i = 0; i < 10000; i++) { + start_idx = rand_r(&seed) & PAGE_IDX_MASK; + end_idx = rand_r(&seed) & PAGE_IDX_MASK; + if (start_idx > end_idx) { + tmp = start_idx; + start_idx = end_idx; + end_idx = tmp; + } + prot = rand_r(&seed) & (PROT_READ | PROT_WRITE | PROT_EXEC); + for (page_idx = start_idx & REGION_MASK; page_idx <= end_idx; + page_idx += PAGES_PER_REGION) { + if (page_idx & PAGE_IDX_R_MASK) { + prot |= PROT_READ; + } + if (page_idx & PAGE_IDX_W_MASK) { + /* FIXME: qemu syscalls check for both read+write. */ + prot |= PROT_WRITE | PROT_READ; + } + if (page_idx & PAGE_IDX_X_MASK) { + prot |= PROT_EXEC; + } + } + ret = mprotect(&ctx->ptr[start_idx * ctx->pagesize], + (end_idx - start_idx + 1) * ctx->pagesize, prot); + assert(ret == 0); + } + + __atomic_fetch_sub(&ctx->mutator_count, 1, __ATOMIC_SEQ_CST); + + return NULL; +} + +int main(void) +{ + pthread_t threads[5]; + struct context ctx; + size_t i; + int ret; + + /* Without a template, nothing to test. */ + if (sizeof(nop_func) == 0) { + return EXIT_SUCCESS; + } + + /* Initialize memory chunk. 
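/*
 * Aside: the index scheme defined above, spelled out. Ten index bits
 * give 1024 pages; the top three (bits 7..9) select one of eight
 * regions, so every page permanently keeps the R/W/X subset encoded in
 * its own index no matter what the mutators do. A quick check:
 */
#include <assert.h>

#define IDX_BITS 10
#define R_MASK (1 << 7)
#define W_MASK (1 << 8)
#define X_MASK (1 << 9)

int main(void)
{
    /* 2^(10-3) = 128 pages per region, eight regions in total. */
    assert((1 << (IDX_BITS - 3)) == 128);

    /* The reader thread masks its counter into the readable half:
     * any index with bit 7 forced on is guaranteed PROT_READ. */
    unsigned j = (0x1234u & ((1u << IDX_BITS) - 1)) | R_MASK;
    assert((j & R_MASK) && j < (1u << IDX_BITS));
    return 0;
}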
 */
+    ctx.pagesize = getpagesize();
+    ctx.ptr = mmap(NULL, PAGE_COUNT * ctx.pagesize,
+                   PROT_READ | PROT_WRITE | PROT_EXEC,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    assert(ctx.ptr != MAP_FAILED);
+    for (i = 0; i < PAGE_COUNT; i++) {
+        memcpy(&ctx.ptr[i * ctx.pagesize], nop_func, sizeof(nop_func));
+    }
+    ctx.dev_null_fd = open("/dev/null", O_WRONLY);
+    assert(ctx.dev_null_fd >= 0);
+    ctx.mutator_count = 2;
+
+    /* Start threads. */
+    ret = pthread_create(&threads[0], NULL, thread_read, &ctx);
+    assert(ret == 0);
+    ret = pthread_create(&threads[1], NULL, thread_write, &ctx);
+    assert(ret == 0);
+    ret = pthread_create(&threads[2], NULL, thread_execute, &ctx);
+    assert(ret == 0);
+    for (i = 3; i <= 4; i++) {
+        ret = pthread_create(&threads[i], NULL, thread_mutate, &ctx);
+        assert(ret == 0);
+    }
+
+    /* Wait for threads to stop. */
+    for (i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
+        ret = pthread_join(threads[i], NULL);
+        assert(ret == 0);
+    }
+
+    /* Destroy memory chunk. */
+    ret = close(ctx.dev_null_fd);
+    assert(ret == 0);
+    ret = munmap(ctx.ptr, PAGE_COUNT * ctx.pagesize);
+    assert(ret == 0);
+
+    return EXIT_SUCCESS;
+}
diff --git a/tests/tcg/riscv64/Makefile.target b/tests/tcg/riscv64/Makefile.target
index b5b89df..cc3ed65 100644
--- a/tests/tcg/riscv64/Makefile.target
+++ b/tests/tcg/riscv64/Makefile.target
@@ -4,3 +4,9 @@
 VPATH += $(SRC_PATH)/tests/tcg/riscv64
 TESTS += test-div
 TESTS += noexec
+
+# Disable compressed instructions for test-noc
+TESTS += test-noc
+test-noc: LDFLAGS = -nostdlib -static
+run-test-noc: QEMU_OPTS += -cpu rv64,c=false
+run-plugin-test-noc-%: QEMU_OPTS += -cpu rv64,c=false
diff --git a/tests/tcg/riscv64/test-noc.S b/tests/tcg/riscv64/test-noc.S
new file mode 100644
index 0000000..e29d60c
--- /dev/null
+++ b/tests/tcg/riscv64/test-noc.S
@@ -0,0 +1,32 @@
+#include <asm/unistd.h>
+
+        .text
+        .globl _start
+_start:
+        .option norvc
+        li a0, 4                /* SIGILL */
+        la a1, sa
+        li a2, 0
+        li a3, 8
+        li a7, __NR_rt_sigaction
+        scall
+
+        .option rvc
+        li a0, 1
+        j exit
+        .option norvc
+
+pass:
+        li a0, 0
+exit:
+        li a7, __NR_exit
+        scall
+
+        .data
+        /* struct kernel_sigaction sa = { .sa_handler = pass }; */
+        .type sa, @object
+        .size sa, 32
+sa:
+        .dword pass
+        .zero 24
+
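/*
 * Aside: the same test in C terms, modulo the raw syscalls and the
 * .option-controlled encoding regions that only assembly can express.
 * With -cpu rv64,c=false the compressed instruction traps, SIGILL is
 * delivered, and the handler exits 0; on a CPU with the C extension
 * the program falls through and exits 1. Illustrative sketch:
 */
#include <signal.h>
#include <stdlib.h>

static void pass(int sig)
{
    _Exit(0);                 /* SIGILL seen: the test passes */
}

int main(void)
{
    struct sigaction sa = { .sa_handler = pass };
    sigaction(SIGILL, &sa, NULL);
    /* ...a compressed (RVC) instruction would be executed here... */
    exit(1);                  /* no trap: compressed insns executed */
}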