From 3a5f6805c7ca7deb8d1abaf0153936eeb51d074e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 07:28:30 +0300 Subject: tcg/sparc: Remove support for sparc32plus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 9b9c37c36439, we have only supported sparc64 cpus. Debian and Gentoo now only support 64-bit sparc64 userland, so it is time to drop the 32-bit sparc64 userland: sparc32plus. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 168 +++++++++------------------------------------ tcg/sparc/tcg-target.h | 11 --- tcg/tcg.c | 75 +------------------- 3 files changed, 34 insertions(+), 220 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 72d9552..097bcfc 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -22,6 +22,11 @@ * THE SOFTWARE. */ +/* We only support generating code for 64-bit mode. */ +#ifndef __arch64__ +#error "unsupported code generation mode" +#endif + #include "../tcg-pool.c.inc" #ifdef CONFIG_DEBUG_TCG @@ -61,12 +66,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -#ifdef __arch64__ -# define SPARC64 1 -#else -# define SPARC64 0 -#endif - #define TCG_CT_CONST_S11 0x100 #define TCG_CT_CONST_S13 0x200 #define TCG_CT_CONST_ZERO 0x400 @@ -91,11 +90,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { * high bits of the %i and %l registers garbage at all times. */ #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -#if SPARC64 # define ALL_GENERAL_REGS64 ALL_GENERAL_REGS -#else -# define ALL_GENERAL_REGS64 MAKE_64BIT_MASK(0, 16) -#endif #define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) #define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) @@ -306,11 +301,7 @@ static bool check_fit_i32(int32_t val, unsigned int bits) } #define check_fit_tl check_fit_i64 -#if SPARC64 -# define check_fit_ptr check_fit_i64 -#else -# define check_fit_ptr check_fit_i32 -#endif +#define check_fit_ptr check_fit_i64 static bool patch_reloc(tcg_insn_unit *src_rw, int type, intptr_t value, intptr_t addend) @@ -573,11 +564,6 @@ static void tcg_out_sety(TCGContext *s, TCGReg rs) tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); } -static void tcg_out_rdy(TCGContext *s, TCGReg rd) -{ - tcg_out32(s, RDY | INSN_RD(rd)); -} - static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, int32_t val2, int val2const, int uns) { @@ -914,9 +900,7 @@ static void emit_extend(TCGContext *s, TCGReg r, int op) tcg_out_arithi(s, r, r, 16, SHIFT_SRL); break; case MO_32: - if (SPARC64) { - tcg_out_arith(s, r, r, 0, SHIFT_SRL); - } + tcg_out_arith(s, r, r, 0, SHIFT_SRL); break; case MO_64: break; @@ -948,7 +932,6 @@ static void build_trampolines(TCGContext *s) }; int i; - TCGReg ra; for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) { if (qemu_ld_helpers[i] == NULL) { @@ -961,16 +944,8 @@ static void build_trampolines(TCGContext *s) } qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); - if (SPARC64 || TARGET_LONG_BITS == 32) { - ra = TCG_REG_O3; - } else { - /* Install the high part of the address. */ - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); - ra = TCG_REG_O4; - } - /* Set the retaddr operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7); /* Tail call. 
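       The JMPL below links through %g0, so %o7 is left intact and the
       helper returns directly to the generated code.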
*/ tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); /* delay slot -- set the env argument */ @@ -988,37 +963,10 @@ static void build_trampolines(TCGContext *s) } qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); - if (SPARC64) { - emit_extend(s, TCG_REG_O2, i); - ra = TCG_REG_O4; - } else { - ra = TCG_REG_O1; - if (TARGET_LONG_BITS == 64) { - /* Install the high part of the address. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - ra += 1; - } - if ((i & MO_SIZE) == MO_64) { - /* Install the high part of the data. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - emit_extend(s, ra, i); - ra += 1; - } - /* Skip the oi argument. */ - ra += 1; - } - + emit_extend(s, TCG_REG_O2, i); + /* Set the retaddr operand. */ - if (ra >= TCG_REG_O6) { - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7); /* Tail call. */ tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); @@ -1047,11 +995,6 @@ static void build_trampolines(TCGContext *s) qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); } - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Install the high part of the address. */ - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); - } - /* Tail call. */ tcg_out_jmpl_const(s, helper, true, true); /* delay slot -- set the env argument */ @@ -1182,7 +1125,7 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, tcg_out_cmp(s, r0, r2, 0); /* If the guest address must be zero-extended, do so now. */ - if (SPARC64 && TARGET_LONG_BITS == 32) { + if (TARGET_LONG_BITS == 32) { tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); return r0; } @@ -1231,7 +1174,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; + TCGReg addrz; const tcg_insn_unit *func; addrz = tcg_out_tlb_load(s, addr, memi, memop, @@ -1251,12 +1194,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, /* TLB Miss. */ - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ @@ -1268,30 +1206,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, tcg_debug_assert(func != NULL); tcg_out_call_nodelay(s, func, false); /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); - - /* Recall that all of the helpers return 64-bit results. - Which complicates things for sparcv8plus. */ - if (SPARC64) { - /* We let the helper sign-extend SB and SW, but leave SL for here. */ - if (is_64 && (memop & MO_SSIZE) == MO_SL) { - tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); - } else { - tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); - } + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi); + + /* We let the helper sign-extend SB and SW, but leave SL for here. 
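       (For MO_SL we deliberately selected the zero-extending MO_UL
       helper above, so a 32-bit arithmetic right shift by zero is all
       that is needed to restore the sign from bit 31 of %o0.)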
*/ + if (is_64 && (memop & MO_SSIZE) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); } else { - if ((memop & MO_SIZE) == MO_64) { - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); - tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); - } else if (is_64) { - /* Re-extend from 32-bit rather than reassembling when we - know the high register must be an extension. */ - tcg_out_arithi(s, data, TCG_REG_O1, 0, - memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); - } + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); } *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); @@ -1301,7 +1222,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, unsigned s_bits = memop & MO_SIZE; unsigned t_bits; - if (SPARC64 && TARGET_LONG_BITS == 32) { + if (TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } @@ -1337,10 +1258,9 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, * operation in the delay slot, and failure need only invoke the * handler for SIGBUS. */ - TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); /* delay slot -- move to low part of argument reg */ - tcg_out_mov_delay(s, arg_low, addr); + tcg_out_mov_delay(s, TCG_REG_O1, addr); } else { /* Underalignment: load by pieces of minimum alignment. */ int ld_opc, a_size, s_size, i; @@ -1400,7 +1320,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; + TCGReg addrz; const tcg_insn_unit *func; addrz = tcg_out_tlb_load(s, addr, memi, memop, @@ -1418,23 +1338,14 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, /* TLB Miss. */ - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); - if (!SPARC64 && (memop & MO_SIZE) == MO_64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, data); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data); func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; tcg_debug_assert(func != NULL); tcg_out_call_nodelay(s, func, false); /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi); *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else @@ -1443,7 +1354,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, unsigned s_bits = memop & MO_SIZE; unsigned t_bits; - if (SPARC64 && TARGET_LONG_BITS == 32) { + if (TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } @@ -1479,10 +1390,9 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, * operation in the delay slot, and failure need only invoke the * handler for SIGBUS. */ - TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); /* delay slot -- move to low part of argument reg */ - tcg_out_mov_delay(s, arg_low, addr); + tcg_out_mov_delay(s, TCG_REG_O1, addr); } else { /* Underalignment: store by pieces of minimum alignment. 
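       (Illustrative: with a_bits == 0 an 8-byte store becomes eight
       STBs, the little-endian path emitting byte i as data >> (i * 8)
       and the big-endian path emitting the most-significant pieces
       first.)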
*/ int st_opc, a_size, s_size, i; @@ -1719,14 +1629,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_muls2_i32: c = ARITH_SMUL; do_mul2: - /* The 32-bit multiply insns produce a full 64-bit result. If the - destination register can hold it, we can avoid the slower RDY. */ + /* The 32-bit multiply insns produce a full 64-bit result. */ tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - if (SPARC64 || a0 <= TCG_REG_O7) { - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - } else { - tcg_out_rdy(s, a1); - } + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); break; case INDEX_op_qemu_ld_i32: @@ -1984,16 +1889,11 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ } -#if SPARC64 -# define ELF_HOST_MACHINE EM_SPARCV9 -#else -# define ELF_HOST_MACHINE EM_SPARC32PLUS -# define ELF_HOST_FLAGS EF_SPARC_32PLUS -#endif +#define ELF_HOST_MACHINE EM_SPARCV9 typedef struct { DebugFrameHeader h; - uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; + uint8_t fde_def_cfa[4]; uint8_t fde_win_save; uint8_t fde_ret_save[3]; } DebugFrame; @@ -2010,12 +1910,8 @@ static const DebugFrame debug_frame = { .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), .fde_def_cfa = { -#if SPARC64 12, 30, /* DW_CFA_def_cfa i6, 2047 */ (2047 & 0x7f) | 0x80, (2047 >> 7) -#else - 13, 30 /* DW_CFA_def_cfa_register i6 */ -#endif }, .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index c050763..8655acd 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -25,8 +25,6 @@ #ifndef SPARC_TCG_TARGET_H #define SPARC_TCG_TARGET_H -#define TCG_TARGET_REG_BITS 64 - #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 #define TCG_TARGET_NB_REGS 32 @@ -70,19 +68,10 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_O6 -#ifdef __arch64__ #define TCG_TARGET_STACK_BIAS 2047 #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) -#else -#define TCG_TARGET_STACK_BIAS 0 -#define TCG_TARGET_STACK_ALIGN 8 -#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) -#endif - -#ifdef __arch64__ #define TCG_TARGET_EXTEND_ARGS 1 -#endif #if defined(__VIS__) && __VIS__ >= 0x300 #define use_vis3_instructions 1 diff --git a/tcg/tcg.c b/tcg/tcg.c index 612a12f..c9e664e 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1487,39 +1487,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) } #endif -#if defined(__sparc__) && !defined(__arch64__) \ - && !defined(CONFIG_TCG_INTERPRETER) - /* We have 64-bit values in one register, but need to pass as two - separate parameters. Split them. 
*/ - int orig_typemask = typemask; - int orig_nargs = nargs; - TCGv_i64 retl, reth; - TCGTemp *split_args[MAX_OPC_PARAM]; - - retl = NULL; - reth = NULL; - typemask = 0; - for (i = real_args = 0; i < nargs; ++i) { - int argtype = extract32(orig_typemask, (i + 1) * 3, 3); - bool is_64bit = (argtype & ~1) == dh_typecode_i64; - - if (is_64bit) { - TCGv_i64 orig = temp_tcgv_i64(args[i]); - TCGv_i32 h = tcg_temp_new_i32(); - TCGv_i32 l = tcg_temp_new_i32(); - tcg_gen_extr_i64_i32(l, h, orig); - split_args[real_args++] = tcgv_i32_temp(h); - typemask |= dh_typecode_i32 << (real_args * 3); - split_args[real_args++] = tcgv_i32_temp(l); - typemask |= dh_typecode_i32 << (real_args * 3); - } else { - split_args[real_args++] = args[i]; - typemask |= argtype << (real_args * 3); - } - } - nargs = real_args; - args = split_args; -#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int argtype = extract32(typemask, (i + 1) * 3, 3); bool is_32bit = (argtype & ~1) == dh_typecode_i32; @@ -1542,22 +1510,6 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) pi = 0; if (ret != NULL) { -#if defined(__sparc__) && !defined(__arch64__) \ - && !defined(CONFIG_TCG_INTERPRETER) - if ((typemask & 6) == dh_typecode_i64) { - /* The 32-bit ABI is going to return the 64-bit value in - the %o0/%o1 register pair. Prepare for this by using - two return temporaries, and reassemble below. */ - retl = tcg_temp_new_i64(); - reth = tcg_temp_new_i64(); - op->args[pi++] = tcgv_i64_arg(reth); - op->args[pi++] = tcgv_i64_arg(retl); - nb_rets = 2; - } else { - op->args[pi++] = temp_arg(ret); - nb_rets = 1; - } -#else if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { #if HOST_BIG_ENDIAN op->args[pi++] = temp_arg(ret + 1); @@ -1571,7 +1523,6 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) op->args[pi++] = temp_arg(ret); nb_rets = 1; } -#endif } else { nb_rets = 0; } @@ -1634,29 +1585,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) tcg_debug_assert(TCGOP_CALLI(op) == real_args); tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); -#if defined(__sparc__) && !defined(__arch64__) \ - && !defined(CONFIG_TCG_INTERPRETER) - /* Free all of the parts we allocated above. */ - for (i = real_args = 0; i < orig_nargs; ++i) { - int argtype = extract32(orig_typemask, (i + 1) * 3, 3); - bool is_64bit = (argtype & ~1) == dh_typecode_i64; - - if (is_64bit) { - tcg_temp_free_internal(args[real_args++]); - tcg_temp_free_internal(args[real_args++]); - } else { - real_args++; - } - } - if ((orig_typemask & 6) == dh_typecode_i64) { - /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. - Note that describing these as TCGv_i64 eliminates an unnecessary - zero-extension that tcg_gen_concat_i32_i64 would create. 
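       (tcg_gen_concat32_i64 deposits one i64 temporary into the high
       half of the other directly, whereas tcg_gen_concat_i32_i64
       would first extend each 32-bit input.)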
*/ - tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); - tcg_temp_free_i64(retl); - tcg_temp_free_i64(reth); - } -#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int argtype = extract32(typemask, (i + 1) * 3, 3); bool is_32bit = (argtype & ~1) == dh_typecode_i32; -- cgit v1.1 From 6d0b52ed889f47fa8e39e9611d7bce15cc533369 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 08:00:57 +0300 Subject: tcg/sparc64: Rename from tcg/sparc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emphasize that we only support full 64-bit code generation. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- MAINTAINERS | 2 +- meson.build | 4 +- tcg/sparc/tcg-target-con-set.h | 32 - tcg/sparc/tcg-target-con-str.h | 23 - tcg/sparc/tcg-target.c.inc | 1965 -------------------------------------- tcg/sparc/tcg-target.h | 161 ---- tcg/sparc64/tcg-target-con-set.h | 32 + tcg/sparc64/tcg-target-con-str.h | 23 + tcg/sparc64/tcg-target.c.inc | 1965 ++++++++++++++++++++++++++++++++++++++ tcg/sparc64/tcg-target.h | 161 ++++ 10 files changed, 2183 insertions(+), 2185 deletions(-) delete mode 100644 tcg/sparc/tcg-target-con-set.h delete mode 100644 tcg/sparc/tcg-target-con-str.h delete mode 100644 tcg/sparc/tcg-target.c.inc delete mode 100644 tcg/sparc/tcg-target.h create mode 100644 tcg/sparc64/tcg-target-con-set.h create mode 100644 tcg/sparc64/tcg-target-con-str.h create mode 100644 tcg/sparc64/tcg-target.c.inc create mode 100644 tcg/sparc64/tcg-target.h diff --git a/MAINTAINERS b/MAINTAINERS index c41d8d6..62bbbba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3370,7 +3370,7 @@ L: qemu-s390x@nongnu.org SPARC TCG target S: Odd Fixes -F: tcg/sparc/ +F: tcg/sparc64/ F: disas/sparc.c TCI TCG target diff --git a/meson.build b/meson.build index 1c1afcc..d809d51 100644 --- a/meson.build +++ b/meson.build @@ -49,7 +49,7 @@ qapi_trace_events = [] bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin'] supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux'] supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64', - 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc', 'sparc64'] + 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64'] cpu = host_machine.cpu_family() @@ -469,8 +469,6 @@ if get_option('tcg').allowed() endif if get_option('tcg_interpreter') tcg_arch = 'tci' - elif host_arch == 'sparc64' - tcg_arch = 'sparc' elif host_arch == 'x86_64' tcg_arch = 'i386' elif host_arch == 'ppc64' diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc/tcg-target-con-set.h deleted file mode 100644 index 3b751dc..0000000 --- a/tcg/sparc/tcg-target-con-set.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Define Sparc target-specific constraint sets. - * Copyright (c) 2021 Linaro - */ - -/* - * C_On_Im(...) defines a constraint set with outputs and inputs. - * Each operand should be a sequence of constraint letters as defined by - * tcg-target-con-str.h; the constraint combination is inclusive or. 
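       For example, C_O1_I2(r, rZ, rJ) declares one output in any
       general register and two inputs that may also be the constant
       zero ('Z') or a signed 13-bit immediate ('J'); the letters are
       defined in tcg-target-con-str.h below.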
- */ -C_O0_I1(r) -C_O0_I2(rZ, r) -C_O0_I2(RZ, r) -C_O0_I2(rZ, rJ) -C_O0_I2(RZ, RJ) -C_O0_I2(sZ, A) -C_O0_I2(SZ, A) -C_O1_I1(r, A) -C_O1_I1(R, A) -C_O1_I1(r, r) -C_O1_I1(r, R) -C_O1_I1(R, r) -C_O1_I1(R, R) -C_O1_I2(R, R, R) -C_O1_I2(r, rZ, rJ) -C_O1_I2(R, RZ, RJ) -C_O1_I4(r, rZ, rJ, rI, 0) -C_O1_I4(R, RZ, RJ, RI, 0) -C_O2_I2(r, r, rZ, rJ) -C_O2_I4(R, R, RZ, RZ, RJ, RI) -C_O2_I4(r, r, rZ, rZ, rJ, rJ) diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc/tcg-target-con-str.h deleted file mode 100644 index fdb25d9..0000000 --- a/tcg/sparc/tcg-target-con-str.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Define Sparc target-specific operand constraints. - * Copyright (c) 2021 Linaro - */ - -/* - * Define constraint letters for register sets: - * REGS(letter, register_mask) - */ -REGS('r', ALL_GENERAL_REGS) -REGS('R', ALL_GENERAL_REGS64) -REGS('s', ALL_QLDST_REGS) -REGS('S', ALL_QLDST_REGS64) -REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS) - -/* - * Define constraint letters for constants: - * CONST(letter, TCG_CT_CONST_* bit set) - */ -CONST('I', TCG_CT_CONST_S11) -CONST('J', TCG_CT_CONST_S13) -CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc deleted file mode 100644 index 097bcfc..0000000 --- a/tcg/sparc/tcg-target.c.inc +++ /dev/null @@ -1,1965 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* We only support generating code for 64-bit mode. */ -#ifndef __arch64__ -#error "unsupported code generation mode" -#endif - -#include "../tcg-pool.c.inc" - -#ifdef CONFIG_DEBUG_TCG -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { - "%g0", - "%g1", - "%g2", - "%g3", - "%g4", - "%g5", - "%g6", - "%g7", - "%o0", - "%o1", - "%o2", - "%o3", - "%o4", - "%o5", - "%o6", - "%o7", - "%l0", - "%l1", - "%l2", - "%l3", - "%l4", - "%l5", - "%l6", - "%l7", - "%i0", - "%i1", - "%i2", - "%i3", - "%i4", - "%i5", - "%i6", - "%i7", -}; -#endif - -#define TCG_CT_CONST_S11 0x100 -#define TCG_CT_CONST_S13 0x200 -#define TCG_CT_CONST_ZERO 0x400 - -/* - * For softmmu, we need to avoid conflicts with the first 3 - * argument registers to perform the tlb lookup, and to call - * the helper function. 
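       Concretely, SOFTMMU_RESERVE_REGS masks %o0-%o2: tcg_out_tlb_load
       uses them for the TLB mask, table pointer and comparator before
       they become the first helper argument registers.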
- */ -#ifdef CONFIG_SOFTMMU -#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3) -#else -#define SOFTMMU_RESERVE_REGS 0 -#endif - -/* - * Note that sparcv8plus can only hold 64 bit quantities in %g and %o - * registers. These are saved manually by the kernel in full 64-bit - * slots. The %i and %l registers are saved by the register window - * mechanism, which only allocates space for 32 bits. Given that this - * window spill/fill can happen on any signal, we must consider the - * high bits of the %i and %l registers garbage at all times. - */ -#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS -#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) -#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) - -/* Define some temporary registers. T2 is used for constant generation. */ -#define TCG_REG_T1 TCG_REG_G1 -#define TCG_REG_T2 TCG_REG_O7 - -#ifndef CONFIG_SOFTMMU -# define TCG_GUEST_BASE_REG TCG_REG_I5 -#endif - -#define TCG_REG_TB TCG_REG_I1 -#define USE_REG_TB (sizeof(void *) > 4) - -static const int tcg_target_reg_alloc_order[] = { - TCG_REG_L0, - TCG_REG_L1, - TCG_REG_L2, - TCG_REG_L3, - TCG_REG_L4, - TCG_REG_L5, - TCG_REG_L6, - TCG_REG_L7, - - TCG_REG_I0, - TCG_REG_I1, - TCG_REG_I2, - TCG_REG_I3, - TCG_REG_I4, - TCG_REG_I5, - - TCG_REG_G2, - TCG_REG_G3, - TCG_REG_G4, - TCG_REG_G5, - - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, -}; - -static const int tcg_target_call_iarg_regs[6] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, -}; - -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, -}; - -#define INSN_OP(x) ((x) << 30) -#define INSN_OP2(x) ((x) << 22) -#define INSN_OP3(x) ((x) << 19) -#define INSN_OPF(x) ((x) << 5) -#define INSN_RD(x) ((x) << 25) -#define INSN_RS1(x) ((x) << 14) -#define INSN_RS2(x) (x) -#define INSN_ASI(x) ((x) << 5) - -#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) -#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) -#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) -#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) -#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) -#define INSN_COND(x) ((x) << 25) - -#define COND_N 0x0 -#define COND_E 0x1 -#define COND_LE 0x2 -#define COND_L 0x3 -#define COND_LEU 0x4 -#define COND_CS 0x5 -#define COND_NEG 0x6 -#define COND_VS 0x7 -#define COND_A 0x8 -#define COND_NE 0x9 -#define COND_G 0xa -#define COND_GE 0xb -#define COND_GU 0xc -#define COND_CC 0xd -#define COND_POS 0xe -#define COND_VC 0xf -#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) - -#define RCOND_Z 1 -#define RCOND_LEZ 2 -#define RCOND_LZ 3 -#define RCOND_NZ 5 -#define RCOND_GZ 6 -#define RCOND_GEZ 7 - -#define MOVCC_ICC (1 << 18) -#define MOVCC_XCC (1 << 18 | 1 << 12) - -#define BPCC_ICC 0 -#define BPCC_XCC (2 << 20) -#define BPCC_PT (1 << 19) -#define BPCC_PN 0 -#define BPCC_A (1 << 29) - -#define BPR_PT BPCC_PT - -#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) -#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) -#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) -#define ARITH_ANDCC (INSN_OP(2) | INSN_OP3(0x11)) -#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) -#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) -#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) -#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06)) -#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) -#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) -#define ARITH_SUBCC 
(INSN_OP(2) | INSN_OP3(0x14)) -#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) -#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) -#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) -#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) -#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) -#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) -#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) -#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) -#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) -#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) -#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) - -#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) -#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) - -#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) -#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) -#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27)) - -#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) -#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12)) -#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) - -#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) -#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) -#define JMPL (INSN_OP(2) | INSN_OP3(0x38)) -#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) -#define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) -#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) -#define SETHI (INSN_OP(0) | INSN_OP2(0x4)) -#define CALL INSN_OP(1) -#define LDUB (INSN_OP(3) | INSN_OP3(0x01)) -#define LDSB (INSN_OP(3) | INSN_OP3(0x09)) -#define LDUH (INSN_OP(3) | INSN_OP3(0x02)) -#define LDSH (INSN_OP(3) | INSN_OP3(0x0a)) -#define LDUW (INSN_OP(3) | INSN_OP3(0x00)) -#define LDSW (INSN_OP(3) | INSN_OP3(0x08)) -#define LDX (INSN_OP(3) | INSN_OP3(0x0b)) -#define STB (INSN_OP(3) | INSN_OP3(0x05)) -#define STH (INSN_OP(3) | INSN_OP3(0x06)) -#define STW (INSN_OP(3) | INSN_OP3(0x04)) -#define STX (INSN_OP(3) | INSN_OP3(0x0e)) -#define LDUBA (INSN_OP(3) | INSN_OP3(0x11)) -#define LDSBA (INSN_OP(3) | INSN_OP3(0x19)) -#define LDUHA (INSN_OP(3) | INSN_OP3(0x12)) -#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a)) -#define LDUWA (INSN_OP(3) | INSN_OP3(0x10)) -#define LDSWA (INSN_OP(3) | INSN_OP3(0x18)) -#define LDXA (INSN_OP(3) | INSN_OP3(0x1b)) -#define STBA (INSN_OP(3) | INSN_OP3(0x15)) -#define STHA (INSN_OP(3) | INSN_OP3(0x16)) -#define STWA (INSN_OP(3) | INSN_OP3(0x14)) -#define STXA (INSN_OP(3) | INSN_OP3(0x1e)) - -#define MEMBAR (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13)) - -#define NOP (SETHI | INSN_RD(TCG_REG_G0) | 0) - -#ifndef ASI_PRIMARY_LITTLE -#define ASI_PRIMARY_LITTLE 0x88 -#endif - -#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) - -#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) - -#ifndef use_vis3_instructions -bool use_vis3_instructions; -#endif - -static bool check_fit_i64(int64_t val, unsigned int bits) -{ - return val == sextract64(val, 0, bits); -} - -static bool check_fit_i32(int32_t val, unsigned int bits) -{ - return val == sextract32(val, 0, bits); -} - -#define check_fit_tl check_fit_i64 -#define check_fit_ptr check_fit_i64 - -static bool patch_reloc(tcg_insn_unit *src_rw, int type, - intptr_t value, intptr_t addend) -{ - const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); - 
uint32_t insn = *src_rw; - intptr_t pcrel; - - value += addend; - pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, src_rx); - - switch (type) { - case R_SPARC_WDISP16: - if (!check_fit_ptr(pcrel >> 2, 16)) { - return false; - } - insn &= ~INSN_OFF16(-1); - insn |= INSN_OFF16(pcrel); - break; - case R_SPARC_WDISP19: - if (!check_fit_ptr(pcrel >> 2, 19)) { - return false; - } - insn &= ~INSN_OFF19(-1); - insn |= INSN_OFF19(pcrel); - break; - case R_SPARC_13: - if (!check_fit_ptr(value, 13)) { - return false; - } - insn &= ~INSN_IMM13(-1); - insn |= INSN_IMM13(value); - break; - default: - g_assert_not_reached(); - } - - *src_rw = insn; - return true; -} - -/* test if a constant matches the constraint */ -static bool tcg_target_const_match(int64_t val, TCGType type, int ct) -{ - if (ct & TCG_CT_CONST) { - return 1; - } - - if (type == TCG_TYPE_I32) { - val = (int32_t)val; - } - - if ((ct & TCG_CT_CONST_ZERO) && val == 0) { - return 1; - } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { - return 1; - } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { - return 1; - } else { - return 0; - } -} - -static void tcg_out_nop(TCGContext *s) -{ - tcg_out32(s, NOP); -} - -static void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, - TCGReg rs2, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); -} - -static void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t offset, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); -} - -static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int op) -{ - tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) - | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); -} - -static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); - } - return true; -} - -static void tcg_out_mov_delay(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (ret != arg) { - tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); - } else { - tcg_out_nop(s); - } -} - -static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) -{ - tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); -} - -static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) -{ - tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); -} - -static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) -{ - if (check_fit_i32(arg, 13)) { - /* A 13-bit constant sign-extended to 64-bits. */ - tcg_out_movi_imm13(s, ret, arg); - } else { - /* A 32-bit constant zero-extended to 64 bits. */ - tcg_out_sethi(s, ret, arg); - if (arg & 0x3ff) { - tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); - } - } -} - -static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, - tcg_target_long arg, bool in_prologue, - TCGReg scratch) -{ - tcg_target_long hi, lo = (int32_t)arg; - tcg_target_long test, lsb; - - /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ - if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { - tcg_out_movi_imm32(s, ret, arg); - return; - } - - /* A 13-bit constant sign-extended to 64-bits. */ - if (check_fit_tl(arg, 13)) { - tcg_out_movi_imm13(s, ret, arg); - return; - } - - /* A 13-bit constant relative to the TB. */ - if (!in_prologue && USE_REG_TB) { - test = tcg_tbrel_diff(s, (void *)arg); - if (check_fit_ptr(test, 13)) { - tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); - return; - } - } - - /* A 32-bit constant sign-extended to 64-bits. 
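       Illustrative worked example (values mine, not from the patch):
       for arg = 0xffffffff89abcdef, SETHI with ~arg loads 0x76543000,
       and XOR with ((arg & 0x3ff) | -0x400), the sign-extended 13-bit
       immediate 0x...fffffdef, flips those bits back while setting
       bits 63:32, recreating arg in two insns.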
*/ - if (arg == lo) { - tcg_out_sethi(s, ret, ~arg); - tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); - return; - } - - /* A 32-bit constant, shifted. */ - lsb = ctz64(arg); - test = (tcg_target_long)arg >> lsb; - if (lsb > 10 && test == extract64(test, 0, 21)) { - tcg_out_sethi(s, ret, test << 10); - tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX); - return; - } else if (test == (uint32_t)test || test == (int32_t)test) { - tcg_out_movi_int(s, TCG_TYPE_I64, ret, test, in_prologue, scratch); - tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); - return; - } - - /* Use the constant pool, if possible. */ - if (!in_prologue && USE_REG_TB) { - new_pool_label(s, arg, R_SPARC_13, s->code_ptr, - tcg_tbrel_diff(s, NULL)); - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); - return; - } - - /* A 64-bit constant decomposed into 2 32-bit pieces. */ - if (check_fit_i32(lo, 13)) { - hi = (arg - lo) >> 32; - tcg_out_movi_imm32(s, ret, hi); - tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); - } else { - hi = arg >> 32; - tcg_out_movi_imm32(s, ret, hi); - tcg_out_movi_imm32(s, scratch, lo); - tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); - tcg_out_arith(s, ret, ret, scratch, ARITH_OR); - } -} - -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) -{ - tcg_debug_assert(ret != TCG_REG_T2); - tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2); -} - -static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, - TCGReg a2, int op) -{ - tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); -} - -static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, - intptr_t offset, int op) -{ - if (check_fit_ptr(offset, 13)) { - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | - INSN_IMM13(offset)); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); - tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); - } -} - -static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); -} - -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, intptr_t arg2) -{ - tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX)); -} - -static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, - TCGReg base, intptr_t ofs) -{ - if (val == 0) { - tcg_out_st(s, type, TCG_REG_G0, base, ofs); - return true; - } - return false; -} - -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg) -{ - intptr_t diff = tcg_tbrel_diff(s, arg); - if (USE_REG_TB && check_fit_ptr(diff, 13)) { - tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff); - return; - } - tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff); - tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff); -} - -static void tcg_out_sety(TCGContext *s, TCGReg rs) -{ - tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); -} - -static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int uns) -{ - /* Load Y with the sign/zero extension of RS1 to 64-bits. */ - if (uns) { - tcg_out_sety(s, TCG_REG_G0); - } else { - tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); - tcg_out_sety(s, TCG_REG_T1); - } - - tcg_out_arithc(s, rd, rs1, val2, val2const, - uns ? 
ARITH_UDIV : ARITH_SDIV); -} - -static const uint8_t tcg_cond_to_bcond[] = { - [TCG_COND_EQ] = COND_E, - [TCG_COND_NE] = COND_NE, - [TCG_COND_LT] = COND_L, - [TCG_COND_GE] = COND_GE, - [TCG_COND_LE] = COND_LE, - [TCG_COND_GT] = COND_G, - [TCG_COND_LTU] = COND_CS, - [TCG_COND_GEU] = COND_CC, - [TCG_COND_LEU] = COND_LEU, - [TCG_COND_GTU] = COND_GU, -}; - -static const uint8_t tcg_cond_to_rcond[] = { - [TCG_COND_EQ] = RCOND_Z, - [TCG_COND_NE] = RCOND_NZ, - [TCG_COND_LT] = RCOND_LZ, - [TCG_COND_GT] = RCOND_GZ, - [TCG_COND_LE] = RCOND_LEZ, - [TCG_COND_GE] = RCOND_GEZ -}; - -static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) -{ - tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); -} - -static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) -{ - int off19 = 0; - - if (l->has_value) { - off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); - } else { - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); - } - tcg_out_bpcc0(s, scond, flags, off19); -} - -static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) -{ - tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); -} - -static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, - int32_t arg2, int const_arg2, TCGLabel *l) -{ - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); - tcg_out_nop(s); -} - -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, - int32_t v1, int v1const) -{ - tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) - | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); -} - -static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, - int32_t v1, int v1const) -{ - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); -} - -static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, - int32_t arg2, int const_arg2, TCGLabel *l) -{ - /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ - if (arg2 == 0 && !is_unsigned_cond(cond)) { - int off16 = 0; - - if (l->has_value) { - off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); - } else { - tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); - } - tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) - | INSN_COND(tcg_cond_to_rcond[cond]) | off16); - } else { - tcg_out_cmp(s, arg1, arg2, const_arg2); - tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); - } - tcg_out_nop(s); -} - -static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, - int32_t v1, int v1const) -{ - tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) - | (tcg_cond_to_rcond[cond] << 10) - | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); -} - -static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, - int32_t v1, int v1const) -{ - /* For 64-bit signed comparisons vs zero, we can avoid the compare. - Note that the immediate range is one bit smaller, so we must check - for that as well. 
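       (MOVR encodes a 10-bit immediate where MOVCC takes 11 bits,
       INSN_IMM10 vs INSN_IMM11 above, hence the check_fit_i32(v1, 10)
       test below.)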
*/ - if (c2 == 0 && !is_unsigned_cond(cond) - && (!v1const || check_fit_i32(v1, 10))) { - tcg_out_movr(s, cond, ret, c1, v1, v1const); - } else { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); - } -} - -static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) -{ - /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ - switch (cond) { - case TCG_COND_LTU: - case TCG_COND_GEU: - /* The result of the comparison is in the carry bit. */ - break; - - case TCG_COND_EQ: - case TCG_COND_NE: - /* For equality, we can transform to inequality vs zero. */ - if (c2 != 0) { - tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); - c2 = TCG_REG_T1; - } else { - c2 = c1; - } - c1 = TCG_REG_G0, c2const = 0; - cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); - break; - - case TCG_COND_GTU: - case TCG_COND_LEU: - /* If we don't need to load a constant into a register, we can - swap the operands on GTU/LEU. There's no benefit to loading - the constant into a temporary register. */ - if (!c2const || c2 == 0) { - TCGReg t = c1; - c1 = c2; - c2 = t; - c2const = 0; - cond = tcg_swap_cond(cond); - break; - } - /* FALLTHRU */ - - default: - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); - return; - } - - tcg_out_cmp(s, c1, c2, c2const); - if (cond == TCG_COND_LTU) { - tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); - } else { - tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); - } -} - -static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const) -{ - if (use_vis3_instructions) { - switch (cond) { - case TCG_COND_NE: - if (c2 != 0) { - break; - } - c2 = c1, c2const = 0, c1 = TCG_REG_G0; - /* FALLTHRU */ - case TCG_COND_LTU: - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); - return; - default: - break; - } - } - - /* For 64-bit signed comparisons vs zero, we can avoid the compare - if the input does not overlap the output. */ - if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movr(s, cond, ret, c1, 1, 1); - } else { - tcg_out_cmp(s, c1, c2, c2const); - tcg_out_movi_imm13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); - } -} - -static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, int opl, int oph) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, opl); - tcg_out_arithc(s, rh, ah, bh, bhconst, oph); - tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); -} - -static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, bool is_sub) -{ - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; - } - - tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); - - if (use_vis3_instructions && !is_sub) { - /* Note that ADDXC doesn't accept immediates. 
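       (ADDXC sums its two register operands plus the carry left by
       the ADDCC above, so a constant BH must first be materialized
       into T2.)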
*/ - if (bhconst && bh != 0) { - tcg_out_movi_imm13(s, TCG_REG_T2, bh); - bh = TCG_REG_T2; - } - tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); - } else if (bh == TCG_REG_G0) { - /* If we have a zero, we can perform the operation in two insns, - with the arithmetic first, and a conditional move into place. */ - if (rh == ah) { - tcg_out_arithi(s, TCG_REG_T2, ah, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); - } else { - tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); - } - } else { - /* - * Otherwise adjust BH as if there is carry into T2. - * Note that constant BH is constrained to 11 bits for the MOVCC, - * so the adjustment fits 12 bits. - */ - if (bhconst) { - tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); - } else { - tcg_out_arithi(s, TCG_REG_T2, bh, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - } - /* ... smoosh T2 back to original BH if carry is clear ... */ - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); - /* ... and finally perform the arithmetic with the new operand. */ - tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); - } - - tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); -} - -static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, - bool in_prologue, bool tail_call) -{ - uintptr_t desti = (uintptr_t)dest; - - /* Be careful not to clobber %o7 for a tail call. */ - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, - desti & ~0xfff, in_prologue, - tail_call ? TCG_REG_G2 : TCG_REG_O7); - tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7, - TCG_REG_T1, desti & 0xfff, JMPL); -} - -static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, - bool in_prologue) -{ - ptrdiff_t disp = tcg_pcrel_diff(s, dest); - - if (disp == (int32_t)disp) { - tcg_out32(s, CALL | (uint32_t)disp >> 2); - } else { - tcg_out_jmpl_const(s, dest, in_prologue, false); - } -} - -static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) -{ - tcg_out_call_nodelay(s, dest, false); - tcg_out_nop(s); -} - -static void tcg_out_mb(TCGContext *s, TCGArg a0) -{ - /* Note that the TCG memory order constants mirror the Sparc MEMBAR. */ - tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL)); -} - -#ifdef CONFIG_SOFTMMU -static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1]; -static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1]; - -static void emit_extend(TCGContext *s, TCGReg r, int op) -{ - /* Emit zero extend of 8, 16 or 32 bit data as - * required by the MO_* value op; do nothing for 64 bit. 
- */ - switch (op & MO_SIZE) { - case MO_8: - tcg_out_arithi(s, r, r, 0xff, ARITH_AND); - break; - case MO_16: - tcg_out_arithi(s, r, r, 16, SHIFT_SLL); - tcg_out_arithi(s, r, r, 16, SHIFT_SRL); - break; - case MO_32: - tcg_out_arith(s, r, r, 0, SHIFT_SRL); - break; - case MO_64: - break; - } -} - -static void build_trampolines(TCGContext *s) -{ - static void * const qemu_ld_helpers[] = { - [MO_UB] = helper_ret_ldub_mmu, - [MO_SB] = helper_ret_ldsb_mmu, - [MO_LEUW] = helper_le_lduw_mmu, - [MO_LESW] = helper_le_ldsw_mmu, - [MO_LEUL] = helper_le_ldul_mmu, - [MO_LEUQ] = helper_le_ldq_mmu, - [MO_BEUW] = helper_be_lduw_mmu, - [MO_BESW] = helper_be_ldsw_mmu, - [MO_BEUL] = helper_be_ldul_mmu, - [MO_BEUQ] = helper_be_ldq_mmu, - }; - static void * const qemu_st_helpers[] = { - [MO_UB] = helper_ret_stb_mmu, - [MO_LEUW] = helper_le_stw_mmu, - [MO_LEUL] = helper_le_stl_mmu, - [MO_LEUQ] = helper_le_stq_mmu, - [MO_BEUW] = helper_be_stw_mmu, - [MO_BEUL] = helper_be_stl_mmu, - [MO_BEUQ] = helper_be_stq_mmu, - }; - - int i; - - for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) { - if (qemu_ld_helpers[i] == NULL) { - continue; - } - - /* May as well align the trampoline. */ - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); - - /* Set the retaddr operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7); - /* Tail call. */ - tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); - /* delay slot -- set the env argument */ - tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); - } - - for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) { - if (qemu_st_helpers[i] == NULL) { - continue; - } - - /* May as well align the trampoline. */ - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); - - emit_extend(s, TCG_REG_O2, i); - - /* Set the retaddr operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7); - - /* Tail call. */ - tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); - /* delay slot -- set the env argument */ - tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); - } -} -#else -static const tcg_insn_unit *qemu_unalign_ld_trampoline; -static const tcg_insn_unit *qemu_unalign_st_trampoline; - -static void build_trampolines(TCGContext *s) -{ - for (int ld = 0; ld < 2; ++ld) { - void *helper; - - while ((uintptr_t)s->code_ptr & 15) { - tcg_out_nop(s); - } - - if (ld) { - helper = helper_unaligned_ld; - qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr); - } else { - helper = helper_unaligned_st; - qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); - } - - /* Tail call. */ - tcg_out_jmpl_const(s, helper, true, true); - /* delay slot -- set the env argument */ - tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); - } -} -#endif - -/* Generate global QEMU prologue and epilogue code */ -static void tcg_target_qemu_prologue(TCGContext *s) -{ - int tmp_buf_size, frame_size; - - /* - * The TCG temp buffer is at the top of the frame, immediately - * below the frame pointer. Use the logical (aligned) offset here; - * the stack bias is applied in temp_allocate_frame(). - */ - tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); - tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size); - - /* - * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is - * otherwise the minimal frame usable by callees. 
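       Illustrative arithmetic, using the tcg-target.h values above:
       128 + 6*8 + 2047 = 2223, and subtracting the 2047 stack bias
       leaves 176 bytes: sixteen 8-byte register-window save slots
       plus six 8-byte outgoing-argument slots that any callee may
       assume.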
- */ - frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; - frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; - frame_size += TCG_TARGET_STACK_ALIGN - 1; - frame_size &= -TCG_TARGET_STACK_ALIGN; - tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | - INSN_IMM13(-frame_size)); - -#ifndef CONFIG_SOFTMMU - if (guest_base != 0) { - tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, - guest_base, true, TCG_REG_T1); - tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); - } -#endif - - /* We choose TCG_REG_TB such that no move is required. */ - if (USE_REG_TB) { - QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); - } - - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); - /* delay slot */ - tcg_out_nop(s); - - /* Epilogue for goto_ptr. */ - tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - /* delay slot */ - tcg_out_movi_imm13(s, TCG_REG_O0, 0); - - build_trampolines(s); -} - -static void tcg_out_nop_fill(tcg_insn_unit *p, int count) -{ - int i; - for (i = 0; i < count; ++i) { - p[i] = NOP; - } -} - -#if defined(CONFIG_SOFTMMU) - -/* We expect to use a 13-bit negative offset from ENV. */ -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); -QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12)); - -/* Perform the TLB load and compare. - - Inputs: - ADDRLO and ADDRHI contain the possible two parts of the address. - - MEM_INDEX and S_BITS are the memory context and log2 size of the load. - - WHICH is the offset into the CPUTLBEntry structure of the slot to read. - This should be offsetof addr_read or addr_write. - - The result of the TLB comparison is in %[ix]cc. The sanitized address - is in the returned register, maybe %o0. The TLB addend is in %o1. */ - -static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, - MemOp opc, int which) -{ - int fast_off = TLB_MASK_TABLE_OFS(mem_index); - int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); - int table_off = fast_off + offsetof(CPUTLBDescFast, table); - const TCGReg r0 = TCG_REG_O0; - const TCGReg r1 = TCG_REG_O1; - const TCGReg r2 = TCG_REG_O2; - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); - tcg_target_long compare_mask; - - /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ - tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off); - tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off); - - /* Extract the page index, shifted into place for tlb index. */ - tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, - SHIFT_SRL); - tcg_out_arith(s, r2, r2, r0, ARITH_AND); - - /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */ - tcg_out_arith(s, r2, r2, r1, ARITH_ADD); - - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which); - tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend)); - - /* Mask out the page offset, except for the required alignment. - We don't support unaligned accesses. */ - if (a_bits < s_bits) { - a_bits = s_bits; - } - compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); - if (check_fit_tl(compare_mask, 13)) { - tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND); - } else { - tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask); - tcg_out_arith(s, r2, addr, r2, ARITH_AND); - } - tcg_out_cmp(s, r0, r2, 0); - - /* If the guest address must be zero-extended, do so now. 
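       (A 32-bit SRL by zero is the idiomatic zero-extend here: the V9
       32-bit shift forms clear bits 63:32 of the result.)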
*/ - if (TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); - return r0; - } - return addr; -} -#endif /* CONFIG_SOFTMMU */ - -static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { - [MO_UB] = LDUB, - [MO_SB] = LDSB, - [MO_UB | MO_LE] = LDUB, - [MO_SB | MO_LE] = LDSB, - - [MO_BEUW] = LDUH, - [MO_BESW] = LDSH, - [MO_BEUL] = LDUW, - [MO_BESL] = LDSW, - [MO_BEUQ] = LDX, - [MO_BESQ] = LDX, - - [MO_LEUW] = LDUH_LE, - [MO_LESW] = LDSH_LE, - [MO_LEUL] = LDUW_LE, - [MO_LESL] = LDSW_LE, - [MO_LEUQ] = LDX_LE, - [MO_LESQ] = LDX_LE, -}; - -static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = { - [MO_UB] = STB, - - [MO_BEUW] = STH, - [MO_BEUL] = STW, - [MO_BEUQ] = STX, - - [MO_LEUW] = STH_LE, - [MO_LEUL] = STW_LE, - [MO_LEUQ] = STX_LE, -}; - -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, - MemOpIdx oi, bool is_64) -{ - MemOp memop = get_memop(oi); - tcg_insn_unit *label_ptr; - -#ifdef CONFIG_SOFTMMU - unsigned memi = get_mmuidx(oi); - TCGReg addrz; - const tcg_insn_unit *func; - - addrz = tcg_out_tlb_load(s, addr, memi, memop, - offsetof(CPUTLBEntry, addr_read)); - - /* The fast path is exactly one insn. Thus we can perform the - entire TLB Hit in the (annulled) delay slot of the branch - over the TLB Miss case. */ - - /* beq,a,pt %[xi]cc, label0 */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); - - /* TLB Miss. */ - - tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); - - /* We use the helpers to extend SB and SW data, leaving the case - of SL needing explicit extending below. */ - if ((memop & MO_SSIZE) == MO_SL) { - func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - } else { - func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; - } - tcg_debug_assert(func != NULL); - tcg_out_call_nodelay(s, func, false); - /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi); - - /* We let the helper sign-extend SB and SW, but leave SL for here. */ - if (is_64 && (memop & MO_SSIZE) == MO_SL) { - tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); - } else { - tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); - } - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#else - TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); - unsigned a_bits = get_alignment_bits(memop); - unsigned s_bits = memop & MO_SIZE; - unsigned t_bits; - - if (TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); - addr = TCG_REG_T1; - } - - /* - * Normal case: alignment equal to access size. - */ - if (a_bits == s_bits) { - tcg_out_ldst_rr(s, data, addr, index, - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); - return; - } - - /* - * Test for at least natural alignment, and assume most accesses - * will be aligned -- perform a straight load in the delay slot. - * This is required to preserve atomicity for aligned accesses. 
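       (BPCC_A sets the annul bit: the delay-slot load below executes
       only when the branch is taken, i.e. when ANDCC proved the
       address aligned; otherwise the slot is annulled and we fall
       through to the slow path.)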
- */ - t_bits = MAX(a_bits, s_bits); - tcg_debug_assert(t_bits < 13); - tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); - - /* beq,a,pt %icc, label */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addr, index, - qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); - - if (a_bits >= s_bits) { - /* - * Overalignment: A successful alignment test will perform the memory - * operation in the delay slot, and failure need only invoke the - * handler for SIGBUS. - */ - tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); - /* delay slot -- move to low part of argument reg */ - tcg_out_mov_delay(s, TCG_REG_O1, addr); - } else { - /* Underalignment: load by pieces of minimum alignment. */ - int ld_opc, a_size, s_size, i; - - /* - * Force full address into T1 early; avoids problems with - * overlap between @addr and @data. - */ - tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); - - a_size = 1 << a_bits; - s_size = 1 << s_bits; - if ((memop & MO_BSWAP) == MO_BE) { - ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)]; - tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); - ld_opc = qemu_ld_opc[a_bits | MO_BE]; - for (i = a_size; i < s_size; i += a_size) { - tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); - tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX); - tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); - } - } else if (a_bits == 0) { - ld_opc = LDUB; - tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc); - for (i = a_size; i < s_size; i += a_size) { - if ((memop & MO_SIGN) && i == s_size - a_size) { - ld_opc = LDSB; - } - tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc); - tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); - tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); - } - } else { - ld_opc = qemu_ld_opc[a_bits | MO_LE]; - tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc); - for (i = a_size; i < s_size; i += a_size) { - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); - if ((memop & MO_SIGN) && i == s_size - a_size) { - ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN]; - } - tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc); - tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX); - tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR); - } - } - } - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, - MemOpIdx oi) -{ - MemOp memop = get_memop(oi); - tcg_insn_unit *label_ptr; - -#ifdef CONFIG_SOFTMMU - unsigned memi = get_mmuidx(oi); - TCGReg addrz; - const tcg_insn_unit *func; - - addrz = tcg_out_tlb_load(s, addr, memi, memop, - offsetof(CPUTLBEntry, addr_write)); - - /* The fast path is exactly one insn. Thus we can perform the entire - TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */ - /* beq,a,pt %[xi]cc, label0 */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT - | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); - - /* TLB Miss. 
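       Helper call ABI as set up here and in the trampolines:
       %o0 = env (trampoline delay slot), %o1 = guest address,
       %o2 = data (zero-extended by emit_extend), %o3 = oi,
       %o4 = return address.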
*/ - - tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); - tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data); - - func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; - tcg_debug_assert(func != NULL); - tcg_out_call_nodelay(s, func, false); - /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi); - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#else - TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); - unsigned a_bits = get_alignment_bits(memop); - unsigned s_bits = memop & MO_SIZE; - unsigned t_bits; - - if (TARGET_LONG_BITS == 32) { - tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); - addr = TCG_REG_T1; - } - - /* - * Normal case: alignment equal to access size. - */ - if (a_bits == s_bits) { - tcg_out_ldst_rr(s, data, addr, index, - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); - return; - } - - /* - * Test for at least natural alignment, and assume most accesses - * will be aligned -- perform a straight store in the delay slot. - * This is required to preserve atomicity for aligned accesses. - */ - t_bits = MAX(a_bits, s_bits); - tcg_debug_assert(t_bits < 13); - tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); - - /* beq,a,pt %icc, label */ - label_ptr = s->code_ptr; - tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); - /* delay slot */ - tcg_out_ldst_rr(s, data, addr, index, - qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); - - if (a_bits >= s_bits) { - /* - * Overalignment: A successful alignment test will perform the memory - * operation in the delay slot, and failure need only invoke the - * handler for SIGBUS. - */ - tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); - /* delay slot -- move to low part of argument reg */ - tcg_out_mov_delay(s, TCG_REG_O1, addr); - } else { - /* Underalignment: store by pieces of minimum alignment. */ - int st_opc, a_size, s_size, i; - - /* - * Force full address into T1 early; avoids problems with - * overlap between @addr and @data. - */ - tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); - - a_size = 1 << a_bits; - s_size = 1 << s_bits; - if ((memop & MO_BSWAP) == MO_BE) { - st_opc = qemu_st_opc[a_bits | MO_BE]; - for (i = 0; i < s_size; i += a_size) { - TCGReg d = data; - int shift = (s_size - a_size - i) * 8; - if (shift) { - d = TCG_REG_T2; - tcg_out_arithi(s, d, data, shift, SHIFT_SRLX); - } - tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc); - } - } else if (a_bits == 0) { - tcg_out_ldst(s, data, TCG_REG_T1, 0, STB); - for (i = 1; i < s_size; i++) { - tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); - tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB); - } - } else { - /* Note that ST*A with immediate asi must use indexed address. */ - st_opc = qemu_st_opc[a_bits + MO_LE]; - tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc); - for (i = a_size; i < s_size; i += a_size) { - tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); - tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc); - } - } - } - - *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); -#endif /* CONFIG_SOFTMMU */ -} - -static void tcg_out_op(TCGContext *s, TCGOpcode opc, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - TCGArg a0, a1, a2; - int c, c2; - - /* Hoist the loads of the most common arguments. 
*/ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; - - switch (opc) { - case INDEX_op_exit_tb: - if (check_fit_ptr(a0, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_movi_imm13(s, TCG_REG_O0, a0); - break; - } else if (USE_REG_TB) { - intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); - if (check_fit_ptr(tb_diff, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - /* Note that TCG_REG_TB has been unwound to O1. */ - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD); - break; - } - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); - break; - case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* direct jump method */ - if (USE_REG_TB) { - /* make sure the patch is 8-byte aligned. */ - if ((intptr_t)s->code_ptr & 4) { - tcg_out_nop(s); - } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out_sethi(s, TCG_REG_T1, 0); - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); - tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); - } else { - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out32(s, CALL); - tcg_out_nop(s); - } - } else { - /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL); - tcg_out_nop(s); - } - set_jmp_reset_offset(s, a0); - - /* For the unlinked path of goto_tb, we need to reset - TCG_REG_TB to the beginning of this TB. */ - if (USE_REG_TB) { - c = -tcg_current_code_size(s); - if (check_fit_i32(c, 13)) { - tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, - TCG_REG_T1, ARITH_ADD); - } - } - break; - case INDEX_op_goto_ptr: - tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); - if (USE_REG_TB) { - tcg_out_mov_delay(s, TCG_REG_TB, a0); - } else { - tcg_out_nop(s); - } - break; - case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); - tcg_out_nop(s); - break; - -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - - OP_32_64(ld8u): - tcg_out_ldst(s, a0, a1, a2, LDUB); - break; - OP_32_64(ld8s): - tcg_out_ldst(s, a0, a1, a2, LDSB); - break; - OP_32_64(ld16u): - tcg_out_ldst(s, a0, a1, a2, LDUH); - break; - OP_32_64(ld16s): - tcg_out_ldst(s, a0, a1, a2, LDSH); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, a0, a1, a2, LDUW); - break; - OP_32_64(st8): - tcg_out_ldst(s, a0, a1, a2, STB); - break; - OP_32_64(st16): - tcg_out_ldst(s, a0, a1, a2, STH); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, a0, a1, a2, STW); - break; - OP_32_64(add): - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(andc): - c = ARITH_ANDN; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(orc): - c = ARITH_ORN; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - case INDEX_op_shl_i32: - c = SHIFT_SLL; - do_shift32: - /* Limit immediate shift count lest we create an illegal insn. 
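+           The immediate field is 5 bits wide for 32-bit shifts and
+           6 bits for 64-bit shifts, and TCG leaves oversized counts
+           undefined, so truncating is always safe.  What the emitted
+           SLL computes, in C terms:
+
+               a0 = a1 << (a2 & 31);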
*/ - tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); - break; - case INDEX_op_shr_i32: - c = SHIFT_SRL; - goto do_shift32; - case INDEX_op_sar_i32: - c = SHIFT_SRA; - goto do_shift32; - case INDEX_op_mul_i32: - c = ARITH_UMUL; - goto gen_arith; - - OP_32_64(neg): - c = ARITH_SUB; - goto gen_arith1; - OP_32_64(not): - c = ARITH_ORN; - goto gen_arith1; - - case INDEX_op_div_i32: - tcg_out_div32(s, a0, a1, a2, c2, 0); - break; - case INDEX_op_divu_i32: - tcg_out_div32(s, a0, a1, a2, c2, 1); - break; - - case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - - case INDEX_op_add2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDC); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBC); - break; - case INDEX_op_mulu2_i32: - c = ARITH_UMUL; - goto do_mul2; - case INDEX_op_muls2_i32: - c = ARITH_SMUL; - do_mul2: - /* The 32-bit multiply insns produce a full 64-bit result. */ - tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - break; - - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, false); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, true); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2); - break; - - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, a0, a1, a2, LDSW); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, a0, a1, a2, LDX); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, a0, a1, a2, STX); - break; - case INDEX_op_shl_i64: - c = SHIFT_SLLX; - do_shift64: - /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); - break; - case INDEX_op_shr_i64: - c = SHIFT_SRLX; - goto do_shift64; - case INDEX_op_sar_i64: - c = SHIFT_SRAX; - goto do_shift64; - case INDEX_op_mul_i64: - c = ARITH_MULX; - goto gen_arith; - case INDEX_op_div_i64: - c = ARITH_SDIVX; - goto gen_arith; - case INDEX_op_divu_i64: - c = ARITH_UDIVX; - goto gen_arith; - case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: - tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); - break; - case INDEX_op_extu_i32_i64: - case INDEX_op_ext32u_i64: - tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); - break; - case INDEX_op_extrl_i64_i32: - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - break; - case INDEX_op_extrh_i64_i32: - tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); - break; - - case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], false); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], true); - break; - case INDEX_op_muluh_i64: - tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); - break; - - gen_arith: - tcg_out_arithc(s, a0, a1, a2, c2, c); - break; - - gen_arith1: - tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); - break; - - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ - default: - tcg_abort(); - } -} - -static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) -{ - switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_neg_i32: - case INDEX_op_not_i32: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - return C_O0_I2(rZ, r); - - case INDEX_op_add_i32: - case INDEX_op_mul_i32: - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - case INDEX_op_sub_i32: - case INDEX_op_and_i32: - case INDEX_op_andc_i32: - case INDEX_op_or_i32: - case INDEX_op_orc_i32: - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_setcond_i32: - return C_O1_I2(r, rZ, rJ); - - case INDEX_op_brcond_i32: - return C_O0_I2(rZ, rJ); - case INDEX_op_movcond_i32: - return C_O1_I4(r, rZ, rJ, rI, 0); - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rZ, rZ, rJ, rJ); - case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i32: - return C_O2_I2(r, r, rZ, rJ); - - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - return C_O1_I1(R, r); - - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(RZ, r); - - case INDEX_op_add_i64: - case INDEX_op_mul_i64: - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - case INDEX_op_sub_i64: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: - case INDEX_op_or_i64: - case INDEX_op_orc_i64: - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_setcond_i64: - return C_O1_I2(R, RZ, RJ); - - case INDEX_op_neg_i64: - case INDEX_op_not_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - return C_O1_I1(R, R); - - case INDEX_op_extrl_i64_i32: - case INDEX_op_extrh_i64_i32: - return C_O1_I1(r, R); - - case INDEX_op_brcond_i64: - return C_O0_I2(RZ, RJ); - case INDEX_op_movcond_i64: - return C_O1_I4(R, RZ, RJ, RI, 0); - case INDEX_op_add2_i64: - case INDEX_op_sub2_i64: - return C_O2_I4(R, R, RZ, RZ, RJ, RI); - case INDEX_op_muluh_i64: - return C_O1_I2(R, R, R); - - case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, A); - case INDEX_op_qemu_ld_i64: - return C_O1_I1(R, A); - case INDEX_op_qemu_st_i32: - return C_O0_I2(sZ, A); - case INDEX_op_qemu_st_i64: - return C_O0_I2(SZ, A); - - default: - g_assert_not_reached(); - } -} - -static void tcg_target_init(TCGContext *s) -{ - /* - * Only probe for the platform and capabilities if we haven't already - * determined maximum values at compile time. 
- */ -#ifndef use_vis3_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; - } -#endif - - tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; - tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64; - - tcg_target_call_clobber_regs = 0; - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G2); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G3); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G4); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G5); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G6); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G7); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O0); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O1); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O2); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O3); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O4); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O5); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O6); - tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O7); - - s->reserved_regs = 0; - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ - tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ -} - -#define ELF_HOST_MACHINE EM_SPARCV9 - -typedef struct { - DebugFrameHeader h; - uint8_t fde_def_cfa[4]; - uint8_t fde_win_save; - uint8_t fde_ret_save[3]; -} DebugFrame; - -static const DebugFrame debug_frame = { - .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ - .h.cie.id = -1, - .h.cie.version = 1, - .h.cie.code_align = 1, - .h.cie.data_align = -sizeof(void *) & 0x7f, - .h.cie.return_column = 15, /* o7 */ - - /* Total FDE size does not include the "len" member. */ - .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), - - .fde_def_cfa = { - 12, 30, /* DW_CFA_def_cfa i6, 2047 */ - (2047 & 0x7f) | 0x80, (2047 >> 7) - }, - .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ - .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ -}; - -void tcg_register_jit(const void *buf, size_t buf_size) -{ - tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); -} - -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - intptr_t tb_disp = addr - tc_ptr; - intptr_t br_disp = addr - jmp_rx; - tcg_insn_unit i1, i2; - - /* We can reach the entire address space for ILP32. - For LP64, the code_gen_buffer can't be larger than 2GB. */ - tcg_debug_assert(tb_disp == (int32_t)tb_disp); - tcg_debug_assert(br_disp == (int32_t)br_disp); - - if (!USE_REG_TB) { - qatomic_set((uint32_t *)jmp_rw, - deposit32(CALL, 0, 30, br_disp >> 2)); - flush_idcache_range(jmp_rx, jmp_rw, 4); - return; - } - - /* This does not exercise the range of the branch, but we do - still need to be able to load the new value of TCG_REG_TB. 
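+       When the displacement fits, the patch is a branch plus a 13-bit
+       immediate add that rebases TCG_REG_TB.  Either way, both insn
+       words are published with a single aligned 64-bit store, so a
+       racing thread never fetches a mixed pair.  Schematically:
+
+           deposit64(i2, 32, 32, i1) == ((uint64_t)i1 << 32) | (uint32_t)i2
+
+       On this big-endian host the high half (i1) is the word at the
+       lower address, i.e. the first insn fetched.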
- But this does still happen quite often. */ - if (check_fit_ptr(tb_disp, 13)) { - /* ba,pt %icc, addr */ - i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A) - | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp)); - i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB) - | INSN_IMM13(tb_disp)); - } else if (tb_disp >= 0) { - i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10); - i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1) - | INSN_IMM13(tb_disp & 0x3ff)); - } else { - i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10); - i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1) - | INSN_IMM13((tb_disp & 0x3ff) | -0x400)); - } - - qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1)); - flush_idcache_range(jmp_rx, jmp_rw, 8); -} diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h deleted file mode 100644 index 8655acd..0000000 --- a/tcg/sparc/tcg-target.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Tiny Code Generator for QEMU - * - * Copyright (c) 2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#ifndef SPARC_TCG_TARGET_H -#define SPARC_TCG_TARGET_H - -#define TCG_TARGET_INSN_UNIT_SIZE 4 -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 -#define TCG_TARGET_NB_REGS 32 -#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) - -typedef enum { - TCG_REG_G0 = 0, - TCG_REG_G1, - TCG_REG_G2, - TCG_REG_G3, - TCG_REG_G4, - TCG_REG_G5, - TCG_REG_G6, - TCG_REG_G7, - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, - TCG_REG_O4, - TCG_REG_O5, - TCG_REG_O6, - TCG_REG_O7, - TCG_REG_L0, - TCG_REG_L1, - TCG_REG_L2, - TCG_REG_L3, - TCG_REG_L4, - TCG_REG_L5, - TCG_REG_L6, - TCG_REG_L7, - TCG_REG_I0, - TCG_REG_I1, - TCG_REG_I2, - TCG_REG_I3, - TCG_REG_I4, - TCG_REG_I5, - TCG_REG_I6, - TCG_REG_I7, -} TCGReg; - -/* used for function call generation */ -#define TCG_REG_CALL_STACK TCG_REG_O6 - -#define TCG_TARGET_STACK_BIAS 2047 -#define TCG_TARGET_STACK_ALIGN 16 -#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) -#define TCG_TARGET_EXTEND_ARGS 1 - -#if defined(__VIS__) && __VIS__ >= 0x300 -#define use_vis3_instructions 1 -#else -extern bool use_vis3_instructions; -#endif - -/* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_rot_i32 0 -#define TCG_TARGET_HAS_ext8s_i32 0 -#define TCG_TARGET_HAS_ext16s_i32 0 -#define TCG_TARGET_HAS_ext8u_i32 0 -#define TCG_TARGET_HAS_ext16u_i32 0 -#define TCG_TARGET_HAS_bswap16_i32 0 -#define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_neg_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_deposit_i32 0 -#define TCG_TARGET_HAS_extract_i32 0 -#define TCG_TARGET_HAS_sextract_i32 0 -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_movcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - -#define TCG_TARGET_HAS_extrl_i64_i32 1 -#define TCG_TARGET_HAS_extrh_i64_i32 1 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 0 -#define TCG_TARGET_HAS_rot_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 0 -#define TCG_TARGET_HAS_ext16s_i64 0 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 0 -#define TCG_TARGET_HAS_bswap32_i64 0 -#define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_neg_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_deposit_i64 0 -#define TCG_TARGET_HAS_extract_i64 0 -#define TCG_TARGET_HAS_sextract_i64 0 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_movcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions -#define 
TCG_TARGET_HAS_mulsh_i64 0 - -#define TCG_AREG0 TCG_REG_I0 - -#define TCG_TARGET_DEFAULT_MO (0) -#define TCG_TARGET_HAS_MEMORY_BSWAP 1 - -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); - -#define TCG_TARGET_NEED_POOL_LABELS - -#endif diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h new file mode 100644 index 0000000..3b751dc --- /dev/null +++ b/tcg/sparc64/tcg-target-con-set.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define Sparc target-specific constraint sets. + * Copyright (c) 2021 Linaro + */ + +/* + * C_On_Im(...) defines a constraint set with outputs and inputs. + * Each operand should be a sequence of constraint letters as defined by + * tcg-target-con-str.h; the constraint combination is inclusive or. + */ +C_O0_I1(r) +C_O0_I2(rZ, r) +C_O0_I2(RZ, r) +C_O0_I2(rZ, rJ) +C_O0_I2(RZ, RJ) +C_O0_I2(sZ, A) +C_O0_I2(SZ, A) +C_O1_I1(r, A) +C_O1_I1(R, A) +C_O1_I1(r, r) +C_O1_I1(r, R) +C_O1_I1(R, r) +C_O1_I1(R, R) +C_O1_I2(R, R, R) +C_O1_I2(r, rZ, rJ) +C_O1_I2(R, RZ, RJ) +C_O1_I4(r, rZ, rJ, rI, 0) +C_O1_I4(R, RZ, RJ, RI, 0) +C_O2_I2(r, r, rZ, rJ) +C_O2_I4(R, R, RZ, RZ, RJ, RI) +C_O2_I4(r, r, rZ, rZ, rJ, rJ) diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h new file mode 100644 index 0000000..fdb25d9 --- /dev/null +++ b/tcg/sparc64/tcg-target-con-str.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define Sparc target-specific operand constraints. + * Copyright (c) 2021 Linaro + */ + +/* + * Define constraint letters for register sets: + * REGS(letter, register_mask) + */ +REGS('r', ALL_GENERAL_REGS) +REGS('R', ALL_GENERAL_REGS64) +REGS('s', ALL_QLDST_REGS) +REGS('S', ALL_QLDST_REGS64) +REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS) + +/* + * Define constraint letters for constants: + * CONST(letter, TCG_CT_CONST_* bit set) + */ +CONST('I', TCG_CT_CONST_S11) +CONST('J', TCG_CT_CONST_S13) +CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc new file mode 100644 index 0000000..097bcfc --- /dev/null +++ b/tcg/sparc64/tcg-target.c.inc @@ -0,0 +1,1965 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* We only support generating code for 64-bit mode. 
*/ +#ifndef __arch64__ +#error "unsupported code generation mode" +#endif + +#include "../tcg-pool.c.inc" + +#ifdef CONFIG_DEBUG_TCG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%g0", + "%g1", + "%g2", + "%g3", + "%g4", + "%g5", + "%g6", + "%g7", + "%o0", + "%o1", + "%o2", + "%o3", + "%o4", + "%o5", + "%o6", + "%o7", + "%l0", + "%l1", + "%l2", + "%l3", + "%l4", + "%l5", + "%l6", + "%l7", + "%i0", + "%i1", + "%i2", + "%i3", + "%i4", + "%i5", + "%i6", + "%i7", +}; +#endif + +#define TCG_CT_CONST_S11 0x100 +#define TCG_CT_CONST_S13 0x200 +#define TCG_CT_CONST_ZERO 0x400 + +/* + * For softmmu, we need to avoid conflicts with the first 3 + * argument registers to perform the tlb lookup, and to call + * the helper function. + */ +#ifdef CONFIG_SOFTMMU +#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3) +#else +#define SOFTMMU_RESERVE_REGS 0 +#endif + +/* + * Note that sparcv8plus can only hold 64 bit quantities in %g and %o + * registers. These are saved manually by the kernel in full 64-bit + * slots. The %i and %l registers are saved by the register window + * mechanism, which only allocates space for 32 bits. Given that this + * window spill/fill can happen on any signal, we must consider the + * high bits of the %i and %l registers garbage at all times. + */ +#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) +# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS +#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) +#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) + +/* Define some temporary registers. T2 is used for constant generation. */ +#define TCG_REG_T1 TCG_REG_G1 +#define TCG_REG_T2 TCG_REG_O7 + +#ifndef CONFIG_SOFTMMU +# define TCG_GUEST_BASE_REG TCG_REG_I5 +#endif + +#define TCG_REG_TB TCG_REG_I1 +#define USE_REG_TB (sizeof(void *) > 4) + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_L0, + TCG_REG_L1, + TCG_REG_L2, + TCG_REG_L3, + TCG_REG_L4, + TCG_REG_L5, + TCG_REG_L6, + TCG_REG_L7, + + TCG_REG_I0, + TCG_REG_I1, + TCG_REG_I2, + TCG_REG_I3, + TCG_REG_I4, + TCG_REG_I5, + + TCG_REG_G2, + TCG_REG_G3, + TCG_REG_G4, + TCG_REG_G5, + + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, +}; + +static const int tcg_target_call_iarg_regs[6] = { + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, +}; + +static const int tcg_target_call_oarg_regs[] = { + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, +}; + +#define INSN_OP(x) ((x) << 30) +#define INSN_OP2(x) ((x) << 22) +#define INSN_OP3(x) ((x) << 19) +#define INSN_OPF(x) ((x) << 5) +#define INSN_RD(x) ((x) << 25) +#define INSN_RS1(x) ((x) << 14) +#define INSN_RS2(x) (x) +#define INSN_ASI(x) ((x) << 5) + +#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff)) +#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff)) +#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff)) +#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20)) +#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff) +#define INSN_COND(x) ((x) << 25) + +#define COND_N 0x0 +#define COND_E 0x1 +#define COND_LE 0x2 +#define COND_L 0x3 +#define COND_LEU 0x4 +#define COND_CS 0x5 +#define COND_NEG 0x6 +#define COND_VS 0x7 +#define COND_A 0x8 +#define COND_NE 0x9 +#define COND_G 0xa +#define COND_GE 0xb +#define COND_GU 0xc +#define COND_CC 0xd +#define COND_POS 0xe +#define COND_VC 0xf +#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2)) + +#define RCOND_Z 1 +#define RCOND_LEZ 2 +#define RCOND_LZ 3 +#define RCOND_NZ 5 +#define RCOND_GZ 6 
+#define RCOND_GEZ 7 + +#define MOVCC_ICC (1 << 18) +#define MOVCC_XCC (1 << 18 | 1 << 12) + +#define BPCC_ICC 0 +#define BPCC_XCC (2 << 20) +#define BPCC_PT (1 << 19) +#define BPCC_PN 0 +#define BPCC_A (1 << 29) + +#define BPR_PT BPCC_PT + +#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00)) +#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10)) +#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01)) +#define ARITH_ANDCC (INSN_OP(2) | INSN_OP3(0x11)) +#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05)) +#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02)) +#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12)) +#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06)) +#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03)) +#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) +#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) +#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) +#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) +#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) +#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) +#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) +#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f)) +#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09)) +#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) +#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) +#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) + +#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) +#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) + +#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) +#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26)) +#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27)) + +#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12)) +#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12)) +#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12)) + +#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) +#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) +#define JMPL (INSN_OP(2) | INSN_OP3(0x38)) +#define RETURN (INSN_OP(2) | INSN_OP3(0x39)) +#define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) +#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d)) +#define SETHI (INSN_OP(0) | INSN_OP2(0x4)) +#define CALL INSN_OP(1) +#define LDUB (INSN_OP(3) | INSN_OP3(0x01)) +#define LDSB (INSN_OP(3) | INSN_OP3(0x09)) +#define LDUH (INSN_OP(3) | INSN_OP3(0x02)) +#define LDSH (INSN_OP(3) | INSN_OP3(0x0a)) +#define LDUW (INSN_OP(3) | INSN_OP3(0x00)) +#define LDSW (INSN_OP(3) | INSN_OP3(0x08)) +#define LDX (INSN_OP(3) | INSN_OP3(0x0b)) +#define STB (INSN_OP(3) | INSN_OP3(0x05)) +#define STH (INSN_OP(3) | INSN_OP3(0x06)) +#define STW (INSN_OP(3) | INSN_OP3(0x04)) +#define STX (INSN_OP(3) | INSN_OP3(0x0e)) +#define LDUBA (INSN_OP(3) | INSN_OP3(0x11)) +#define LDSBA (INSN_OP(3) | INSN_OP3(0x19)) +#define LDUHA (INSN_OP(3) | INSN_OP3(0x12)) +#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a)) +#define LDUWA (INSN_OP(3) | INSN_OP3(0x10)) +#define LDSWA (INSN_OP(3) | INSN_OP3(0x18)) +#define LDXA (INSN_OP(3) | INSN_OP3(0x1b)) +#define STBA (INSN_OP(3) | INSN_OP3(0x15)) +#define STHA (INSN_OP(3) | INSN_OP3(0x16)) +#define STWA (INSN_OP(3) | INSN_OP3(0x14)) +#define STXA (INSN_OP(3) | INSN_OP3(0x1e)) + +#define MEMBAR (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13)) + +#define NOP (SETHI | INSN_RD(TCG_REG_G0) | 0) + +#ifndef ASI_PRIMARY_LITTLE +#define ASI_PRIMARY_LITTLE 0x88 +#endif + +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDSW_LE (LDSWA | 
INSN_ASI(ASI_PRIMARY_LITTLE)) +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) + +#ifndef use_vis3_instructions +bool use_vis3_instructions; +#endif + +static bool check_fit_i64(int64_t val, unsigned int bits) +{ + return val == sextract64(val, 0, bits); +} + +static bool check_fit_i32(int32_t val, unsigned int bits) +{ + return val == sextract32(val, 0, bits); +} + +#define check_fit_tl check_fit_i64 +#define check_fit_ptr check_fit_i64 + +static bool patch_reloc(tcg_insn_unit *src_rw, int type, + intptr_t value, intptr_t addend) +{ + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw); + uint32_t insn = *src_rw; + intptr_t pcrel; + + value += addend; + pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, src_rx); + + switch (type) { + case R_SPARC_WDISP16: + if (!check_fit_ptr(pcrel >> 2, 16)) { + return false; + } + insn &= ~INSN_OFF16(-1); + insn |= INSN_OFF16(pcrel); + break; + case R_SPARC_WDISP19: + if (!check_fit_ptr(pcrel >> 2, 19)) { + return false; + } + insn &= ~INSN_OFF19(-1); + insn |= INSN_OFF19(pcrel); + break; + case R_SPARC_13: + if (!check_fit_ptr(value, 13)) { + return false; + } + insn &= ~INSN_IMM13(-1); + insn |= INSN_IMM13(value); + break; + default: + g_assert_not_reached(); + } + + *src_rw = insn; + return true; +} + +/* test if a constant matches the constraint */ +static bool tcg_target_const_match(int64_t val, TCGType type, int ct) +{ + if (ct & TCG_CT_CONST) { + return 1; + } + + if (type == TCG_TYPE_I32) { + val = (int32_t)val; + } + + if ((ct & TCG_CT_CONST_ZERO) && val == 0) { + return 1; + } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) { + return 1; + } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) { + return 1; + } else { + return 0; + } +} + +static void tcg_out_nop(TCGContext *s) +{ + tcg_out32(s, NOP); +} + +static void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1, + TCGReg rs2, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2)); +} + +static void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t offset, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset)); +} + +static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int op) +{ + tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) + | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); +} + +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); + } + return true; +} + +static void tcg_out_mov_delay(TCGContext *s, TCGReg ret, TCGReg arg) +{ + if (ret != arg) { + tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); + } else { + tcg_out_nop(s); + } +} + +static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg) +{ + tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10)); +} + +static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg) +{ + tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR); +} + +static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg) +{ + if (check_fit_i32(arg, 13)) { + /* A 13-bit constant sign-extended to 64-bits. */ + tcg_out_movi_imm13(s, ret, arg); + } else { + /* A 32-bit constant zero-extended to 64 bits. 
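+           In C terms, the sethi/or pair below computes
+
+               ret  = (uint32_t)arg & 0xfffffc00;    sethi %hi(arg), ret
+               ret |= arg & 0x3ff;                   or   ret, %lo(arg), ret
+
+           with the OR skipped when the low ten bits are all zero.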
*/ + tcg_out_sethi(s, ret, arg); + if (arg & 0x3ff) { + tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); + } + } +} + +static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, + tcg_target_long arg, bool in_prologue, + TCGReg scratch) +{ + tcg_target_long hi, lo = (int32_t)arg; + tcg_target_long test, lsb; + + /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */ + if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) { + tcg_out_movi_imm32(s, ret, arg); + return; + } + + /* A 13-bit constant sign-extended to 64-bits. */ + if (check_fit_tl(arg, 13)) { + tcg_out_movi_imm13(s, ret, arg); + return; + } + + /* A 13-bit constant relative to the TB. */ + if (!in_prologue && USE_REG_TB) { + test = tcg_tbrel_diff(s, (void *)arg); + if (check_fit_ptr(test, 13)) { + tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); + return; + } + } + + /* A 32-bit constant sign-extended to 64-bits. */ + if (arg == lo) { + tcg_out_sethi(s, ret, ~arg); + tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); + return; + } + + /* A 32-bit constant, shifted. */ + lsb = ctz64(arg); + test = (tcg_target_long)arg >> lsb; + if (lsb > 10 && test == extract64(test, 0, 21)) { + tcg_out_sethi(s, ret, test << 10); + tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX); + return; + } else if (test == (uint32_t)test || test == (int32_t)test) { + tcg_out_movi_int(s, TCG_TYPE_I64, ret, test, in_prologue, scratch); + tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX); + return; + } + + /* Use the constant pool, if possible. */ + if (!in_prologue && USE_REG_TB) { + new_pool_label(s, arg, R_SPARC_13, s->code_ptr, + tcg_tbrel_diff(s, NULL)); + tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); + return; + } + + /* A 64-bit constant decomposed into 2 32-bit pieces. */ + if (check_fit_i32(lo, 13)) { + hi = (arg - lo) >> 32; + tcg_out_movi_imm32(s, ret, hi); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_arithi(s, ret, ret, lo, ARITH_ADD); + } else { + hi = arg >> 32; + tcg_out_movi_imm32(s, ret, hi); + tcg_out_movi_imm32(s, scratch, lo); + tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX); + tcg_out_arith(s, ret, ret, scratch, ARITH_OR); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_debug_assert(ret != TCG_REG_T2); + tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2); +} + +static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, + TCGReg a2, int op) +{ + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2)); +} + +static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, + intptr_t offset, int op) +{ + if (check_fit_ptr(offset, 13)) { + tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) | + INSN_IMM13(offset)); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset); + tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op); + } +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX)); +} + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, + TCGReg arg1, intptr_t arg2) +{ + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? 
STW : STX)); +} + +static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + if (val == 0) { + tcg_out_st(s, type, TCG_REG_G0, base, ofs); + return true; + } + return false; +} + +static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg) +{ + intptr_t diff = tcg_tbrel_diff(s, arg); + if (USE_REG_TB && check_fit_ptr(diff, 13)) { + tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff); + return; + } + tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff); + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff); +} + +static void tcg_out_sety(TCGContext *s, TCGReg rs) +{ + tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); +} + +static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, + int32_t val2, int val2const, int uns) +{ + /* Load Y with the sign/zero extension of RS1 to 64-bits. */ + if (uns) { + tcg_out_sety(s, TCG_REG_G0); + } else { + tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); + tcg_out_sety(s, TCG_REG_T1); + } + + tcg_out_arithc(s, rd, rs1, val2, val2const, + uns ? ARITH_UDIV : ARITH_SDIV); +} + +static const uint8_t tcg_cond_to_bcond[] = { + [TCG_COND_EQ] = COND_E, + [TCG_COND_NE] = COND_NE, + [TCG_COND_LT] = COND_L, + [TCG_COND_GE] = COND_GE, + [TCG_COND_LE] = COND_LE, + [TCG_COND_GT] = COND_G, + [TCG_COND_LTU] = COND_CS, + [TCG_COND_GEU] = COND_CC, + [TCG_COND_LEU] = COND_LEU, + [TCG_COND_GTU] = COND_GU, +}; + +static const uint8_t tcg_cond_to_rcond[] = { + [TCG_COND_EQ] = RCOND_Z, + [TCG_COND_NE] = RCOND_NZ, + [TCG_COND_LT] = RCOND_LZ, + [TCG_COND_GT] = RCOND_GZ, + [TCG_COND_LE] = RCOND_LEZ, + [TCG_COND_GE] = RCOND_GEZ +}; + +static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) +{ + tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19); +} + +static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) +{ + int off19 = 0; + + if (l->has_value) { + off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); + } else { + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); + } + tcg_out_bpcc0(s, scond, flags, off19); +} + +static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const) +{ + tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC); +} + +static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, TCGLabel *l) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l); + tcg_out_nop(s); +} + +static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, + int32_t v1, int v1const) +{ + tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) + | INSN_RS1(tcg_cond_to_bcond[cond]) + | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) +{ + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); +} + +static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, + int32_t arg2, int const_arg2, TCGLabel *l) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. 
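+       The v9 "branch on register" form (brz, brlz, brgez, ...) folds
+       the test and the branch into a single insn; schematically:
+
+           if (rcond((int64_t)arg1)) {
+               goto label;
+           }
+
+       Such branches exist only for signed comparisons against zero,
+       hence the guard below.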
*/ + if (arg2 == 0 && !is_unsigned_cond(cond)) { + int off16 = 0; + + if (l->has_value) { + off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); + } else { + tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); + } + tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) + | INSN_COND(tcg_cond_to_rcond[cond]) | off16); + } else { + tcg_out_cmp(s, arg1, arg2, const_arg2); + tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l); + } + tcg_out_nop(s); +} + +static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1, + int32_t v1, int v1const) +{ + tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1) + | (tcg_cond_to_rcond[cond] << 10) + | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1))); +} + +static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const, + int32_t v1, int v1const) +{ + /* For 64-bit signed comparisons vs zero, we can avoid the compare. + Note that the immediate range is one bit smaller, so we must check + for that as well. */ + if (c2 == 0 && !is_unsigned_cond(cond) + && (!v1const || check_fit_i32(v1, 10))) { + tcg_out_movr(s, cond, ret, c1, v1, v1const); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); + } +} + +static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) +{ + /* For 32-bit comparisons, we can play games with ADDC/SUBC. */ + switch (cond) { + case TCG_COND_LTU: + case TCG_COND_GEU: + /* The result of the comparison is in the carry bit. */ + break; + + case TCG_COND_EQ: + case TCG_COND_NE: + /* For equality, we can transform to inequality vs zero. */ + if (c2 != 0) { + tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR); + c2 = TCG_REG_T1; + } else { + c2 = c1; + } + c1 = TCG_REG_G0, c2const = 0; + cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); + break; + + case TCG_COND_GTU: + case TCG_COND_LEU: + /* If we don't need to load a constant into a register, we can + swap the operands on GTU/LEU. There's no benefit to loading + the constant into a temporary register. */ + if (!c2const || c2 == 0) { + TCGReg t = c1; + c1 = c2; + c2 = t; + c2const = 0; + cond = tcg_swap_cond(cond); + break; + } + /* FALLTHRU */ + + default: + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1); + return; + } + + tcg_out_cmp(s, c1, c2, c2const); + if (cond == TCG_COND_LTU) { + tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC); + } else { + tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC); + } +} + +static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg c1, int32_t c2, int c2const) +{ + if (use_vis3_instructions) { + switch (cond) { + case TCG_COND_NE: + if (c2 != 0) { + break; + } + c2 = c1, c2const = 0, c1 = TCG_REG_G0; + /* FALLTHRU */ + case TCG_COND_LTU: + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); + return; + default: + break; + } + } + + /* For 64-bit signed comparisons vs zero, we can avoid the compare + if the input does not overlap the output. 
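+       The emitted idiom is
+
+           ret = 0;
+           if (rcond((int64_t)c1)) {
+               ret = 1;
+           }
+
+       using MOVR for the conditional assignment, which is why c1 must
+       not alias ret: the initial clear would destroy the value still
+       to be tested.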
*/ + if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) { + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movr(s, cond, ret, c1, 1, 1); + } else { + tcg_out_cmp(s, c1, c2, c2const); + tcg_out_movi_imm13(s, ret, 0); + tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1); + } +} + +static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, int opl, int oph) +{ + TCGReg tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; + } + + tcg_out_arithc(s, tmp, al, bl, blconst, opl); + tcg_out_arithc(s, rh, ah, bh, bhconst, oph); + tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); +} + +static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, + TCGReg al, TCGReg ah, int32_t bl, int blconst, + int32_t bh, int bhconst, bool is_sub) +{ + TCGReg tmp = TCG_REG_T1; + + /* Note that the low parts are fully consumed before tmp is set. */ + if (rl != ah && (bhconst || rl != bh)) { + tmp = rl; + } + + tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC); + + if (use_vis3_instructions && !is_sub) { + /* Note that ADDXC doesn't accept immediates. */ + if (bhconst && bh != 0) { + tcg_out_movi_imm13(s, TCG_REG_T2, bh); + bh = TCG_REG_T2; + } + tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); + } else if (bh == TCG_REG_G0) { + /* If we have a zero, we can perform the operation in two insns, + with the arithmetic first, and a conditional move into place. */ + if (rh == ah) { + tcg_out_arithi(s, TCG_REG_T2, ah, 1, + is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); + } else { + tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); + } + } else { + /* + * Otherwise adjust BH as if there is carry into T2. + * Note that constant BH is constrained to 11 bits for the MOVCC, + * so the adjustment fits 12 bits. + */ + if (bhconst) { + tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); + } else { + tcg_out_arithi(s, TCG_REG_T2, bh, 1, + is_sub ? ARITH_SUB : ARITH_ADD); + } + /* ... smoosh T2 back to original BH if carry is clear ... */ + tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); + /* ... and finally perform the arithmetic with the new operand. */ + tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); + } + + tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); +} + +static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, + bool in_prologue, bool tail_call) +{ + uintptr_t desti = (uintptr_t)dest; + + /* Be careful not to clobber %o7 for a tail call. */ + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1, + desti & ~0xfff, in_prologue, + tail_call ? TCG_REG_G2 : TCG_REG_O7); + tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7, + TCG_REG_T1, desti & 0xfff, JMPL); +} + +static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest, + bool in_prologue) +{ + ptrdiff_t disp = tcg_pcrel_diff(s, dest); + + if (disp == (int32_t)disp) { + tcg_out32(s, CALL | (uint32_t)disp >> 2); + } else { + tcg_out_jmpl_const(s, dest, in_prologue, false); + } +} + +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest) +{ + tcg_out_call_nodelay(s, dest, false); + tcg_out_nop(s); +} + +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Note that the TCG memory order constants mirror the Sparc MEMBAR. 
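+       TCG_MO_LD_LD, TCG_MO_ST_LD, TCG_MO_LD_ST and TCG_MO_ST_ST sit
+       in the same bit positions as the mmask bits #LoadLoad,
+       #StoreLoad, #LoadStore and #StoreStore, so (a0 & TCG_MO_ALL)
+       can be used directly as the mmask operand; e.g.
+
+           tcg_out_mb(s, TCG_MO_ST_LD);
+
+       emits "membar #StoreLoad".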
*/ + tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL)); +} + +#ifdef CONFIG_SOFTMMU +static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1]; +static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1]; + +static void emit_extend(TCGContext *s, TCGReg r, int op) +{ + /* Emit zero extend of 8, 16 or 32 bit data as + * required by the MO_* value op; do nothing for 64 bit. + */ + switch (op & MO_SIZE) { + case MO_8: + tcg_out_arithi(s, r, r, 0xff, ARITH_AND); + break; + case MO_16: + tcg_out_arithi(s, r, r, 16, SHIFT_SLL); + tcg_out_arithi(s, r, r, 16, SHIFT_SRL); + break; + case MO_32: + tcg_out_arith(s, r, r, 0, SHIFT_SRL); + break; + case MO_64: + break; + } +} + +static void build_trampolines(TCGContext *s) +{ + static void * const qemu_ld_helpers[] = { + [MO_UB] = helper_ret_ldub_mmu, + [MO_SB] = helper_ret_ldsb_mmu, + [MO_LEUW] = helper_le_lduw_mmu, + [MO_LESW] = helper_le_ldsw_mmu, + [MO_LEUL] = helper_le_ldul_mmu, + [MO_LEUQ] = helper_le_ldq_mmu, + [MO_BEUW] = helper_be_lduw_mmu, + [MO_BESW] = helper_be_ldsw_mmu, + [MO_BEUL] = helper_be_ldul_mmu, + [MO_BEUQ] = helper_be_ldq_mmu, + }; + static void * const qemu_st_helpers[] = { + [MO_UB] = helper_ret_stb_mmu, + [MO_LEUW] = helper_le_stw_mmu, + [MO_LEUL] = helper_le_stl_mmu, + [MO_LEUQ] = helper_le_stq_mmu, + [MO_BEUW] = helper_be_stw_mmu, + [MO_BEUL] = helper_be_stl_mmu, + [MO_BEUQ] = helper_be_stq_mmu, + }; + + int i; + + for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) { + if (qemu_ld_helpers[i] == NULL) { + continue; + } + + /* May as well align the trampoline. */ + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); + + /* Set the retaddr operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7); + /* Tail call. */ + tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); + } + + for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) { + if (qemu_st_helpers[i] == NULL) { + continue; + } + + /* May as well align the trampoline. */ + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); + + emit_extend(s, TCG_REG_O2, i); + + /* Set the retaddr operand. */ + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7); + + /* Tail call. */ + tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); + } +} +#else +static const tcg_insn_unit *qemu_unalign_ld_trampoline; +static const tcg_insn_unit *qemu_unalign_st_trampoline; + +static void build_trampolines(TCGContext *s) +{ + for (int ld = 0; ld < 2; ++ld) { + void *helper; + + while ((uintptr_t)s->code_ptr & 15) { + tcg_out_nop(s); + } + + if (ld) { + helper = helper_unaligned_ld; + qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } else { + helper = helper_unaligned_st; + qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); + } + + /* Tail call. */ + tcg_out_jmpl_const(s, helper, true, true); + /* delay slot -- set the env argument */ + tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0); + } +} +#endif + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int tmp_buf_size, frame_size; + + /* + * The TCG temp buffer is at the top of the frame, immediately + * below the frame pointer. Use the logical (aligned) offset here; + * the stack bias is applied in temp_allocate_frame(). 
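+     * With the usual CPU_TEMP_BUF_NLONGS of 128 this reserves 1 KiB,
+     * i.e. the call below amounts to
+     *
+     *     tcg_set_frame(s, TCG_REG_I6, -1024, 1024);
+     *
+     * placing the buffer at [%fp - 1024, %fp) in unbiased terms.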
+ */ + tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long); + tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size); + + /* + * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is + * otherwise the minimal frame usable by callees. + */ + frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS; + frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size; + frame_size += TCG_TARGET_STACK_ALIGN - 1; + frame_size &= -TCG_TARGET_STACK_ALIGN; + tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | + INSN_IMM13(-frame_size)); + +#ifndef CONFIG_SOFTMMU + if (guest_base != 0) { + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, + guest_base, true, TCG_REG_T1); + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); + } +#endif + + /* We choose TCG_REG_TB such that no move is required. */ + if (USE_REG_TB) { + QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); + } + + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); + /* delay slot */ + tcg_out_nop(s); + + /* Epilogue for goto_ptr. */ + tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + /* delay slot */ + tcg_out_movi_imm13(s, TCG_REG_O0, 0); + + build_trampolines(s); +} + +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + int i; + for (i = 0; i < count; ++i) { + p[i] = NOP; + } +} + +#if defined(CONFIG_SOFTMMU) + +/* We expect to use a 13-bit negative offset from ENV. */ +QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); +QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12)); + +/* Perform the TLB load and compare. + + Inputs: + ADDRLO and ADDRHI contain the possible two parts of the address. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + The result of the TLB comparison is in %[ix]cc. The sanitized address + is in the returned register, maybe %o0. The TLB addend is in %o1. */ + +static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, + MemOp opc, int which) +{ + int fast_off = TLB_MASK_TABLE_OFS(mem_index); + int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); + int table_off = fast_off + offsetof(CPUTLBDescFast, table); + const TCGReg r0 = TCG_REG_O0; + const TCGReg r1 = TCG_REG_O1; + const TCGReg r2 = TCG_REG_O2; + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); + tcg_target_long compare_mask; + + /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */ + tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off); + tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off); + + /* Extract the page index, shifted into place for tlb index. */ + tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, + SHIFT_SRL); + tcg_out_arith(s, r2, r2, r0, ARITH_AND); + + /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */ + tcg_out_arith(s, r2, r2, r1, ARITH_ADD); + + /* Load the tlb comparator and the addend. */ + tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which); + tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend)); + + /* Mask out the page offset, except for the required alignment. + We don't support unaligned accesses. 
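+       For example, with 4 KiB guest pages and a 4-byte access this
+       checks, schematically,
+
+           compare_mask = ~0xfffull | 0x3;
+           hit = (addr & compare_mask) == tlb_entry->addr_read;
+
+       A misaligned address keeps a low bit set and can never equal
+       the page-aligned comparator, forcing the slow path.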
*/ + if (a_bits < s_bits) { + a_bits = s_bits; + } + compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1); + if (check_fit_tl(compare_mask, 13)) { + tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND); + } else { + tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask); + tcg_out_arith(s, r2, addr, r2, ARITH_AND); + } + tcg_out_cmp(s, r0, r2, 0); + + /* If the guest address must be zero-extended, do so now. */ + if (TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); + return r0; + } + return addr; +} +#endif /* CONFIG_SOFTMMU */ + +static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { + [MO_UB] = LDUB, + [MO_SB] = LDSB, + [MO_UB | MO_LE] = LDUB, + [MO_SB | MO_LE] = LDSB, + + [MO_BEUW] = LDUH, + [MO_BESW] = LDSH, + [MO_BEUL] = LDUW, + [MO_BESL] = LDSW, + [MO_BEUQ] = LDX, + [MO_BESQ] = LDX, + + [MO_LEUW] = LDUH_LE, + [MO_LESW] = LDSH_LE, + [MO_LEUL] = LDUW_LE, + [MO_LESL] = LDSW_LE, + [MO_LEUQ] = LDX_LE, + [MO_LESQ] = LDX_LE, +}; + +static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = { + [MO_UB] = STB, + + [MO_BEUW] = STH, + [MO_BEUL] = STW, + [MO_BEUQ] = STX, + + [MO_LEUW] = STH_LE, + [MO_LEUL] = STW_LE, + [MO_LEUQ] = STX_LE, +}; + +static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, + MemOpIdx oi, bool is_64) +{ + MemOp memop = get_memop(oi); + tcg_insn_unit *label_ptr; + +#ifdef CONFIG_SOFTMMU + unsigned memi = get_mmuidx(oi); + TCGReg addrz; + const tcg_insn_unit *func; + + addrz = tcg_out_tlb_load(s, addr, memi, memop, + offsetof(CPUTLBEntry, addr_read)); + + /* The fast path is exactly one insn. Thus we can perform the + entire TLB Hit in the (annulled) delay slot of the branch + over the TLB Miss case. */ + + /* beq,a,pt %[xi]cc, label0 */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT + | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + + /* TLB Miss. */ + + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); + + /* We use the helpers to extend SB and SW data, leaving the case + of SL needing explicit extending below. */ + if ((memop & MO_SSIZE) == MO_SL) { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)]; + } else { + func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)]; + } + tcg_debug_assert(func != NULL); + tcg_out_call_nodelay(s, func, false); + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi); + + /* We let the helper sign-extend SB and SW, but leave SL for here. */ + if (is_64 && (memop & MO_SSIZE) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); + } else { + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + + if (TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; + } + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight load in the delay slot. + * This is required to preserve atomicity for aligned accesses. 
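+     * For t_bits = MAX(a_bits, s_bits), the single ANDcc below
+     * computes the predicate
+     *
+     *     aligned = (addr & ((1u << t_bits) - 1)) == 0;
+     *
+     * into %icc.Z, so the aligned case costs only this test plus the
+     * annulled branch around the slow path.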
+     */
+    t_bits = MAX(a_bits, s_bits);
+    tcg_debug_assert(t_bits < 13);
+    tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
+
+    /* beq,a,pt %icc, label */
+    label_ptr = s->code_ptr;
+    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
+    /* delay slot */
+    tcg_out_ldst_rr(s, data, addr, index,
+                    qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
+
+    if (a_bits >= s_bits) {
+        /*
+         * Overalignment: A successful alignment test will perform the memory
+         * operation in the delay slot, and failure need only invoke the
+         * handler for SIGBUS.
+         */
+        tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
+        /* delay slot -- move to low part of argument reg */
+        tcg_out_mov_delay(s, TCG_REG_O1, addr);
+    } else {
+        /* Underalignment: load by pieces of minimum alignment. */
+        int ld_opc, a_size, s_size, i;
+
+        /*
+         * Force full address into T1 early; avoids problems with
+         * overlap between @addr and @data.
+         */
+        tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
+
+        a_size = 1 << a_bits;
+        s_size = 1 << s_bits;
+        if ((memop & MO_BSWAP) == MO_BE) {
+            ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
+            tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
+            ld_opc = qemu_ld_opc[a_bits | MO_BE];
+            for (i = a_size; i < s_size; i += a_size) {
+                tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
+                tcg_out_arithi(s, data, data, a_size * 8, SHIFT_SLLX);
+                tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
+            }
+        } else if (a_bits == 0) {
+            ld_opc = LDUB;
+            tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
+            for (i = a_size; i < s_size; i += a_size) {
+                if ((memop & MO_SIGN) && i == s_size - a_size) {
+                    ld_opc = LDSB;
+                }
+                tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
+                tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
+                tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
+            }
+        } else {
+            ld_opc = qemu_ld_opc[a_bits | MO_LE];
+            tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
+            for (i = a_size; i < s_size; i += a_size) {
+                tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
+                if ((memop & MO_SIGN) && i == s_size - a_size) {
+                    ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
+                }
+                tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
+                tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
+                tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
+            }
+        }
+    }
+
+    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
+#endif /* CONFIG_SOFTMMU */
+}
+
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
+                            MemOpIdx oi)
+{
+    MemOp memop = get_memop(oi);
+    tcg_insn_unit *label_ptr;
+
+#ifdef CONFIG_SOFTMMU
+    unsigned memi = get_mmuidx(oi);
+    TCGReg addrz;
+    const tcg_insn_unit *func;
+
+    addrz = tcg_out_tlb_load(s, addr, memi, memop,
+                             offsetof(CPUTLBEntry, addr_write));
+
+    /* The fast path is exactly one insn.  Thus we can perform the entire
+       TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
+    /* beq,a,pt %[xi]cc, label0 */
+    label_ptr = s->code_ptr;
+    tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+                  | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+    /* delay slot */
+    tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+                    qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
+
+    /* TLB Miss. 
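+       Fall through from the annulled branch to call the store helper;
+       the MemOpIdx argument is filled in from the call's delay slot.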
*/ + + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data); + + func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; + tcg_debug_assert(func != NULL); + tcg_out_call_nodelay(s, func, false); + /* delay slot */ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi); + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#else + TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0); + unsigned a_bits = get_alignment_bits(memop); + unsigned s_bits = memop & MO_SIZE; + unsigned t_bits; + + if (TARGET_LONG_BITS == 32) { + tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); + addr = TCG_REG_T1; + } + + /* + * Normal case: alignment equal to access size. + */ + if (a_bits == s_bits) { + tcg_out_ldst_rr(s, data, addr, index, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + return; + } + + /* + * Test for at least natural alignment, and assume most accesses + * will be aligned -- perform a straight store in the delay slot. + * This is required to preserve atomicity for aligned accesses. + */ + t_bits = MAX(a_bits, s_bits); + tcg_debug_assert(t_bits < 13); + tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC); + + /* beq,a,pt %icc, label */ + label_ptr = s->code_ptr; + tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0); + /* delay slot */ + tcg_out_ldst_rr(s, data, addr, index, + qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); + + if (a_bits >= s_bits) { + /* + * Overalignment: A successful alignment test will perform the memory + * operation in the delay slot, and failure need only invoke the + * handler for SIGBUS. + */ + tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); + /* delay slot -- move to low part of argument reg */ + tcg_out_mov_delay(s, TCG_REG_O1, addr); + } else { + /* Underalignment: store by pieces of minimum alignment. */ + int st_opc, a_size, s_size, i; + + /* + * Force full address into T1 early; avoids problems with + * overlap between @addr and @data. + */ + tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD); + + a_size = 1 << a_bits; + s_size = 1 << s_bits; + if ((memop & MO_BSWAP) == MO_BE) { + st_opc = qemu_st_opc[a_bits | MO_BE]; + for (i = 0; i < s_size; i += a_size) { + TCGReg d = data; + int shift = (s_size - a_size - i) * 8; + if (shift) { + d = TCG_REG_T2; + tcg_out_arithi(s, d, data, shift, SHIFT_SRLX); + } + tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc); + } + } else if (a_bits == 0) { + tcg_out_ldst(s, data, TCG_REG_T1, 0, STB); + for (i = 1; i < s_size; i++) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB); + } + } else { + /* Note that ST*A with immediate asi must use indexed address. */ + st_opc = qemu_st_opc[a_bits + MO_LE]; + tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc); + for (i = a_size; i < s_size; i += a_size) { + tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD); + tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc); + } + } + } + + *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); +#endif /* CONFIG_SOFTMMU */ +} + +static void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) +{ + TCGArg a0, a1, a2; + int c, c2; + + /* Hoist the loads of the most common arguments. 
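+       Most cases below use only args[0-2]; c2 records whether the
+       third operand is a constant.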
*/ + a0 = args[0]; + a1 = args[1]; + a2 = args[2]; + c2 = const_args[2]; + + switch (opc) { + case INDEX_op_exit_tb: + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + break; + } else if (USE_REG_TB) { + intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); + if (check_fit_ptr(tb_diff, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + /* Note that TCG_REG_TB has been unwound to O1. */ + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD); + break; + } + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_insn_offset) { + /* direct jump method */ + if (USE_REG_TB) { + /* make sure the patch is 8-byte aligned. */ + if ((intptr_t)s->code_ptr & 4) { + tcg_out_nop(s); + } + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + tcg_out_sethi(s, TCG_REG_T1, 0); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); + tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); + } else { + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + tcg_out32(s, CALL); + tcg_out_nop(s); + } + } else { + /* indirect jump method */ + tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL); + tcg_out_nop(s); + } + set_jmp_reset_offset(s, a0); + + /* For the unlinked path of goto_tb, we need to reset + TCG_REG_TB to the beginning of this TB. */ + if (USE_REG_TB) { + c = -tcg_current_code_size(s); + if (check_fit_i32(c, 13)) { + tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, + TCG_REG_T1, ARITH_ADD); + } + } + break; + case INDEX_op_goto_ptr: + tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); + if (USE_REG_TB) { + tcg_out_mov_delay(s, TCG_REG_TB, a0); + } else { + tcg_out_nop(s); + } + break; + case INDEX_op_br: + tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); + tcg_out_nop(s); + break; + +#define OP_32_64(x) \ + glue(glue(case INDEX_op_, x), _i32): \ + glue(glue(case INDEX_op_, x), _i64) + + OP_32_64(ld8u): + tcg_out_ldst(s, a0, a1, a2, LDUB); + break; + OP_32_64(ld8s): + tcg_out_ldst(s, a0, a1, a2, LDSB); + break; + OP_32_64(ld16u): + tcg_out_ldst(s, a0, a1, a2, LDUH); + break; + OP_32_64(ld16s): + tcg_out_ldst(s, a0, a1, a2, LDSH); + break; + case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + tcg_out_ldst(s, a0, a1, a2, LDUW); + break; + OP_32_64(st8): + tcg_out_ldst(s, a0, a1, a2, STB); + break; + OP_32_64(st16): + tcg_out_ldst(s, a0, a1, a2, STH); + break; + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_ldst(s, a0, a1, a2, STW); + break; + OP_32_64(add): + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(andc): + c = ARITH_ANDN; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(orc): + c = ARITH_ORN; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + case INDEX_op_shl_i32: + c = SHIFT_SLL; + do_shift32: + /* Limit immediate shift count lest we create an illegal insn. 
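+          The 32-bit shift insns encode only a 5-bit count, so reduce
+          the constant modulo 32.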
*/ + tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); + break; + case INDEX_op_shr_i32: + c = SHIFT_SRL; + goto do_shift32; + case INDEX_op_sar_i32: + c = SHIFT_SRA; + goto do_shift32; + case INDEX_op_mul_i32: + c = ARITH_UMUL; + goto gen_arith; + + OP_32_64(neg): + c = ARITH_SUB; + goto gen_arith1; + OP_32_64(not): + c = ARITH_ORN; + goto gen_arith1; + + case INDEX_op_div_i32: + tcg_out_div32(s, a0, a1, a2, c2, 0); + break; + case INDEX_op_divu_i32: + tcg_out_div32(s, a0, a1, a2, c2, 1); + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i32: + tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); + break; + + case INDEX_op_add2_i32: + tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_ADDCC, ARITH_ADDC); + break; + case INDEX_op_sub2_i32: + tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], + args[4], const_args[4], args[5], const_args[5], + ARITH_SUBCC, ARITH_SUBC); + break; + case INDEX_op_mulu2_i32: + c = ARITH_UMUL; + goto do_mul2; + case INDEX_op_muls2_i32: + c = ARITH_SMUL; + do_mul2: + /* The 32-bit multiply insns produce a full 64-bit result. */ + tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); + break; + + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, a0, a1, a2, false); + break; + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, a0, a1, a2, true); + break; + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, a0, a1, a2); + break; + + case INDEX_op_ld32s_i64: + tcg_out_ldst(s, a0, a1, a2, LDSW); + break; + case INDEX_op_ld_i64: + tcg_out_ldst(s, a0, a1, a2, LDX); + break; + case INDEX_op_st_i64: + tcg_out_ldst(s, a0, a1, a2, STX); + break; + case INDEX_op_shl_i64: + c = SHIFT_SLLX; + do_shift64: + /* Limit immediate shift count lest we create an illegal insn. 
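+          The 64-bit shift insns encode only a 6-bit count, so reduce
+          the constant modulo 64.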
*/ + tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); + break; + case INDEX_op_shr_i64: + c = SHIFT_SRLX; + goto do_shift64; + case INDEX_op_sar_i64: + c = SHIFT_SRAX; + goto do_shift64; + case INDEX_op_mul_i64: + c = ARITH_MULX; + goto gen_arith; + case INDEX_op_div_i64: + c = ARITH_SDIVX; + goto gen_arith; + case INDEX_op_divu_i64: + c = ARITH_UDIVX; + goto gen_arith; + case INDEX_op_ext_i32_i64: + case INDEX_op_ext32s_i64: + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); + break; + case INDEX_op_extu_i32_i64: + case INDEX_op_ext32u_i64: + tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); + break; + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2); + break; + case INDEX_op_movcond_i64: + tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); + break; + case INDEX_op_add2_i64: + tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], + const_args[4], args[5], const_args[5], false); + break; + case INDEX_op_sub2_i64: + tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], + const_args[4], args[5], const_args[5], true); + break; + case INDEX_op_muluh_i64: + tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); + break; + + gen_arith: + tcg_out_arithc(s, a0, a1, a2, c2, c); + break; + + gen_arith1: + tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); + break; + + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; + + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ + case INDEX_op_mov_i64: + case INDEX_op_call: /* Always emitted via tcg_out_call. 
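+                               These opcodes must never reach tcg_out_op;
+                               fall through to the abort below.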
*/ + default: + tcg_abort(); + } +} + +static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) +{ + switch (op) { + case INDEX_op_goto_ptr: + return C_O0_I1(r); + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8s_i32: + case INDEX_op_ld16u_i32: + case INDEX_op_ld16s_i32: + case INDEX_op_ld_i32: + case INDEX_op_neg_i32: + case INDEX_op_not_i32: + return C_O1_I1(r, r); + + case INDEX_op_st8_i32: + case INDEX_op_st16_i32: + case INDEX_op_st_i32: + return C_O0_I2(rZ, r); + + case INDEX_op_add_i32: + case INDEX_op_mul_i32: + case INDEX_op_div_i32: + case INDEX_op_divu_i32: + case INDEX_op_sub_i32: + case INDEX_op_and_i32: + case INDEX_op_andc_i32: + case INDEX_op_or_i32: + case INDEX_op_orc_i32: + case INDEX_op_xor_i32: + case INDEX_op_shl_i32: + case INDEX_op_shr_i32: + case INDEX_op_sar_i32: + case INDEX_op_setcond_i32: + return C_O1_I2(r, rZ, rJ); + + case INDEX_op_brcond_i32: + return C_O0_I2(rZ, rJ); + case INDEX_op_movcond_i32: + return C_O1_I4(r, rZ, rJ, rI, 0); + case INDEX_op_add2_i32: + case INDEX_op_sub2_i32: + return C_O2_I4(r, r, rZ, rZ, rJ, rJ); + case INDEX_op_mulu2_i32: + case INDEX_op_muls2_i32: + return C_O2_I2(r, r, rZ, rJ); + + case INDEX_op_ld8u_i64: + case INDEX_op_ld8s_i64: + case INDEX_op_ld16u_i64: + case INDEX_op_ld16s_i64: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + return C_O1_I1(R, r); + + case INDEX_op_st8_i64: + case INDEX_op_st16_i64: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: + return C_O0_I2(RZ, r); + + case INDEX_op_add_i64: + case INDEX_op_mul_i64: + case INDEX_op_div_i64: + case INDEX_op_divu_i64: + case INDEX_op_sub_i64: + case INDEX_op_and_i64: + case INDEX_op_andc_i64: + case INDEX_op_or_i64: + case INDEX_op_orc_i64: + case INDEX_op_xor_i64: + case INDEX_op_shl_i64: + case INDEX_op_shr_i64: + case INDEX_op_sar_i64: + case INDEX_op_setcond_i64: + return C_O1_I2(R, RZ, RJ); + + case INDEX_op_neg_i64: + case INDEX_op_not_i64: + case INDEX_op_ext32s_i64: + case INDEX_op_ext32u_i64: + return C_O1_I1(R, R); + + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + return C_O1_I1(r, R); + + case INDEX_op_brcond_i64: + return C_O0_I2(RZ, RJ); + case INDEX_op_movcond_i64: + return C_O1_I4(R, RZ, RJ, RI, 0); + case INDEX_op_add2_i64: + case INDEX_op_sub2_i64: + return C_O2_I4(R, R, RZ, RZ, RJ, RI); + case INDEX_op_muluh_i64: + return C_O1_I2(R, R, R); + + case INDEX_op_qemu_ld_i32: + return C_O1_I1(r, A); + case INDEX_op_qemu_ld_i64: + return C_O1_I1(R, A); + case INDEX_op_qemu_st_i32: + return C_O0_I2(sZ, A); + case INDEX_op_qemu_st_i64: + return C_O0_I2(SZ, A); + + default: + g_assert_not_reached(); + } +} + +static void tcg_target_init(TCGContext *s) +{ + /* + * Only probe for the platform and capabilities if we haven't already + * determined maximum values at compile time. 
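+     * When __VIS__ >= 0x300, tcg-target.h defines use_vis3_instructions
+     * as the constant 1 and this runtime getauxval probe compiles away.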
+ */ +#ifndef use_vis3_instructions + { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; + } +#endif + + tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; + tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64; + + tcg_target_call_clobber_regs = 0; + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G2); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G3); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G4); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G5); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G6); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G7); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O0); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O1); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O2); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O3); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O4); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O5); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O6); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O7); + + s->reserved_regs = 0; + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ +} + +#define ELF_HOST_MACHINE EM_SPARCV9 + +typedef struct { + DebugFrameHeader h; + uint8_t fde_def_cfa[4]; + uint8_t fde_win_save; + uint8_t fde_ret_save[3]; +} DebugFrame; + +static const DebugFrame debug_frame = { + .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .h.cie.id = -1, + .h.cie.version = 1, + .h.cie.code_align = 1, + .h.cie.data_align = -sizeof(void *) & 0x7f, + .h.cie.return_column = 15, /* o7 */ + + /* Total FDE size does not include the "len" member. */ + .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), + + .fde_def_cfa = { + 12, 30, /* DW_CFA_def_cfa i6, 2047 */ + (2047 & 0x7f) | 0x80, (2047 >> 7) + }, + .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ + .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ +}; + +void tcg_register_jit(const void *buf, size_t buf_size) +{ + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +} + +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, + uintptr_t jmp_rw, uintptr_t addr) +{ + intptr_t tb_disp = addr - tc_ptr; + intptr_t br_disp = addr - jmp_rx; + tcg_insn_unit i1, i2; + + /* We can reach the entire address space for ILP32. + For LP64, the code_gen_buffer can't be larger than 2GB. */ + tcg_debug_assert(tb_disp == (int32_t)tb_disp); + tcg_debug_assert(br_disp == (int32_t)br_disp); + + if (!USE_REG_TB) { + qatomic_set((uint32_t *)jmp_rw, + deposit32(CALL, 0, 30, br_disp >> 2)); + flush_idcache_range(jmp_rx, jmp_rw, 4); + return; + } + + /* This does not exercise the range of the branch, but we do + still need to be able to load the new value of TCG_REG_TB. 
+ But this does still happen quite often. */ + if (check_fit_ptr(tb_disp, 13)) { + /* ba,pt %icc, addr */ + i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A) + | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp)); + i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB) + | INSN_IMM13(tb_disp)); + } else if (tb_disp >= 0) { + i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10); + i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1) + | INSN_IMM13(tb_disp & 0x3ff)); + } else { + i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10); + i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1) + | INSN_IMM13((tb_disp & 0x3ff) | -0x400)); + } + + qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1)); + flush_idcache_range(jmp_rx, jmp_rw, 8); +} diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h new file mode 100644 index 0000000..8655acd --- /dev/null +++ b/tcg/sparc64/tcg-target.h @@ -0,0 +1,161 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef SPARC_TCG_TARGET_H +#define SPARC_TCG_TARGET_H + +#define TCG_TARGET_INSN_UNIT_SIZE 4 +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 +#define TCG_TARGET_NB_REGS 32 +#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) + +typedef enum { + TCG_REG_G0 = 0, + TCG_REG_G1, + TCG_REG_G2, + TCG_REG_G3, + TCG_REG_G4, + TCG_REG_G5, + TCG_REG_G6, + TCG_REG_G7, + TCG_REG_O0, + TCG_REG_O1, + TCG_REG_O2, + TCG_REG_O3, + TCG_REG_O4, + TCG_REG_O5, + TCG_REG_O6, + TCG_REG_O7, + TCG_REG_L0, + TCG_REG_L1, + TCG_REG_L2, + TCG_REG_L3, + TCG_REG_L4, + TCG_REG_L5, + TCG_REG_L6, + TCG_REG_L7, + TCG_REG_I0, + TCG_REG_I1, + TCG_REG_I2, + TCG_REG_I3, + TCG_REG_I4, + TCG_REG_I5, + TCG_REG_I6, + TCG_REG_I7, +} TCGReg; + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_O6 + +#define TCG_TARGET_STACK_BIAS 2047 +#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) +#define TCG_TARGET_EXTEND_ARGS 1 + +#if defined(__VIS__) && __VIS__ >= 0x300 +#define use_vis3_instructions 1 +#else +extern bool use_vis3_instructions; +#endif + +/* optional instructions */ +#define TCG_TARGET_HAS_div_i32 1 +#define TCG_TARGET_HAS_rem_i32 0 +#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_ext8s_i32 0 +#define TCG_TARGET_HAS_ext16s_i32 0 +#define TCG_TARGET_HAS_ext8u_i32 0 +#define TCG_TARGET_HAS_ext16u_i32 0 +#define TCG_TARGET_HAS_bswap16_i32 0 +#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_neg_i32 1 +#define TCG_TARGET_HAS_not_i32 1 +#define TCG_TARGET_HAS_andc_i32 1 +#define TCG_TARGET_HAS_orc_i32 1 +#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_nand_i32 0 +#define TCG_TARGET_HAS_nor_i32 0 +#define TCG_TARGET_HAS_clz_i32 0 +#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_deposit_i32 0 +#define TCG_TARGET_HAS_extract_i32 0 +#define TCG_TARGET_HAS_sextract_i32 0 +#define TCG_TARGET_HAS_extract2_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 +#define TCG_TARGET_HAS_add2_i32 1 +#define TCG_TARGET_HAS_sub2_i32 1 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_direct_jump 1 +#define TCG_TARGET_HAS_qemu_st8_i32 0 + +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 +#define TCG_TARGET_HAS_div_i64 1 +#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_rot_i64 0 +#define TCG_TARGET_HAS_ext8s_i64 0 +#define TCG_TARGET_HAS_ext16s_i64 0 +#define TCG_TARGET_HAS_ext32s_i64 1 +#define TCG_TARGET_HAS_ext8u_i64 0 +#define TCG_TARGET_HAS_ext16u_i64 0 +#define TCG_TARGET_HAS_ext32u_i64 1 +#define TCG_TARGET_HAS_bswap16_i64 0 +#define TCG_TARGET_HAS_bswap32_i64 0 +#define TCG_TARGET_HAS_bswap64_i64 0 +#define TCG_TARGET_HAS_neg_i64 1 +#define TCG_TARGET_HAS_not_i64 1 +#define TCG_TARGET_HAS_andc_i64 1 +#define TCG_TARGET_HAS_orc_i64 1 +#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_nand_i64 0 +#define TCG_TARGET_HAS_nor_i64 0 +#define TCG_TARGET_HAS_clz_i64 0 +#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_extract_i64 0 +#define TCG_TARGET_HAS_sextract_i64 0 +#define TCG_TARGET_HAS_extract2_i64 0 +#define TCG_TARGET_HAS_movcond_i64 1 +#define TCG_TARGET_HAS_add2_i64 1 +#define TCG_TARGET_HAS_sub2_i64 1 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions +#define 
TCG_TARGET_HAS_mulsh_i64 0 + +#define TCG_AREG0 TCG_REG_I0 + +#define TCG_TARGET_DEFAULT_MO (0) +#define TCG_TARGET_HAS_MEMORY_BSWAP 1 + +void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +#define TCG_TARGET_NEED_POOL_LABELS + +#endif -- cgit v1.1 From a59a293126604183dd63bf8b890393e32e7702c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 08:17:45 +0300 Subject: tcg/sparc64: Remove sparc32plus constraints With sparc64 we need not distinguish between registers that can hold 32-bit values and those that can hold 64-bit values. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target-con-set.h | 16 ++---- tcg/sparc64/tcg-target-con-str.h | 3 -- tcg/sparc64/tcg-target.c.inc | 109 +++++++++++++++------------------------ 3 files changed, 44 insertions(+), 84 deletions(-) diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 3b751dc..31e6fea 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -11,22 +11,12 @@ */ C_O0_I1(r) C_O0_I2(rZ, r) -C_O0_I2(RZ, r) C_O0_I2(rZ, rJ) -C_O0_I2(RZ, RJ) -C_O0_I2(sZ, A) -C_O0_I2(SZ, A) -C_O1_I1(r, A) -C_O1_I1(R, A) +C_O0_I2(sZ, s) +C_O1_I1(r, s) C_O1_I1(r, r) -C_O1_I1(r, R) -C_O1_I1(R, r) -C_O1_I1(R, R) -C_O1_I2(R, R, R) +C_O1_I2(r, r, r) C_O1_I2(r, rZ, rJ) -C_O1_I2(R, RZ, RJ) C_O1_I4(r, rZ, rJ, rI, 0) -C_O1_I4(R, RZ, RJ, RI, 0) C_O2_I2(r, r, rZ, rJ) -C_O2_I4(R, R, RZ, RZ, RJ, RI) C_O2_I4(r, r, rZ, rZ, rJ, rJ) diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h index fdb25d9..8f5c7ae 100644 --- a/tcg/sparc64/tcg-target-con-str.h +++ b/tcg/sparc64/tcg-target-con-str.h @@ -9,10 +9,7 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('R', ALL_GENERAL_REGS64) REGS('s', ALL_QLDST_REGS) -REGS('S', ALL_QLDST_REGS64) -REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS) /* * Define constraint letters for constants: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 097bcfc..cb9453e 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -80,19 +80,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #else #define SOFTMMU_RESERVE_REGS 0 #endif - -/* - * Note that sparcv8plus can only hold 64 bit quantities in %g and %o - * registers. These are saved manually by the kernel in full 64-bit - * slots. The %i and %l registers are saved by the register window - * mechanism, which only allocates space for 32 bits. Given that this - * window spill/fill can happen on any signal, we must consider the - * high bits of the %i and %l registers garbage at all times. - */ #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS #define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) -#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) /* Define some temporary registers. T2 is used for constant generation. 
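   Both are reserved in tcg_target_init, so the register allocator
   never assigns them.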
*/ #define TCG_REG_T1 TCG_REG_G1 @@ -1738,107 +1727,91 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) return C_O0_I1(r); case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: case INDEX_op_neg_i32: + case INDEX_op_neg_i64: case INDEX_op_not_i32: + case INDEX_op_not_i64: + case INDEX_op_ext32s_i64: + case INDEX_op_ext32u_i64: + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); case INDEX_op_st8_i32: + case INDEX_op_st8_i64: case INDEX_op_st16_i32: + case INDEX_op_st16_i64: case INDEX_op_st_i32: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: return C_O0_I2(rZ, r); case INDEX_op_add_i32: + case INDEX_op_add_i64: case INDEX_op_mul_i32: + case INDEX_op_mul_i64: case INDEX_op_div_i32: + case INDEX_op_div_i64: case INDEX_op_divu_i32: + case INDEX_op_divu_i64: case INDEX_op_sub_i32: + case INDEX_op_sub_i64: case INDEX_op_and_i32: + case INDEX_op_and_i64: case INDEX_op_andc_i32: + case INDEX_op_andc_i64: case INDEX_op_or_i32: + case INDEX_op_or_i64: case INDEX_op_orc_i32: + case INDEX_op_orc_i64: case INDEX_op_xor_i32: + case INDEX_op_xor_i64: case INDEX_op_shl_i32: + case INDEX_op_shl_i64: case INDEX_op_shr_i32: + case INDEX_op_shr_i64: case INDEX_op_sar_i32: + case INDEX_op_sar_i64: case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: return C_O1_I2(r, rZ, rJ); case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: return C_O0_I2(rZ, rJ); case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: return C_O1_I4(r, rZ, rJ, rI, 0); case INDEX_op_add2_i32: + case INDEX_op_add2_i64: case INDEX_op_sub2_i32: + case INDEX_op_sub2_i64: return C_O2_I4(r, r, rZ, rZ, rJ, rJ); case INDEX_op_mulu2_i32: case INDEX_op_muls2_i32: return C_O2_I2(r, r, rZ, rJ); - - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - return C_O1_I1(R, r); - - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(RZ, r); - - case INDEX_op_add_i64: - case INDEX_op_mul_i64: - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - case INDEX_op_sub_i64: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: - case INDEX_op_or_i64: - case INDEX_op_orc_i64: - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_setcond_i64: - return C_O1_I2(R, RZ, RJ); - - case INDEX_op_neg_i64: - case INDEX_op_not_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - return C_O1_I1(R, R); - - case INDEX_op_extrl_i64_i32: - case INDEX_op_extrh_i64_i32: - return C_O1_I1(r, R); - - case INDEX_op_brcond_i64: - return C_O0_I2(RZ, RJ); - case INDEX_op_movcond_i64: - return C_O1_I4(R, RZ, RJ, RI, 0); - case INDEX_op_add2_i64: - case INDEX_op_sub2_i64: - return C_O2_I4(R, R, RZ, RZ, RJ, RI); case INDEX_op_muluh_i64: - return C_O1_I2(R, R, R); + return C_O1_I2(r, r, r); case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, A); case INDEX_op_qemu_ld_i64: - return C_O1_I1(R, A); + return C_O1_I1(r, s); case INDEX_op_qemu_st_i32: - return C_O0_I2(sZ, A); case INDEX_op_qemu_st_i64: - return C_O0_I2(SZ, A); + 
return C_O0_I2(sZ, s);

     default:
         g_assert_not_reached();
@@ -1859,7 +1832,7 @@ static void tcg_target_init(TCGContext *s)
 #endif

     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
-    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64;
+    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

     tcg_target_call_clobber_regs = 0;
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
-- 
cgit v1.1


From 9dd1d56e570e5119fef2b28fda811d6891e597a8 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng
Date: Sat, 29 Oct 2022 06:23:44 +1100
Subject: tcg/tci: fix logic error when registering helpers via FFI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When registering helpers via FFI for TCI, the inner loop that
iterates parameters of the helper reuses (and thus pollutes) the
same variable used by the outer loop that iterates all helpers,
thus leaving some helpers unregistered.

Fix this logic error by using a dedicated temporary variable for
the inner loop.

Fixes: 22f15579fa ("tcg: Build ffi data structures for helpers")
Reviewed-by: Alex Bennée
Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Icenowy Zheng
Message-Id: <20221028072145.1593205-1-uwu@icenowy.me>
[rth: Move declaration of j to the for loop itself]
Signed-off-by: Richard Henderson
---
 tcg/tcg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index c9e664e..b6c46b7 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -634,9 +634,9 @@ static void tcg_context_init(unsigned max_cpus)
         if (nargs != 0) {
             ca->cif.arg_types = ca->args;
-            for (i = 0; i < nargs; ++i) {
-                int typecode = extract32(typemask, (i + 1) * 3, 3);
-                ca->args[i] = typecode_to_ffi[typecode];
+            for (int j = 0; j < nargs; ++j) {
+                int typecode = extract32(typemask, (j + 1) * 3, 3);
+                ca->args[j] = typecode_to_ffi[typecode];
             }
         }
-- 
cgit v1.1


From 6392bd6b90a488b3254b1cb85d79bf262ed5f9e0 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Oct 2022 22:15:04 +1000
Subject: accel/tcg: Introduce cpu_unwind_state_data

Add a way to examine the unwind data without actually restoring
the data back into env.

Reviewed-by: Claudio Fontana
Signed-off-by: Richard Henderson
---
 accel/tcg/internal.h      |  4 +--
 accel/tcg/translate-all.c | 74 +++++++++++++++++++++++++++++++----------------
 include/exec/exec-all.h   | 21 +++++++++++---
 3 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 1227bb6..9c06b32 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -106,8 +106,8 @@ void tb_reset_jump(TranslationBlock *tb, int n);
 TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                                tb_page_addr_t phys_page2);
 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
-int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
-                              uintptr_t searched_pc, bool reset_icount);
+void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
+                               uintptr_t host_pc, bool reset_icount);

 /* Return the current PC from CPU, which may be cached in TB. */
 static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index f185356..319becb 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -247,52 +247,66 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
     return p - block;
 }

-/* The cpu state corresponding to 'searched_pc' is restored. 
- * When reset_icount is true, current TB will be interrupted and
- * icount should be recalculated.
- */
-int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
-                              uintptr_t searched_pc, bool reset_icount)
+static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
+                                   uint64_t *data)
 {
-    uint64_t data[TARGET_INSN_START_WORDS];
-    uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
+    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
     const uint8_t *p = tb->tc.ptr + tb->tc.size;
     int i, j, num_insns = tb->icount;
-#ifdef CONFIG_PROFILER
-    TCGProfile *prof = &tcg_ctx->prof;
-    int64_t ti = profile_getclock();
-#endif

-    searched_pc -= GETPC_ADJ;
+    host_pc -= GETPC_ADJ;

-    if (searched_pc < host_pc) {
+    if (host_pc < iter_pc) {
         return -1;
     }

-    memset(data, 0, sizeof(data));
+    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
     if (!TARGET_TB_PCREL) {
         data[0] = tb_pc(tb);
     }

-    /* Reconstruct the stored insn data while looking for the point at
-       which the end of the insn exceeds the searched_pc.  */
+    /*
+     * Reconstruct the stored insn data while looking for the point
+     * at which the end of the insn exceeds host_pc.
+     */
     for (i = 0; i < num_insns; ++i) {
         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
             data[j] += decode_sleb128(&p);
         }
-        host_pc += decode_sleb128(&p);
-        if (host_pc > searched_pc) {
-            goto found;
+        iter_pc += decode_sleb128(&p);
+        if (iter_pc > host_pc) {
+            return num_insns - i;
         }
     }
     return -1;
+}
+
+/*
+ * The cpu state corresponding to 'host_pc' is restored.
+ * When reset_icount is true, current TB will be interrupted and
+ * icount should be recalculated.
+ */
+void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
+                               uintptr_t host_pc, bool reset_icount)
+{
+    uint64_t data[TARGET_INSN_START_WORDS];
+#ifdef CONFIG_PROFILER
+    TCGProfile *prof = &tcg_ctx->prof;
+    int64_t ti = profile_getclock();
+#endif
+    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
+
+    if (insns_left < 0) {
+        return;
+    }

-  found:
     if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
         assert(icount_enabled());
-        /* Reset the cycle counter to the start of the block
-           and shift if to the number of actually executed instructions */
-        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
+        /*
+         * Reset the cycle counter to the start of the block and
+         * shift it to the number of actually executed instructions. 
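+         * insns_left, as returned by cpu_unwind_data_from_tb above,
+         * counts the instructions that had not yet completed at host_pc.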
+         */
+        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
     }

     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
@@ -302,7 +316,6 @@ int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                    prof->restore_time + profile_getclock() - ti);
     qatomic_set(&prof->restore_count, prof->restore_count + 1);
 #endif
-    return 0;
 }

 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
@@ -335,6 +348,17 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
     return false;
 }

+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
+{
+    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
+        TranslationBlock *tb = tcg_tb_lookup(host_pc);
+        if (tb) {
+            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
+        }
+    }
+    return false;
+}
+
 void page_init(void)
 {
     page_size_init();
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index e948992..7d851f5 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -40,19 +40,32 @@ typedef ram_addr_t tb_page_addr_t;
 #endif

 /**
+ * cpu_unwind_state_data:
+ * @cpu: the cpu context
+ * @host_pc: the host pc within the translation
+ * @data: output data
+ *
+ * Attempt to load the unwind state for a host pc occurring in
+ * translated code.  If @host_pc is not in translated code, the
+ * function returns false; otherwise @data is loaded.
+ * This is the same unwind info as given to restore_state_to_opc.
+ */
+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data);
+
+/**
 * cpu_restore_state:
- * @cpu: the vCPU state is to be restore to
- * @searched_pc: the host PC the fault occurred at
+ * @cpu: the cpu context
+ * @host_pc: the host pc within the translation
 * @will_exit: true if the TB executed will be interrupted after some
               cpu adjustments. Required for maintaining the correct
               icount valus
 * @return: true if state was restored, false otherwise
 *
 * Attempt to restore the state for a fault occurring in translated
- * code. If the searched_pc is not in translated code no state is
+ * code. If @host_pc is not in translated code no state is
 * restored and the function returns false.
 */
-bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit);
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit);

 G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu);
 G_NORETURN void cpu_loop_exit(CPUState *cpu);
-- 
cgit v1.1


From f484f213c9f4ae1cd30ebdaadc7b539d745d39fb Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Oct 2022 22:45:29 +1000
Subject: target/i386: Use cpu_unwind_state_data for tpr access

Avoid cpu_restore_state, and modifying env->eip out from underneath
the translator with TARGET_TB_PCREL.  There is some slight duplication
from x86_restore_state_to_opc, but it's just a few lines. 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1269
Reviewed-by: Claudio Fontana
Signed-off-by: Richard Henderson
---
 target/i386/helper.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/target/i386/helper.c b/target/i386/helper.c
index b62a1e4..0ac2da0 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -509,6 +509,27 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
     }
 }

+static inline target_ulong get_memio_eip(CPUX86State *env)
+{
+#ifdef CONFIG_TCG
+    uint64_t data[TARGET_INSN_START_WORDS];
+    CPUState *cs = env_cpu(env);
+
+    if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) {
+        return env->eip;
+    }
+
+    /* Per x86_restore_state_to_opc. */
+    if (TARGET_TB_PCREL) {
+        return (env->eip & TARGET_PAGE_MASK) | data[0];
+    } else {
+        return data[0] - env->segs[R_CS].base;
+    }
+#else
+    qemu_build_not_reached();
+#endif
+}
+
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
 {
     X86CPU *cpu = env_archcpu(env);
@@ -519,9 +540,9 @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
         cpu_interrupt(cs, CPU_INTERRUPT_TPR);
     } else if (tcg_enabled()) {
-        cpu_restore_state(cs, cs->mem_io_pc, false);
+        target_ulong eip = get_memio_eip(env);

-        apic_handle_tpr_access_report(cpu->apic_state, env->eip, access);
+        apic_handle_tpr_access_report(cpu->apic_state, eip, access);
     }
 }
 #endif /* !CONFIG_USER_ONLY */
-- 
cgit v1.1


From 5813c5c74a755fd0c1b10be38c6fdf5c54c468e4 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Oct 2022 22:54:15 +1000
Subject: target/openrisc: Always exit after mtspr npc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have called cpu_restore_state asserting will_exit.
Do not go back on that promise.  This affects icount.

Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Richard Henderson
---
 target/openrisc/sys_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index 09b3c97..a3508e4 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -51,8 +51,8 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
         if (env->pc != rb) {
             env->pc = rb;
             env->dflag = 0;
-            cpu_loop_exit(cs);
         }
+        cpu_loop_exit(cs);
         break;

     case TO_SPR(0, 17): /* SR */
-- 
cgit v1.1


From cc30dc441b44ad15f4adfb13d9a68cba6fa39a23 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Oct 2022 22:55:26 +1000
Subject: target/openrisc: Use cpu_unwind_state_data for mfspr

Since we do not plan to exit, use cpu_unwind_state_data and
extract exactly the data requested.

This is a bug fix, in that we no longer clobber dflag.

Consider:

    l.j     L2          // branch
    l.mfspr r1, ppc     // delay

L1: boom
L2: l.lwa r3, (r4)

Here, dflag would be set by cpu_restore_state (because that is the
current state of the cpu), but not cleared by tb_stop on exiting the
TB (because DisasContext has recorded the current value as zero).

The next TB begins at L2 with dflag incorrectly set.  If the load
has a tlb miss, then the exception will be delivered as per a delay
slot: with DSX set in the status register and PC decremented (delay
slots restart by re-executing the branch).  This will cause the
return from interrupt to go to L1, and boom! 
Signed-off-by: Richard Henderson --- target/openrisc/sys_helper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index a3508e4..dde2fa1 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -199,6 +199,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, target_ulong spr) { #ifndef CONFIG_USER_ONLY + uint64_t data[TARGET_INSN_START_WORDS]; MachineState *ms = MACHINE(qdev_get_machine()); OpenRISCCPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); @@ -232,14 +233,20 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, return env->evbar; case TO_SPR(0, 16): /* NPC (equals PC) */ - cpu_restore_state(cs, GETPC(), false); + if (cpu_unwind_state_data(cs, GETPC(), data)) { + return data[0]; + } return env->pc; case TO_SPR(0, 17): /* SR */ return cpu_get_sr(env); case TO_SPR(0, 18): /* PPC */ - cpu_restore_state(cs, GETPC(), false); + if (cpu_unwind_state_data(cs, GETPC(), data)) { + if (data[1] & 2) { + return data[0] - 4; + } + } return env->ppc; case TO_SPR(0, 32): /* EPCR */ -- cgit v1.1 From 3d419a4dd227f174447e0b3978028a1cd52ccc5e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 23:09:57 +1000 Subject: accel/tcg: Remove will_exit argument from cpu_restore_state The value passed is always true, and if the target's synchronize_from_tb hook is non-trivial, not exiting may be erroneous. Reviewed-by: Claudio Fontana Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec-common.c | 2 +- accel/tcg/translate-all.c | 12 ++---------- include/exec/exec-all.h | 5 +---- target/alpha/helper.c | 2 +- target/alpha/mem_helper.c | 2 +- target/arm/op_helper.c | 2 +- target/arm/tlb_helper.c | 8 ++++---- target/cris/helper.c | 2 +- target/i386/tcg/sysemu/svm_helper.c | 2 +- target/m68k/op_helper.c | 4 ++-- target/microblaze/helper.c | 2 +- target/nios2/op_helper.c | 2 +- target/openrisc/sys_helper.c | 4 ++-- target/ppc/excp_helper.c | 2 +- target/s390x/tcg/excp_helper.c | 2 +- target/tricore/op_helper.c | 2 +- target/xtensa/helper.c | 6 +++--- 17 files changed, 25 insertions(+), 36 deletions(-) diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index be6fe45..c7bc8c6 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -71,7 +71,7 @@ void cpu_loop_exit(CPUState *cpu) void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) { if (pc) { - cpu_restore_state(cpu, pc, true); + cpu_restore_state(cpu, pc); } cpu_loop_exit(cpu); } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 319becb..90997fe 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -318,17 +318,9 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, #endif } -bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) { /* - * The pc update associated with restore without exit will - * break the relative pc adjustments performed by TARGET_TB_PCREL. - */ - if (TARGET_TB_PCREL) { - assert(will_exit); - } - - /* * The host_pc has to be in the rx region of the code buffer. * If it is not we will not be able to resolve it here. 
* The two cases where host_pc will not be correct are: @@ -341,7 +333,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { - cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); + cpu_restore_state_from_tb(cpu, tb, host_pc, true); return true; } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 7d851f5..9b7bfbf 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -56,16 +56,13 @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data); * cpu_restore_state: * @cpu: the cpu context * @host_pc: the host pc within the translation - * @will_exit: true if the TB executed will be interrupted after some - cpu adjustments. Required for maintaining the correct - icount valus * @return: true if state was restored, false otherwise * * Attempt to restore the state for a fault occurring in translated * code. If @host_pc is not in translated code no state is * restored and the function returns false. */ -bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit); +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc); G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu); G_NORETURN void cpu_loop_exit(CPUState *cpu); diff --git a/target/alpha/helper.c b/target/alpha/helper.c index a5a389b..970c869 100644 --- a/target/alpha/helper.c +++ b/target/alpha/helper.c @@ -532,7 +532,7 @@ G_NORETURN void dynamic_excp(CPUAlphaState *env, uintptr_t retaddr, cs->exception_index = excp; env->error_code = error; if (retaddr) { - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); /* Floating-point exceptions (our only users) point to the next PC. */ env->pc += 4; } diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c index 47283a0..a39b52c 100644 --- a/target/alpha/mem_helper.c +++ b/target/alpha/mem_helper.c @@ -28,7 +28,7 @@ static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retadd uint64_t pc; uint32_t insn; - cpu_restore_state(env_cpu(env), retaddr, true); + cpu_restore_state(env_cpu(env), retaddr); pc = env->pc; insn = cpu_ldl_code(env, pc); diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index c5bde1c..70672bc 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -78,7 +78,7 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, * we must restore CPU state here before setting the syndrome * the caller passed us, and cannot use cpu_loop_exit_restore(). 
      */
-    cpu_restore_state(cs, ra, true);
+    cpu_restore_state(cs, ra);
     raise_exception(env, excp, syndrome, target_el);
 }
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 69b0dc6..0f4f4fc 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -156,7 +156,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
     ARMMMUFaultInfo fi = {};

     /* now we have a real cpu fault */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);

     fi.type = ARMFault_Alignment;
     arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
@@ -196,7 +196,7 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
     ARMMMUFaultInfo fi = {};

     /* now we have a real cpu fault */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);

     fi.ea = arm_extabort_type(response);
     fi.type = ARMFault_SyncExternal;
@@ -252,7 +252,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
             return false;
         } else {
             /* now we have a real cpu fault */
-            cpu_restore_state(cs, retaddr, true);
+            cpu_restore_state(cs, retaddr);
             arm_deliver_fault(cpu, address, access_type, mmu_idx, fi);
         }
     }
@@ -271,7 +271,7 @@ void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr,
      * We report both ESR and FAR to signal handlers.
      * For now, it's easiest to deliver the fault normally.
      */
-    cpu_restore_state(cs, ra, true);
+    cpu_restore_state(cs, ra);
     arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi);
 }
diff --git a/target/cris/helper.c b/target/cris/helper.c
index 91e4aeb..81a7269 100644
--- a/target/cris/helper.c
+++ b/target/cris/helper.c
@@ -87,7 +87,7 @@ bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
         cs->exception_index = EXCP_BUSFAULT;
         env->fault_vector = res.bf_vec;
         if (retaddr) {
-            if (cpu_restore_state(cs, retaddr, true)) {
+            if (cpu_restore_state(cs, retaddr)) {
                 /* Evaluate flags after retranslation. */
                 helper_top_evaluate_flags(env);
             }
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 8e88567..2d27731 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -704,7 +704,7 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
 {
     CPUState *cs = env_cpu(env);

-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);

     qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016"
                   PRIx64 ", " TARGET_FMT_lx ")!\n",
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
index 5da176d..1ce850b 100644
--- a/target/m68k/op_helper.c
+++ b/target/m68k/op_helper.c
@@ -460,7 +460,7 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
     M68kCPU *cpu = M68K_CPU(cs);
     CPUM68KState *env = &cpu->env;

-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);

     if (m68k_feature(env, M68K_FEATURE_M68040)) {
         env->mmu.mmusr = 0;
@@ -558,7 +558,7 @@ raise_exception_format2(CPUM68KState *env, int tt, int ilen, uintptr_t raddr)
     cs->exception_index = tt;

     /* Recover PC and CC_OP for the beginning of the insn. */
-    cpu_restore_state(cs, raddr, true);
+    cpu_restore_state(cs, raddr);

     /* Flags are current in env->cc_*, or are undefined. */
     env->cc_op = CC_OP_FLAGS;
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
index a607fe6..98bdb82 100644
--- a/target/microblaze/helper.c
+++ b/target/microblaze/helper.c
@@ -277,7 +277,7 @@ void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
     uint32_t esr, iflags;

     /* Recover the pc and iflags from the corresponding insn_start. */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     iflags = cpu->env.iflags;

     qemu_log_mask(CPU_LOG_INT,
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index 2e30d0a..0aaf33f 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -40,7 +40,7 @@ void nios2_cpu_loop_exit_advance(CPUNios2State *env, uintptr_t retaddr)
      * Do this here, rather than in restore_state_to_opc(),
      * lest we affect QEMU internal exceptions, like EXCP_DEBUG.
      */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     env->pc += 4;
     cpu_loop_exit(cs);
 }
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index dde2fa1..ec14596 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -45,7 +45,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
         break;

     case TO_SPR(0, 16): /* NPC */
-        cpu_restore_state(cs, GETPC(), true);
+        cpu_restore_state(cs, GETPC());
        /* ??? Mirror or1ksim in not trashing delayed branch state
           when "jumping" to the current instruction. */
         if (env->pc != rb) {
@@ -131,7 +131,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
     case TO_SPR(8, 0): /* PMR */
         env->pmr = rb;
         if (env->pmr & PMR_DME || env->pmr & PMR_SME) {
-            cpu_restore_state(cs, GETPC(), true);
+            cpu_restore_state(cs, GETPC());
             env->pc += 4;
             cs->halted = 1;
             raise_exception(cpu, EXCP_HALTED);
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 09a8156..a05a2ed 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -3075,7 +3075,7 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
     uint32_t insn;

     /* Restore state and reload the insn we executed, for filling in DSISR. */
-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     insn = cpu_ldl_code(env, env->nip);

     switch (env->mmu_model) {
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
index 29ccf70..2cd6d06 100644
--- a/target/s390x/tcg/excp_helper.c
+++ b/target/s390x/tcg/excp_helper.c
@@ -39,7 +39,7 @@ G_NORETURN void tcg_s390_program_interrupt(CPUS390XState *env,
 {
     CPUState *cs = env_cpu(env);

-    cpu_restore_state(cs, ra, true);
+    cpu_restore_state(cs, ra);
     qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
                   env->psw.addr);
     trigger_pgm_exception(env, code);
diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c
index a79c838..532ae6b 100644
--- a/target/tricore/op_helper.c
+++ b/target/tricore/op_helper.c
@@ -31,7 +31,7 @@ void raise_exception_sync_internal(CPUTriCoreState *env, uint32_t class, int tin
 {
     CPUState *cs = env_cpu(env);
     /* in case we come from a helper-call we need to restore the PC */
-    cpu_restore_state(cs, pc, true);
+    cpu_restore_state(cs, pc);

     /* Tin is loaded into d[15] */
     env->gpr_d[15] = tin;
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
index e0a9caa..2aa9777 100644
--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -253,7 +253,7 @@ void xtensa_cpu_do_unaligned_access(CPUState *cs,
     assert(xtensa_option_enabled(env->config,
                                  XTENSA_OPTION_UNALIGNED_EXCEPTION));
-    cpu_restore_state(CPU(cpu), retaddr, true);
+    cpu_restore_state(CPU(cpu), retaddr);
     HELPER(exception_cause_vaddr)(env,
                                   env->pc, LOAD_STORE_ALIGNMENT_CAUSE,
                                   addr);
@@ -284,7 +284,7 @@ bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
     } else if (probe) {
         return false;
     } else {
-        cpu_restore_state(cs, retaddr, true);
+        cpu_restore_state(cs, retaddr);
         HELPER(exception_cause_vaddr)(env, env->pc, ret, address);
     }
 }
@@ -297,7 +297,7 @@ void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
     XtensaCPU *cpu = XTENSA_CPU(cs);
     CPUXtensaState *env = &cpu->env;

-    cpu_restore_state(cs, retaddr, true);
+    cpu_restore_state(cs, retaddr);
     HELPER(exception_cause_vaddr)(env, env->pc,
                                   access_type == MMU_INST_FETCH ?
                                   INSTR_PIF_ADDR_ERROR_CAUSE :
--
cgit v1.1

From cfa29dd50611a0ecea9888818692290148773c0d Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Oct 2022 23:12:56 +1000
Subject: accel/tcg: Remove reset_icount argument from cpu_restore_state_from_tb

The value passed is always true.

Reviewed-by: Claudio Fontana
Signed-off-by: Richard Henderson
---
 accel/tcg/internal.h      |  2 +-
 accel/tcg/tb-maint.c      |  4 ++--
 accel/tcg/translate-all.c | 15 +++++++--------
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 9c06b32..cb13bad 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -107,7 +107,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                                tb_page_addr_t phys_page2);
 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
-                               uintptr_t host_pc, bool reset_icount);
+                               uintptr_t host_pc);

 /* Return the current PC from CPU, which may be cached in TB. */
 static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index c8e9210..0cdb355 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -536,7 +536,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
                  * restore the CPU state.
                  */
                 current_tb_modified = true;
-                cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
+                cpu_restore_state_from_tb(cpu, current_tb, retaddr);
             }
 #endif /* TARGET_HAS_PRECISE_SMC */
             tb_phys_invalidate__locked(tb);
@@ -685,7 +685,7 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
              * function to partially restore the CPU state.
              */
             current_tb_modified = true;
-            cpu_restore_state_from_tb(cpu, current_tb, pc, true);
+            cpu_restore_state_from_tb(cpu, current_tb, pc);
         }
 #endif /* TARGET_HAS_PRECISE_SMC */
         tb_phys_invalidate(tb, addr);
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 90997fe..0089578 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -282,12 +282,11 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
 }

 /*
- * The cpu state corresponding to 'host_pc' is restored.
- * When reset_icount is true, current TB will be interrupted and
- * icount should be recalculated.
+ * The cpu state corresponding to 'host_pc' is restored in
+ * preparation for exiting the TB.
  */
 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
-                               uintptr_t host_pc, bool reset_icount)
+                               uintptr_t host_pc)
 {
     uint64_t data[TARGET_INSN_START_WORDS];
 #ifdef CONFIG_PROFILER
@@ -300,7 +299,7 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
         return;
     }

-    if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
+    if (tb_cflags(tb) & CF_USE_ICOUNT) {
         assert(icount_enabled());
         /*
          * Reset the cycle counter to the start of the block and
@@ -333,7 +332,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
         TranslationBlock *tb = tcg_tb_lookup(host_pc);
         if (tb) {
-            cpu_restore_state_from_tb(cpu, tb, host_pc, true);
+            cpu_restore_state_from_tb(cpu, tb, host_pc);
             return true;
         }
     }
@@ -1032,7 +1031,7 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
     tb = tcg_tb_lookup(retaddr);
     if (tb) {
         /* We can use retranslation to find the PC. */
-        cpu_restore_state_from_tb(cpu, tb, retaddr, true);
+        cpu_restore_state_from_tb(cpu, tb, retaddr);
         tb_phys_invalidate(tb, -1);
     } else {
         /* The exception probably happened in a helper. The CPU state should
@@ -1068,7 +1067,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                   (void *)retaddr);
     }
-    cpu_restore_state_from_tb(cpu, tb, retaddr, true);
+    cpu_restore_state_from_tb(cpu, tb, retaddr);

     /*
      * Some guests must re-execute the branch when re-executing a delay
--
cgit v1.1

From 631793308679cf0436cd7145a9ff318331c982c9 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 24 Oct 2022 16:16:30 +1000
Subject: target/i386: Expand eflags updates inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The helpers for reset_rf, cli, sti, clac, stac are completely
trivial; implement them inline. Drop some nearby #if 0 code.
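
As background: each removed helper performs one read-modify-write of
env->eflags, which the new gen_set_eflags()/gen_reset_eflags() in the
diff below emit directly as a TCG load, ori/andi, store sequence. A
minimal standalone sketch of that masking pattern, using the
architectural x86 EFLAGS bit positions (illustrative C only, not code
from this series):

    #include <stdint.h>
    #include <stdio.h>

    /* Architectural x86 EFLAGS bits touched by the removed helpers. */
    #define IF_MASK (1u << 9)   /* interrupt enable: cli/sti */
    #define RF_MASK (1u << 16)  /* resume flag: reset_rf */
    #define AC_MASK (1u << 18)  /* alignment check: clac/stac */

    /* The same read-modify-write the inlined TCG ops perform. */
    static uint32_t set_flags(uint32_t eflags, uint32_t mask)
    {
        return eflags | mask;
    }

    static uint32_t reset_flags(uint32_t eflags, uint32_t mask)
    {
        return eflags & ~mask;
    }

    int main(void)
    {
        uint32_t eflags = 0x2;                  /* bit 1 is reserved, always set */

        eflags = set_flags(eflags, IF_MASK);    /* sti */
        eflags = reset_flags(eflags, IF_MASK);  /* cli */
        eflags = reset_flags(eflags, RF_MASK);  /* end-of-TB RF clear */
        printf("eflags = %#x\n", eflags);
        return 0;
    }

Since the operation is a plain bitwise update of a field in env, there
is nothing for a C helper to add except call overhead, which is why the
patch expands it at translation time.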
Reviewed-by: Paolo Bonzini
Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Richard Henderson
---
 target/i386/helper.h        |  5 -----
 target/i386/tcg/cc_helper.c | 41 -----------------------------------------
 target/i386/tcg/translate.c | 30 +++++++++++++++++++++++++-----
 3 files changed, 25 insertions(+), 51 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index 88143b2..b7de542 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -56,13 +56,8 @@ DEF_HELPER_2(syscall, void, env, int)
 DEF_HELPER_2(sysret, void, env, int)
 #endif
 DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
-DEF_HELPER_1(reset_rf, void, env)
 DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
-DEF_HELPER_1(cli, void, env)
-DEF_HELPER_1(sti, void, env)
-DEF_HELPER_1(clac, void, env)
-DEF_HELPER_1(stac, void, env)

 DEF_HELPER_3(boundw, void, env, tl, int)
 DEF_HELPER_3(boundl, void, env, tl, int)
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index cc7ea9e..6227dbb 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -346,44 +346,3 @@ void helper_clts(CPUX86State *env)
     env->cr[0] &= ~CR0_TS_MASK;
     env->hflags &= ~HF_TS_MASK;
 }
-
-void helper_reset_rf(CPUX86State *env)
-{
-    env->eflags &= ~RF_MASK;
-}
-
-void helper_cli(CPUX86State *env)
-{
-    env->eflags &= ~IF_MASK;
-}
-
-void helper_sti(CPUX86State *env)
-{
-    env->eflags |= IF_MASK;
-}
-
-void helper_clac(CPUX86State *env)
-{
-    env->eflags &= ~AC_MASK;
-}
-
-void helper_stac(CPUX86State *env)
-{
-    env->eflags |= AC_MASK;
-}
-
-#if 0
-/* vm86plus instructions */
-void helper_cli_vm(CPUX86State *env)
-{
-    env->eflags &= ~VIF_MASK;
-}
-
-void helper_sti_vm(CPUX86State *env)
-{
-    env->eflags |= VIF_MASK;
-    if (env->eflags & VIP_MASK) {
-        raise_exception_ra(env, EXCP0D_GPF, GETPC());
-    }
-}
-#endif
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 546c427..0ee548c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2746,6 +2746,26 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask)
     }
 }

+static void gen_set_eflags(DisasContext *s, target_ulong mask)
+{
+    TCGv t = tcg_temp_new();
+
+    tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_gen_ori_tl(t, t, mask);
+    tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_temp_free(t);
+}
+
+static void gen_reset_eflags(DisasContext *s, target_ulong mask)
+{
+    TCGv t = tcg_temp_new();
+
+    tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_gen_andi_tl(t, t, ~mask);
+    tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags));
+    tcg_temp_free(t);
+}
+
 /* Clear BND registers during legacy branches. */
 static void gen_bnd_jmp(DisasContext *s)
 {
@@ -2776,7 +2796,7 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
     }

     if (s->base.tb->flags & HF_RF_MASK) {
-        gen_helper_reset_rf(cpu_env);
+        gen_reset_eflags(s, RF_MASK);
     }
     if (recheck_tf) {
         gen_helper_rechecking_single_step(cpu_env);
@@ -5502,12 +5522,12 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
 #endif
     case 0xfa: /* cli */
         if (check_iopl(s)) {
-            gen_helper_cli(cpu_env);
+            gen_reset_eflags(s, IF_MASK);
         }
         break;
     case 0xfb: /* sti */
         if (check_iopl(s)) {
-            gen_helper_sti(cpu_env);
+            gen_set_eflags(s, IF_MASK);
             /* interruptions are enabled only the first insn after sti */
             gen_update_eip_next(s);
             gen_eob_inhibit_irq(s, true);
@@ -5789,7 +5809,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
                 || CPL(s) != 0) {
                 goto illegal_op;
             }
-            gen_helper_clac(cpu_env);
+            gen_reset_eflags(s, AC_MASK);
             s->base.is_jmp = DISAS_EOB_NEXT;
             break;

@@ -5798,7 +5818,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
                 || CPL(s) != 0) {
                 goto illegal_op;
             }
-            gen_helper_stac(cpu_env);
+            gen_set_eflags(s, AC_MASK);
             s->base.is_jmp = DISAS_EOB_NEXT;
             break;
--
cgit v1.1

From 4e4fa6c12d97ee3ee87623c153009a5abd7b428e Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 31 Oct 2022 13:26:36 +1100
Subject: accel/tcg: Complete cpu initialization before registration

Delay cpu_list_add until realize is complete, so that cross-cpu
interaction does not happen with incomplete cpu state. For this,
we must move plugin initialization out of tcg_exec_realizefn,
because no cpu_index has been assigned at that point.

Fixes a problem with cross-cpu jump cache flushing when the jump
cache has not yet been allocated.

Fixes: a976a99a2975 ("include/hw/core: Create struct CPUJumpCache")
Acked-by: Ilya Leoshkevich
Reported-by: Ilya Leoshkevich
Signed-off-by: Richard Henderson
---
 accel/tcg/cpu-exec.c      |  8 +++++---
 accel/tcg/translate-all.c | 16 +++++++---------
 cpu.c                     | 10 +++++++++-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 82b06c1..356fe34 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1052,23 +1052,25 @@ void tcg_exec_realizefn(CPUState *cpu, Error **errp)
         cc->tcg_ops->initialize();
         tcg_target_initialized = true;
     }
-    tlb_init(cpu);
-    qemu_plugin_vcpu_init_hook(cpu);

+    cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
+    tlb_init(cpu);
 #ifndef CONFIG_USER_ONLY
     tcg_iommu_init_notifier_list(cpu);
 #endif /* !CONFIG_USER_ONLY */
+    /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */
 }

 /* undo the initializations in reverse order */
 void tcg_exec_unrealizefn(CPUState *cpu)
 {
+    qemu_plugin_vcpu_exit_hook(cpu);
 #ifndef CONFIG_USER_ONLY
     tcg_iommu_free_notifier_list(cpu);
 #endif /* !CONFIG_USER_ONLY */
-    qemu_plugin_vcpu_exit_hook(cpu);

     tlb_destroy(cpu);
+    g_free(cpu->tb_jmp_cache);
 }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 0089578..921944a 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1580,15 +1580,13 @@ void tcg_flush_jmp_cache(CPUState *cpu)
 {
     CPUJumpCache *jc = cpu->tb_jmp_cache;

-    if (likely(jc)) {
-        for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-            qatomic_set(&jc->array[i].tb, NULL);
-        }
-    } else {
-        /* This should happen once during realize, and thus never race. */
-        jc = g_new0(CPUJumpCache, 1);
-        jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
-        assert(jc == NULL);
+    /* During early initialization, the cache may not yet be allocated. */
+    if (unlikely(jc == NULL)) {
+        return;
+    }
+
+    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
+        qatomic_set(&jc->array[i].tb, NULL);
     }
 }
diff --git a/cpu.c b/cpu.c
index 2a09b05..4a7d865 100644
--- a/cpu.c
+++ b/cpu.c
@@ -134,15 +134,23 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
     /* cache the cpu class for the hotpath */
     cpu->cc = CPU_GET_CLASS(cpu);

-    cpu_list_add(cpu);
     if (!accel_cpu_realizefn(cpu, errp)) {
         return;
     }
+
     /* NB: errp parameter is unused currently */
     if (tcg_enabled()) {
         tcg_exec_realizefn(cpu, errp);
     }

+    /* Wait until cpu initialization complete before exposing cpu. */
+    cpu_list_add(cpu);
+
+    /* Plugin initialization must wait until cpu_index assigned. */
+    if (tcg_enabled()) {
+        qemu_plugin_vcpu_init_hook(cpu);
+    }
+
 #ifdef CONFIG_USER_ONLY
     assert(qdev_get_vmsd(DEVICE(cpu)) == NULL ||
            qdev_get_vmsd(DEVICE(cpu))->unmigratable);
--
cgit v1.1

From 83d92559cdf0ce842e52e5bbf230f7f62a6206aa Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Fri, 28 Oct 2022 14:42:27 +0200
Subject: tests/tcg/multiarch: Add munmap-pthread.c

Add a test to detect races between munmap() and creating new threads.

Signed-off-by: Ilya Leoshkevich
Message-Id: <20221028124227.2354792-3-iii@linux.ibm.com>
[rth: add more return insns]
Signed-off-by: Richard Henderson
---
 tests/tcg/multiarch/Makefile.target  |  3 ++
 tests/tcg/multiarch/munmap-pthread.c | 79 ++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 tests/tcg/multiarch/munmap-pthread.c

diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index 78104f9..5f0fee1 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -36,6 +36,9 @@ threadcount: LDFLAGS+=-lpthread

 signals: LDFLAGS+=-lrt -lpthread

+munmap-pthread: CFLAGS+=-pthread
+munmap-pthread: LDFLAGS+=-pthread
+
 # We define the runner for test-mmap after the individual
 # architectures have defined their supported pages sizes. If no
 # additional page sizes are defined we only run the default test.
diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c
new file mode 100644
index 0000000..d7143b0
--- /dev/null
+++ b/tests/tcg/multiarch/munmap-pthread.c
@@ -0,0 +1,79 @@
+/* Test that munmap() and thread creation do not race. */
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+static const char nop_func[] = {
+#if defined(__aarch64__)
+    0xc0, 0x03, 0x5f, 0xd6,     /* ret */
+#elif defined(__alpha__)
+    0x01, 0x80, 0xFA, 0x6B,     /* ret */
+#elif defined(__arm__)
+    0x1e, 0xff, 0x2f, 0xe1,     /* bx lr */
+#elif defined(__riscv)
+    0x67, 0x80, 0x00, 0x00,     /* ret */
+#elif defined(__s390__)
+    0x07, 0xfe,                 /* br %r14 */
+#elif defined(__i386__) || defined(__x86_64__)
+    0xc3,                       /* ret */
+#endif
+};
+
+static void *thread_mmap_munmap(void *arg)
+{
+    volatile bool *run = arg;
+    char *p;
+    int ret;
+
+    while (*run) {
+        p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE | PROT_EXEC,
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        assert(p != MAP_FAILED);
+
+        /* Create a small translation block. */
+        memcpy(p, nop_func, sizeof(nop_func));
+        ((void(*)(void))p)();
+
+        ret = munmap(p, getpagesize());
+        assert(ret == 0);
+    }
+
+    return NULL;
+}
+
+static void *thread_dummy(void *arg)
+{
+    return NULL;
+}
+
+int main(void)
+{
+    pthread_t mmap_munmap, dummy;
+    volatile bool run = true;
+    int i, ret;
+
+    /* Without a template, nothing to test. */
+    if (sizeof(nop_func) == 0) {
+        return EXIT_SUCCESS;
+    }
+
+    ret = pthread_create(&mmap_munmap, NULL, thread_mmap_munmap, (void *)&run);
+    assert(ret == 0);
+
+    for (i = 0; i < 1000; i++) {
+        ret = pthread_create(&dummy, NULL, thread_dummy, NULL);
+        assert(ret == 0);
+        ret = pthread_join(dummy, NULL);
+        assert(ret == 0);
+    }
+
+    run = false;
+    ret = pthread_join(mmap_munmap, NULL);
+    assert(ret == 0);
+
+    return EXIT_SUCCESS;
+}
--
cgit v1.1
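
A porting note on the test above: nop_func only covers the hosts listed
in its #if ladder. On any other host the array is empty, so main() exits
early through the sizeof(nop_func) check and the test is a no-op.
Supporting another host only requires adding its return-instruction
encoding. A hedged sketch for little-endian ppc64, where blr is
0x4e800020 per the Power ISA and the ELFv2 ABI's plain function pointers
let the direct call in thread_mmap_munmap work; this hunk is
illustrative and untested, not part of the series:

    #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
        0x20, 0x00, 0x80, 0x4e,     /* blr (0x4e800020, little-endian bytes) */

Big-endian ppc64 is deliberately left out of this sketch: the ELFv1 ABI
uses function descriptors, so casting a code pointer to a function
pointer and calling it, as the test does, would not be valid there.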