60 files changed, 11882 insertions, 9322 deletions
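Most of the target/ hunks below make one mechanical change: a carry chain built from two chained tcg_gen_add2_* calls is replaced by a single call to the new tcg_gen_addcio_* helper declared in include/tcg/tcg-op-common.h. A condensed before/after, adapted from the target/openrisc/translate.c hunk further down (schematic only; it relies on QEMU's TCG front-end API and is not standalone code):

/* Before: carry-in and second operand folded in via two double-word adds. */
tcg_gen_add2_tl(res, cpu_sr_cy, srca, dc->zero, cpu_sr_cy, dc->zero);
tcg_gen_add2_tl(res, cpu_sr_cy, res, cpu_sr_cy, srcb, dc->zero);

/* After: one add with both carry-in and carry-out. */
tcg_gen_addcio_tl(res, cpu_sr_cy, srca, srcb, cpu_sr_cy);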
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 688984f..f26b837 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -239,7 +239,7 @@ Jumps/Labels - | Jump to label. - * - brcond_i32/i64 *t0*, *t1*, *cond*, *label* + * - brcond *t0*, *t1*, *cond*, *label* - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be: | @@ -261,98 +261,117 @@ Arithmetic .. list-table:: - * - add_i32/i64 *t0*, *t1*, *t2* + * - add *t0*, *t1*, *t2* - | *t0* = *t1* + *t2* - * - sub_i32/i64 *t0*, *t1*, *t2* + * - sub *t0*, *t1*, *t2* - | *t0* = *t1* - *t2* - * - neg_i32/i64 *t0*, *t1* + * - neg *t0*, *t1* - | *t0* = -*t1* (two's complement) - * - mul_i32/i64 *t0*, *t1*, *t2* + * - mul *t0*, *t1*, *t2* - | *t0* = *t1* * *t2* - * - div_i32/i64 *t0*, *t1*, *t2* + * - divs *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (signed) | Undefined behavior if division by zero or overflow. - * - divu_i32/i64 *t0*, *t1*, *t2* + * - divu *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (unsigned) | Undefined behavior if division by zero. - * - rem_i32/i64 *t0*, *t1*, *t2* + * - rems *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (signed) | Undefined behavior if division by zero or overflow. - * - remu_i32/i64 *t0*, *t1*, *t2* + * - remu *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (unsigned) | Undefined behavior if division by zero. + * - divs2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (signed) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass *nh* as a simple sign-extension of *nl*, so the only + overflow should be *INT_MIN* / -1. + + * - divu2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (unsigned) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass 0 to *nh* to make a simple zero-extension of *nl*, + so overflow should never occur. Logical ------- .. list-table:: - * - and_i32/i64 *t0*, *t1*, *t2* + * - and *t0*, *t1*, *t2* - | *t0* = *t1* & *t2* - * - or_i32/i64 *t0*, *t1*, *t2* + * - or *t0*, *t1*, *t2* - | *t0* = *t1* | *t2* - * - xor_i32/i64 *t0*, *t1*, *t2* + * - xor *t0*, *t1*, *t2* - | *t0* = *t1* ^ *t2* - * - not_i32/i64 *t0*, *t1* + * - not *t0*, *t1* - | *t0* = ~\ *t1* - * - andc_i32/i64 *t0*, *t1*, *t2* + * - andc *t0*, *t1*, *t2* - | *t0* = *t1* & ~\ *t2* - * - eqv_i32/i64 *t0*, *t1*, *t2* + * - eqv *t0*, *t1*, *t2* - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* - * - nand_i32/i64 *t0*, *t1*, *t2* + * - nand *t0*, *t1*, *t2* - | *t0* = ~(*t1* & *t2*) - * - nor_i32/i64 *t0*, *t1*, *t2* + * - nor *t0*, *t1*, *t2* - | *t0* = ~(*t1* | *t2*) - * - orc_i32/i64 *t0*, *t1*, *t2* + * - orc *t0*, *t1*, *t2* - | *t0* = *t1* | ~\ *t2* - * - clz_i32/i64 *t0*, *t1*, *t2* + * - clz *t0*, *t1*, *t2* - | *t0* = *t1* ? clz(*t1*) : *t2* - * - ctz_i32/i64 *t0*, *t1*, *t2* + * - ctz *t0*, *t1*, *t2* - | *t0* = *t1* ? ctz(*t1*) : *t2* - * - ctpop_i32/i64 *t0*, *t1* + * - ctpop *t0*, *t1* - | *t0* = number of bits set in *t1* | - | With *ctpop* short for "count population", matching - | the function name used in ``include/qemu/host-utils.h``. + | The name *ctpop* is short for "count population", and matches + the function name used in ``include/qemu/host-utils.h``. Shifts/Rotates @@ -360,30 +379,30 @@ Shifts/Rotates .. 
list-table:: - * - shl_i32/i64 *t0*, *t1*, *t2* + * - shl *t0*, *t1*, *t2* - | *t0* = *t1* << *t2* - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - shr_i32/i64 *t0*, *t1*, *t2* + * - shr *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (unsigned) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - sar_i32/i64 *t0*, *t1*, *t2* + * - sar *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (signed) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotl_i32/i64 *t0*, *t1*, *t2* + * - rotl *t0*, *t1*, *t2* - | Rotation of *t2* bits to the left - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotr_i32/i64 *t0*, *t1*, *t2* + * - rotr *t0*, *t1*, *t2* - | Rotation of *t2* bits to the right. - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. Misc @@ -391,26 +410,12 @@ Misc .. list-table:: - * - mov_i32/i64 *t0*, *t1* + * - mov *t0*, *t1* - | *t0* = *t1* - | Move *t1* to *t0* (both operands must have the same type). - - * - ext8s_i32/i64 *t0*, *t1* - - ext8u_i32/i64 *t0*, *t1* - - ext16s_i32/i64 *t0*, *t1* - - ext16u_i32/i64 *t0*, *t1* + | Move *t1* to *t0*. - ext32s_i64 *t0*, *t1* - - ext32u_i64 *t0*, *t1* - - - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type) - - * - bswap16_i32/i64 *t0*, *t1*, *flags* + * - bswap16 *t0*, *t1*, *flags* - | 16 bit byte swap on the low bits of a 32/64 bit input. | @@ -420,24 +425,24 @@ Misc | | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value. - * - bswap32_i64 *t0*, *t1*, *flags* - - - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, - except they apply from bit 31 instead of bit 15. + * - bswap32 *t0*, *t1*, *flags* - * - bswap32_i32 *t0*, *t1*, *flags* + - | 32 bit byte swap. The flags are the same as for bswap16, except + they apply from bit 31 instead of bit 15. On TCG_TYPE_I32, the + flags should be zero. - bswap64_i64 *t0*, *t1*, *flags* + * - bswap64 *t0*, *t1*, *flags* - - | 32/64 bit byte swap. The flags are ignored, but still present - for consistency with the other bswap opcodes. + - | 64 bit byte swap. The flags are ignored, but still present + for consistency with the other bswap opcodes. For future + compatibility, the flags should be zero. * - discard_i32/i64 *t0* - | Indicate that the value of *t0* won't be used later. It is useful to force dead code elimination. - * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len* + * - deposit *dest*, *t1*, *t2*, *pos*, *len* - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*. | @@ -446,14 +451,16 @@ Misc | *len* - the length of the bitfield | *pos* - the position of the first bit, counting from the LSB | - | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field + | For example, "deposit dest, t1, t2, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) + | + | on TCG_TYPE_I32. - * - extract_i32/i64 *dest*, *t1*, *pos*, *len* + * - extract *dest*, *t1*, *pos*, *len* - sextract_i32/i64 *dest*, *t1*, *pos*, *len* + sextract *dest*, *t1*, *pos*, *len* - | Extract a bitfield from *t1*, placing the result in *dest*. 
| @@ -462,16 +469,16 @@ Misc to the left with zeros; for sextract_*, the result will be extended to the left with copies of the bitfield sign bit at *pos* + *len* - 1. | - | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field + | For example, "sextract dest, t1, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* << 20) >> 28 | - | (using an arithmetic right shift). + | (using an arithmetic right shift) on TCG_TYPE_I32. - * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* + * - extract2 *dest*, *t1*, *t2*, *pos* - - | For N = {32,64}, extract an N-bit quantity from the concatenation + - | For TCG_TYPE_I{N}, extract an N-bit quantity from the concatenation of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander accepts 0 <= *pos* <= N as inputs. The backend code generator will not see either 0 or N as inputs for these opcodes. @@ -494,19 +501,19 @@ Conditional moves .. list-table:: - * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - setcond *dest*, *t1*, *t2*, *cond* - | *dest* = (*t1* *cond* *t2*) | | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - negsetcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - negsetcond *dest*, *t1*, *t2*, *cond* - | *dest* = -(*t1* *cond* *t2*) | | Set *dest* to -1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond* + * - movcond *dest*, *c1*, *c2*, *v1*, *v2*, *cond* - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*) | @@ -586,26 +593,79 @@ Multiword arithmetic support .. list-table:: - * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + * - addco *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* and in addition output to the + carry bit provided by the host architecture. + + * - addci *t0, *t1*, *t2* - sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture. + The output carry bit need not be computed. - - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are - formed from two single-word arguments, and the double-word output *t0* - is returned in two single-word outputs. + * - addcio *t0, *t1*, *t2* - * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture, + and also compute the output carry bit. + + * - addc1o *t0, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* + 1, and in addition output to the + carry bit provided by the host architecture. This is akin to + *addcio* with a fixed carry-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the addition chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_carry`` and then + the output routine for *addcio*. + + * - subbo *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* and in addition output to the + borrow bit provided by the host architecture. + | Depending on the host architecture, the carry bit may or may not be + identical to the borrow bit. Thus the addc\* and subb\* + opcodes must not be mixed. + + * - subbi *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture. + The output borrow bit need not be computed. 
+ + * - subbio *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture, + and also compute the output borrow bit. + + * - subb1o *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - 1, and in addition output to the + borrow bit provided by the host architecture. This is akin to + *subbio* with a fixed borrow-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the subtraction chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_borrow`` and then + the output routine for *subbio*. + + * - mulu2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full double-word product *t0*. The latter is returned in two single-word outputs. - * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + * - muls2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. - * - mulsh_i32/i64 *t0*, *t1*, *t2* + * - mulsh *t0*, *t1*, *t2* - muluh_i32/i64 *t0*, *t1*, *t2* + muluh *t0*, *t1*, *t2* - | Provide the high part of a signed or unsigned multiply, respectively. | @@ -684,8 +744,6 @@ QEMU specific operations qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx* - qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* - - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output register *t0* only. The address *t1* is always sized according to the guest, @@ -703,10 +761,6 @@ QEMU specific operations 64-bit memory access specified in *flags*. | | For qemu_ld/st_i128, these are only supported for a 64-bit host. - | - | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of - the memory operation is known to be 8-bit. This allows the backend to - provide a different set of register constraints. Host vector operations @@ -884,9 +938,9 @@ Assumptions The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or 64 bit. It is expected that the pointer has the same size as the word. -On a 32 bit target, all 64 bit operations are converted to 32 bits. A -few specific operations must be implemented to allow it (see add2_i32, -sub2_i32, brcond2_i32). +On a 32 bit target, all 64 bit operations are converted to 32 bits. +A few specific operations must be implemented to allow it +(see brcond2_i32, setcond2_i32). 
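The carry-opcode descriptions above are easiest to see with concrete arithmetic. Below is a minimal standalone sketch (plain C, not QEMU code; addco32() and addci32() are hypothetical names used only for this illustration) of how a 64-bit addition decomposes on a 32-bit host per the addco/addci definitions: the low-part addco produces the carry, and the high-part addci consumes it.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Low part: t0 = t1 + t2, also producing the carry out of bit 31 (addco). */
static uint32_t addco32(uint32_t a, uint32_t b, unsigned *carry_out)
{
    uint32_t r = a + b;
    *carry_out = r < a;
    return r;
}

/* High part: t0 = t1 + t2 + C; the carry out is not needed here (addci). */
static uint32_t addci32(uint32_t a, uint32_t b, unsigned carry_in)
{
    return a + b + carry_in;
}

int main(void)
{
    uint64_t x = 0x00000001ffffffffull, y = 1;
    unsigned c;
    uint32_t lo = addco32((uint32_t)x, (uint32_t)y, &c);
    uint32_t hi = addci32((uint32_t)(x >> 32), (uint32_t)(y >> 32), c);
    printf("0x%08" PRIx32 "%08" PRIx32 "\n", hi, lo); /* 0x0000000200000000 */
    return 0;
}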
On a 64 bit target, the values are transferred between 32 and 64-bit registers using the following ops: diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h index 009e277..b439bdb 100644 --- a/include/tcg/tcg-op-common.h +++ b/include/tcg/tcg-op-common.h @@ -135,6 +135,8 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); +void tcg_gen_addcio_i32(TCGv_i32 r, TCGv_i32 co, + TCGv_i32 a, TCGv_i32 b, TCGv_i32 ci); void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); @@ -238,6 +240,8 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co, + TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci); void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index cded92a..59d1975 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -253,6 +253,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_movcond_tl tcg_gen_movcond_i64 #define tcg_gen_add2_tl tcg_gen_add2_i64 #define tcg_gen_sub2_tl tcg_gen_sub2_i64 +#define tcg_gen_addcio_tl tcg_gen_addcio_i64 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 #define tcg_gen_muls2_tl tcg_gen_muls2_i64 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64 @@ -371,6 +372,7 @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64) #define tcg_gen_movcond_tl tcg_gen_movcond_i32 #define tcg_gen_add2_tl tcg_gen_add2_i32 #define tcg_gen_sub2_tl tcg_gen_sub2_i32 +#define tcg_gen_addcio_tl tcg_gen_addcio_i32 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 #define tcg_gen_muls2_tl tcg_gen_muls2_i32 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32 diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 5bf78b0..995b793 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -34,148 +34,84 @@ DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) +DEF(brcond, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH | TCG_OPF_INT) DEF(mb, 0, 0, 1, TCG_OPF_NOT_PRESENT) -DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) -DEF(setcond_i32, 1, 2, 1, 0) -DEF(negsetcond_i32, 1, 2, 1, 0) -DEF(movcond_i32, 1, 4, 1, 0) -/* load/store */ -DEF(ld8u_i32, 1, 1, 1, 0) -DEF(ld8s_i32, 1, 1, 1, 0) -DEF(ld16u_i32, 1, 1, 1, 0) -DEF(ld16s_i32, 1, 1, 1, 0) -DEF(ld_i32, 1, 1, 1, 0) -DEF(st8_i32, 0, 2, 1, 0) -DEF(st16_i32, 0, 2, 1, 0) -DEF(st_i32, 0, 2, 1, 0) -/* arith */ -DEF(add_i32, 1, 2, 0, 0) -DEF(sub_i32, 1, 2, 0, 0) -DEF(mul_i32, 1, 2, 0, 0) -DEF(div_i32, 1, 2, 0, 0) -DEF(divu_i32, 1, 2, 0, 0) -DEF(rem_i32, 1, 2, 0, 0) -DEF(remu_i32, 1, 2, 0, 0) -DEF(div2_i32, 2, 3, 0, 0) -DEF(divu2_i32, 2, 3, 0, 0) -DEF(and_i32, 1, 2, 0, 0) -DEF(or_i32, 1, 2, 0, 0) -DEF(xor_i32, 1, 2, 0, 0) -/* shifts/rotates */ -DEF(shl_i32, 1, 2, 0, 0) -DEF(shr_i32, 1, 2, 0, 0) -DEF(sar_i32, 1, 2, 0, 0) -DEF(rotl_i32, 1, 2, 0, 0) 
-DEF(rotr_i32, 1, 2, 0, 0) -DEF(deposit_i32, 1, 2, 2, 0) -DEF(extract_i32, 1, 1, 2, 0) -DEF(sextract_i32, 1, 1, 2, 0) -DEF(extract2_i32, 1, 2, 1, 0) +DEF(mov, 1, 1, 0, TCG_OPF_INT | TCG_OPF_NOT_PRESENT) + +DEF(add, 1, 2, 0, TCG_OPF_INT) +DEF(and, 1, 2, 0, TCG_OPF_INT) +DEF(andc, 1, 2, 0, TCG_OPF_INT) +DEF(bswap16, 1, 1, 1, TCG_OPF_INT) +DEF(bswap32, 1, 1, 1, TCG_OPF_INT) +DEF(bswap64, 1, 1, 1, TCG_OPF_INT) +DEF(clz, 1, 2, 0, TCG_OPF_INT) +DEF(ctpop, 1, 1, 0, TCG_OPF_INT) +DEF(ctz, 1, 2, 0, TCG_OPF_INT) +DEF(deposit, 1, 2, 2, TCG_OPF_INT) +DEF(divs, 1, 2, 0, TCG_OPF_INT) +DEF(divs2, 2, 3, 0, TCG_OPF_INT) +DEF(divu, 1, 2, 0, TCG_OPF_INT) +DEF(divu2, 2, 3, 0, TCG_OPF_INT) +DEF(eqv, 1, 2, 0, TCG_OPF_INT) +DEF(extract, 1, 1, 2, TCG_OPF_INT) +DEF(extract2, 1, 2, 1, TCG_OPF_INT) +DEF(ld8u, 1, 1, 1, TCG_OPF_INT) +DEF(ld8s, 1, 1, 1, TCG_OPF_INT) +DEF(ld16u, 1, 1, 1, TCG_OPF_INT) +DEF(ld16s, 1, 1, 1, TCG_OPF_INT) +DEF(ld32u, 1, 1, 1, TCG_OPF_INT) +DEF(ld32s, 1, 1, 1, TCG_OPF_INT) +DEF(ld, 1, 1, 1, TCG_OPF_INT) +DEF(movcond, 1, 4, 1, TCG_OPF_INT) +DEF(mul, 1, 2, 0, TCG_OPF_INT) +DEF(muls2, 2, 2, 0, TCG_OPF_INT) +DEF(mulsh, 1, 2, 0, TCG_OPF_INT) +DEF(mulu2, 2, 2, 0, TCG_OPF_INT) +DEF(muluh, 1, 2, 0, TCG_OPF_INT) +DEF(nand, 1, 2, 0, TCG_OPF_INT) +DEF(neg, 1, 1, 0, TCG_OPF_INT) +DEF(negsetcond, 1, 2, 1, TCG_OPF_INT) +DEF(nor, 1, 2, 0, TCG_OPF_INT) +DEF(not, 1, 1, 0, TCG_OPF_INT) +DEF(or, 1, 2, 0, TCG_OPF_INT) +DEF(orc, 1, 2, 0, TCG_OPF_INT) +DEF(rems, 1, 2, 0, TCG_OPF_INT) +DEF(remu, 1, 2, 0, TCG_OPF_INT) +DEF(rotl, 1, 2, 0, TCG_OPF_INT) +DEF(rotr, 1, 2, 0, TCG_OPF_INT) +DEF(sar, 1, 2, 0, TCG_OPF_INT) +DEF(setcond, 1, 2, 1, TCG_OPF_INT) +DEF(sextract, 1, 1, 2, TCG_OPF_INT) +DEF(shl, 1, 2, 0, TCG_OPF_INT) +DEF(shr, 1, 2, 0, TCG_OPF_INT) +DEF(st8, 0, 2, 1, TCG_OPF_INT) +DEF(st16, 0, 2, 1, TCG_OPF_INT) +DEF(st32, 0, 2, 1, TCG_OPF_INT) +DEF(st, 0, 2, 1, TCG_OPF_INT) +DEF(sub, 1, 2, 0, TCG_OPF_INT) +DEF(xor, 1, 2, 0, TCG_OPF_INT) + +DEF(addco, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(addc1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(addci, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) +DEF(addcio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) + +DEF(subbo, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(subb1o, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_OUT) +DEF(subbi, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN) +DEF(subbio, 1, 2, 0, TCG_OPF_INT | TCG_OPF_CARRY_IN | TCG_OPF_CARRY_OUT) -DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) - -DEF(add2_i32, 2, 4, 0, 0) -DEF(sub2_i32, 2, 4, 0, 0) -DEF(mulu2_i32, 2, 2, 0, 0) -DEF(muls2_i32, 2, 2, 0, 0) -DEF(muluh_i32, 1, 2, 0, 0) -DEF(mulsh_i32, 1, 2, 0, 0) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) DEF(setcond2_i32, 1, 4, 1, 0) -DEF(ext8s_i32, 1, 1, 0, 0) -DEF(ext16s_i32, 1, 1, 0, 0) -DEF(ext8u_i32, 1, 1, 0, 0) -DEF(ext16u_i32, 1, 1, 0, 0) -DEF(bswap16_i32, 1, 1, 1, 0) -DEF(bswap32_i32, 1, 1, 1, 0) -DEF(not_i32, 1, 1, 0, 0) -DEF(neg_i32, 1, 1, 0, 0) -DEF(andc_i32, 1, 2, 0, 0) -DEF(orc_i32, 1, 2, 0, 0) -DEF(eqv_i32, 1, 2, 0, 0) -DEF(nand_i32, 1, 2, 0, 0) -DEF(nor_i32, 1, 2, 0, 0) -DEF(clz_i32, 1, 2, 0, 0) -DEF(ctz_i32, 1, 2, 0, 0) -DEF(ctpop_i32, 1, 1, 0, 0) - -DEF(mov_i64, 1, 1, 0, TCG_OPF_NOT_PRESENT) -DEF(setcond_i64, 1, 2, 1, 0) -DEF(negsetcond_i64, 1, 2, 1, 0) -DEF(movcond_i64, 1, 4, 1, 0) -/* load/store */ -DEF(ld8u_i64, 1, 1, 1, 0) -DEF(ld8s_i64, 1, 1, 1, 0) -DEF(ld16u_i64, 1, 1, 1, 0) -DEF(ld16s_i64, 1, 1, 1, 0) -DEF(ld32u_i64, 1, 1, 1, 0) -DEF(ld32s_i64, 1, 1, 1, 0) -DEF(ld_i64, 1, 1, 1, 0) -DEF(st8_i64, 0, 2, 
1, 0) -DEF(st16_i64, 0, 2, 1, 0) -DEF(st32_i64, 0, 2, 1, 0) -DEF(st_i64, 0, 2, 1, 0) -/* arith */ -DEF(add_i64, 1, 2, 0, 0) -DEF(sub_i64, 1, 2, 0, 0) -DEF(mul_i64, 1, 2, 0, 0) -DEF(div_i64, 1, 2, 0, 0) -DEF(divu_i64, 1, 2, 0, 0) -DEF(rem_i64, 1, 2, 0, 0) -DEF(remu_i64, 1, 2, 0, 0) -DEF(div2_i64, 2, 3, 0, 0) -DEF(divu2_i64, 2, 3, 0, 0) -DEF(and_i64, 1, 2, 0, 0) -DEF(or_i64, 1, 2, 0, 0) -DEF(xor_i64, 1, 2, 0, 0) -/* shifts/rotates */ -DEF(shl_i64, 1, 2, 0, 0) -DEF(shr_i64, 1, 2, 0, 0) -DEF(sar_i64, 1, 2, 0, 0) -DEF(rotl_i64, 1, 2, 0, 0) -DEF(rotr_i64, 1, 2, 0, 0) -DEF(deposit_i64, 1, 2, 2, 0) -DEF(extract_i64, 1, 1, 2, 0) -DEF(sextract_i64, 1, 1, 2, 0) -DEF(extract2_i64, 1, 2, 1, 0) - /* size changing ops */ DEF(ext_i32_i64, 1, 1, 0, 0) DEF(extu_i32_i64, 1, 1, 0, 0) DEF(extrl_i64_i32, 1, 1, 0, 0) DEF(extrh_i64_i32, 1, 1, 0, 0) -DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_COND_BRANCH) -DEF(ext8s_i64, 1, 1, 0, 0) -DEF(ext16s_i64, 1, 1, 0, 0) -DEF(ext32s_i64, 1, 1, 0, 0) -DEF(ext8u_i64, 1, 1, 0, 0) -DEF(ext16u_i64, 1, 1, 0, 0) -DEF(ext32u_i64, 1, 1, 0, 0) -DEF(bswap16_i64, 1, 1, 1, 0) -DEF(bswap32_i64, 1, 1, 1, 0) -DEF(bswap64_i64, 1, 1, 1, 0) -DEF(not_i64, 1, 1, 0, 0) -DEF(neg_i64, 1, 1, 0, 0) -DEF(andc_i64, 1, 2, 0, 0) -DEF(orc_i64, 1, 2, 0, 0) -DEF(eqv_i64, 1, 2, 0, 0) -DEF(nand_i64, 1, 2, 0, 0) -DEF(nor_i64, 1, 2, 0, 0) -DEF(clz_i64, 1, 2, 0, 0) -DEF(ctz_i64, 1, 2, 0, 0) -DEF(ctpop_i64, 1, 1, 0, 0) - -DEF(add2_i64, 2, 4, 0, 0) -DEF(sub2_i64, 2, 4, 0, 0) -DEF(mulu2_i64, 2, 2, 0, 0) -DEF(muls2_i64, 2, 2, 0, 0) -DEF(muluh_i64, 1, 2, 0, 0) -DEF(mulsh_i64, 1, 2, 0, 0) - #define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2) /* There are tcg_ctx->insn_start_words here, not just one. */ @@ -188,22 +124,10 @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) DEF(plugin_cb, 0, 0, 1, TCG_OPF_NOT_PRESENT) DEF(plugin_mem_cb, 0, 1, 1, TCG_OPF_NOT_PRESENT) -DEF(qemu_ld_i32, 1, 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_i32, 0, 1 + 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_ld_i64, DATA64_ARGS, 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_i64, 0, DATA64_ARGS + 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) - -/* Only used by i386 to cope with stupid register constraints. */ -DEF(qemu_st8_i32, 0, 1 + 1, 1, - TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) - -/* Only for 64-bit hosts at the moment. */ -DEF(qemu_ld_i128, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) -DEF(qemu_st_i128, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_st, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_ld2, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) +DEF(qemu_st2, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_INT) /* Host vector support. */ diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 84d9950..aa300a2 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -418,6 +418,11 @@ struct TCGContext { MemOp riscv_cur_vsew; TCGType riscv_cur_type; #endif + /* + * During the tcg_reg_alloc_op loop, we are within a sequence of + * carry-using opcodes like addco+addci. + */ + bool carry_live; GHashTable *const_table[TCG_TYPE_COUNT]; TCGTempSet free_temps[TCG_TYPE_COUNT]; @@ -741,19 +746,25 @@ enum { /* Instruction has side effects: it cannot be removed if its outputs are not used, and might trigger exceptions. 
*/ TCG_OPF_SIDE_EFFECTS = 0x08, + /* Instruction operands may be I32 or I64 */ + TCG_OPF_INT = 0x10, /* Instruction is optional and not implemented by the host, or insn is generic and should not be implemented by the host. */ TCG_OPF_NOT_PRESENT = 0x20, /* Instruction operands are vectors. */ TCG_OPF_VECTOR = 0x40, /* Instruction is a conditional branch. */ - TCG_OPF_COND_BRANCH = 0x80 + TCG_OPF_COND_BRANCH = 0x80, + /* Instruction produces carry out. */ + TCG_OPF_CARRY_OUT = 0x100, + /* Instruction consumes carry in. */ + TCG_OPF_CARRY_IN = 0x200, }; typedef struct TCGOpDef { const char *name; uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; - uint8_t flags; + uint16_t flags; } TCGOpDef; extern const TCGOpDef tcg_op_defs[]; diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 43408c7..d9305f9 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1076,11 +1076,9 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i64 cf_64 = tcg_temp_new_i64(); TCGv_i64 vf_64 = tcg_temp_new_i64(); TCGv_i64 tmp = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_extu_i32_i64(cf_64, cpu_CF); - tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); - tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); + tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64); tcg_gen_extrl_i64_i32(cpu_CF, cf_64); gen_set_NZ64(result); @@ -1094,12 +1092,10 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i32 t0_32 = tcg_temp_new_i32(); TCGv_i32 t1_32 = tcg_temp_new_i32(); TCGv_i32 tmp = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_extrl_i64_i32(t0_32, t0); tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); @@ -8600,7 +8596,7 @@ static bool trans_CCMP(DisasContext *s, arg_CCMP *a) tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); nzcv = a->nzcv; - has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); + has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); if (nzcv & 8) { /* N */ tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); } else { diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index d23be47..f3cf028 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -629,7 +629,7 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) * = | ~(m | k) */ tcg_gen_and_i64(n, n, k); - if (tcg_op_supported(INDEX_op_orc_i64, TCG_TYPE_I64, 0)) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { tcg_gen_or_i64(m, m, k); tcg_gen_orc_i64(d, n, m); } else { diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index 273b860..88df9c4 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -494,20 +494,9 @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp = tcg_temp_new_i32(); - if (tcg_op_supported(INDEX_op_add2_i32, TCG_TYPE_I32, 0)) { - tcg_gen_movi_i32(tmp, 0); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp); - } else { - TCGv_i64 q0 = tcg_temp_new_i64(); - TCGv_i64 q1 = tcg_temp_new_i64(); - tcg_gen_extu_i32_i64(q0, t0); - tcg_gen_extu_i32_i64(q1, t1); - tcg_gen_add_i64(q0, 
q0, q1); - tcg_gen_extu_i32_i64(q1, cpu_CF); - tcg_gen_add_i64(q0, q0, q1); - tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0); - } + + tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0, t1, cpu_CF); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); tcg_gen_xor_i32(tmp, t0, t1); diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 14f3833..88a7d33 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -1209,10 +1209,10 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, cb_msb = tcg_temp_new_i64(); cb = tcg_temp_new_i64(); - tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); if (is_c) { - tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, - get_psw_carry(ctx, d), ctx->zero); + tcg_gen_addcio_i64(dest, cb_msb, in1, in2, get_psw_carry(ctx, d)); + } else { + tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); } tcg_gen_xor_i64(cb, in1, in2); tcg_gen_xor_i64(cb, cb, dest); @@ -1308,9 +1308,7 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, if (is_b) { /* DEST,C = IN1 + ~IN2 + C. */ tcg_gen_not_i64(cb, in2); - tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, - get_psw_carry(ctx, d), ctx->zero); - tcg_gen_add2_i64(dest, cb_msb, dest, cb_msb, cb, ctx->zero); + tcg_gen_addcio_i64(dest, cb_msb, in1, cb, get_psw_carry(ctx, d)); tcg_gen_xor_i64(cb, cb, in1); tcg_gen_xor_i64(cb, cb, dest); } else { @@ -3008,9 +3006,7 @@ static bool trans_ds(DisasContext *ctx, arg_rrr_cf *a) tcg_gen_xor_i64(add2, in2, addc); tcg_gen_andi_i64(addc, addc, 1); - tcg_gen_add2_i64(dest, cpu_psw_cb_msb, add1, ctx->zero, add2, ctx->zero); - tcg_gen_add2_i64(dest, cpu_psw_cb_msb, dest, cpu_psw_cb_msb, - addc, ctx->zero); + tcg_gen_addcio_i64(dest, cpu_psw_cb_msb, add1, add2, addc); /* Write back the result register. */ save_gpr(ctx, a->t, dest); @@ -3553,8 +3549,7 @@ static bool do_addb(DisasContext *ctx, unsigned r, TCGv_i64 in1, TCGv_i64 cb = tcg_temp_new_i64(); TCGv_i64 cb_msb = tcg_temp_new_i64(); - tcg_gen_movi_i64(cb_msb, 0); - tcg_gen_add2_i64(dest, cb_msb, in1, cb_msb, in2, cb_msb); + tcg_gen_add2_i64(dest, cb_msb, in1, ctx->zero, in2, ctx->zero); tcg_gen_xor_i64(cb, in1, in2); tcg_gen_xor_i64(cb, cb, dest); cb_cond = get_carry(ctx, d, cb, cb_msb); diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index ca6bc2e..e3166e7 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,16 +19,6 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -/* - * Sometimes, knowing what the backend has can produce better code. - * The exact opcode to check depends on 32- vs. 64-bit. - */ -#ifdef TARGET_X86_64 -#define INDEX_op_extract2_tl INDEX_op_extract2_i64 -#else -#define INDEX_op_extract2_tl INDEX_op_extract2_i32 -#endif - #define MMX_OFFSET(reg) \ ({ assert((reg) >= 0 && (reg) <= 7); \ offsetof(CPUX86State, fpregs[reg].mmx); }) @@ -3023,7 +3013,7 @@ static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode) tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); while (vec_len > 8) { vec_len -= 8; - if (tcg_op_supported(INDEX_op_extract2_tl, TCG_TYPE_TL, 0)) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_TL, 0)) { /* * Load the next byte of the result into the high byte of T. 
* TCG does a similar expansion of deposit to shl+extract2; by diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index 7dcad6c..23f1037 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -311,11 +311,7 @@ static void gen_add(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) /* Input and output carry. */ static void gen_addc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 zero = tcg_constant_i32(0); - TCGv_i32 tmp = tcg_temp_new_i32(); - - tcg_gen_add2_i32(tmp, cpu_msr_c, ina, zero, cpu_msr_c, zero); - tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + tcg_gen_addcio_i32(out, cpu_msr_c, ina, inb, cpu_msr_c); } /* Input carry, but no output carry. */ @@ -544,12 +540,10 @@ static void gen_rsub(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) /* Input and output carry. */ static void gen_rsubc(TCGv_i32 out, TCGv_i32 ina, TCGv_i32 inb) { - TCGv_i32 zero = tcg_constant_i32(0); TCGv_i32 tmp = tcg_temp_new_i32(); tcg_gen_not_i32(tmp, ina); - tcg_gen_add2_i32(tmp, cpu_msr_c, tmp, zero, cpu_msr_c, zero); - tcg_gen_add2_i32(out, cpu_msr_c, tmp, cpu_msr_c, inb, zero); + tcg_gen_addcio_i32(out, cpu_msr_c, tmp, inb, cpu_msr_c); } /* No input or output carry. */ diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index d4ce601..baadea4 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -221,8 +221,7 @@ static void gen_addc(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb) TCGv t0 = tcg_temp_new(); TCGv res = tcg_temp_new(); - tcg_gen_add2_tl(res, cpu_sr_cy, srca, dc->zero, cpu_sr_cy, dc->zero); - tcg_gen_add2_tl(res, cpu_sr_cy, res, cpu_sr_cy, srcb, dc->zero); + tcg_gen_addcio_tl(res, cpu_sr_cy, srca, srcb, cpu_sr_cy); tcg_gen_xor_tl(cpu_sr_ov, srca, srcb); tcg_gen_xor_tl(t0, res, srcb); tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index fea2f2c..62dd008 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -1746,11 +1746,10 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(ca32, ca); } } else { - TCGv zero = tcg_constant_tl(0); if (add_ca) { - tcg_gen_add2_tl(t0, ca, arg1, zero, ca, zero); - tcg_gen_add2_tl(t0, ca, t0, ca, arg2, zero); + tcg_gen_addcio_tl(t0, ca, arg1, arg2, ca); } else { + TCGv zero = tcg_constant_tl(0); tcg_gen_add2_tl(t0, ca, arg1, zero, arg2, zero); } gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, ca32, 0); @@ -1949,11 +1948,9 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_mov_tl(cpu_ca32, cpu_ca); } } else if (add_ca) { - TCGv zero, inv1 = tcg_temp_new(); + TCGv inv1 = tcg_temp_new(); tcg_gen_not_tl(inv1, arg1); - zero = tcg_constant_tl(0); - tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero); - tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero); + tcg_gen_addcio_tl(t0, cpu_ca, arg2, inv1, cpu_ca); gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, cpu_ca32, 0); } else { tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1); diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 00073c5..a714f9c 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -1250,11 +1250,7 @@ static DisasJumpType op_addc32(DisasContext *s, DisasOps *o) static DisasJumpType op_addc64(DisasContext *s, DisasOps *o) { compute_carry(s); - - TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero); - tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); 
- + tcg_gen_addcio_i64(o->out, cc_src, o->in1, o->in2, cc_src); return DISAS_NEXT; } diff --git a/target/sh4/translate.c b/target/sh4/translate.c index d796ad5..712117b 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -695,14 +695,8 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4)); return; case 0x300e: /* addc Rm,Rn */ - { - TCGv t0, t1; - t0 = tcg_constant_tl(0); - t1 = tcg_temp_new(); - tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0); - tcg_gen_add2_i32(REG(B11_8), cpu_sr_t, - REG(B11_8), t0, t1, cpu_sr_t); - } + tcg_gen_addcio_i32(REG(B11_8), cpu_sr_t, + REG(B11_8), REG(B7_4), cpu_sr_t); return; case 0x300f: /* addv Rm,Rn */ { @@ -1940,16 +1934,16 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) NEXT_INSN; switch (ctx->opcode & 0xf00f) { case 0x300c: /* add Rm,Rn */ - op_opc = INDEX_op_add_i32; + op_opc = INDEX_op_add; goto do_reg_op; case 0x2009: /* and Rm,Rn */ - op_opc = INDEX_op_and_i32; + op_opc = INDEX_op_and; goto do_reg_op; case 0x200a: /* xor Rm,Rn */ - op_opc = INDEX_op_xor_i32; + op_opc = INDEX_op_xor; goto do_reg_op; case 0x200b: /* or Rm,Rn */ - op_opc = INDEX_op_or_i32; + op_opc = INDEX_op_or; do_reg_op: /* The operation register should be as expected, and the other input cannot depend on the load. */ @@ -1976,7 +1970,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) goto fail; } op_dst = B11_8; - op_opc = INDEX_op_xor_i32; + op_opc = INDEX_op_xor; op_arg = tcg_constant_i32(-1); break; @@ -1984,7 +1978,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if (op_dst != B11_8 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_add_i32; + op_opc = INDEX_op_add; op_arg = tcg_constant_i32(B7_0s); break; @@ -1995,7 +1989,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_setcond_i32; /* placeholder */ + op_opc = INDEX_op_setcond; /* placeholder */ op_src = (ld_dst == B11_8 ? 
B7_4 : B11_8); op_arg = REG(op_src); @@ -2030,7 +2024,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) { goto fail; } - op_opc = INDEX_op_setcond_i32; + op_opc = INDEX_op_setcond; op_arg = tcg_constant_i32(0); NEXT_INSN; @@ -2087,7 +2081,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) ctx->memidx, ld_mop); break; - case INDEX_op_add_i32: + case INDEX_op_add: if (op_dst != st_src) { goto fail; } @@ -2105,7 +2099,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_and_i32: + case INDEX_op_and: if (op_dst != st_src) { goto fail; } @@ -2119,7 +2113,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_or_i32: + case INDEX_op_or: if (op_dst != st_src) { goto fail; } @@ -2133,7 +2127,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_xor_i32: + case INDEX_op_xor: if (op_dst != st_src) { goto fail; } @@ -2147,7 +2141,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) } break; - case INDEX_op_setcond_i32: + case INDEX_op_setcond: if (st_src == ld_dst) { goto fail; } diff --git a/target/sparc/translate.c b/target/sparc/translate.c index adebddf..63dd904 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -396,8 +396,7 @@ static void gen_op_addcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin) TCGv z = tcg_constant_tl(0); if (cin) { - tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, cin, z); - tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, cpu_cc_N, cpu_cc_C, src2, z); + tcg_gen_addcio_tl(cpu_cc_N, cpu_cc_C, src1, src2, cin); } else { tcg_gen_add2_tl(cpu_cc_N, cpu_cc_C, src1, z, src2, z); } diff --git a/target/tricore/translate.c b/target/tricore/translate.c index 7cd26d8..ba36c9f 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -1346,15 +1346,11 @@ static inline void gen_addi_CC(TCGv ret, TCGv r1, int32_t con) static inline void gen_addc_CC(TCGv ret, TCGv r1, TCGv r2) { - TCGv carry = tcg_temp_new_i32(); - TCGv t0 = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new_i32(); TCGv result = tcg_temp_new_i32(); - tcg_gen_movi_tl(t0, 0); - tcg_gen_setcondi_tl(TCG_COND_NE, carry, cpu_PSW_C, 0); /* Addition, carry and set C/V/SV bits */ - tcg_gen_add2_i32(result, cpu_PSW_C, r1, t0, carry, t0); - tcg_gen_add2_i32(result, cpu_PSW_C, result, cpu_PSW_C, r2, t0); + tcg_gen_addcio_i32(result, cpu_PSW_C, r1, r2, cpu_PSW_C); /* calc V bit */ tcg_gen_xor_tl(cpu_PSW_V, result, r1); tcg_gen_xor_tl(t0, r1, r2); @@ -3981,7 +3977,7 @@ static void decode_bit_andacc(DisasContext *ctx) pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_and_tl); break; case OPC2_32_BIT_AND_NOR_T: - if (tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0)) { gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos1, pos2, &tcg_gen_or_tl, &tcg_gen_andc_tl); } else { @@ -4114,7 +4110,7 @@ static void decode_bit_orand(DisasContext *ctx) pos1, pos2, &tcg_gen_andc_tl, &tcg_gen_or_tl); break; case OPC2_32_BIT_OR_NOR_T: - if (tcg_op_supported(INDEX_op_orc_i32, TCG_TYPE_I32, 0)) { + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I32, 0)) { gen_bit_2op(cpu_gpr_d[r3], cpu_gpr_d[r1], cpu_gpr_d[r2], pos1, pos2, &tcg_gen_or_tl, &tcg_gen_orc_tl); } else { diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h index 1281e5e..d0622e6 100644 --- a/tcg/aarch64/tcg-target-con-set.h +++ b/tcg/aarch64/tcg-target-con-set.h @@ -18,14 +18,16 @@ 
C_O1_I1(r, r) C_O1_I1(w, r) C_O1_I1(w, w) C_O1_I1(w, wr) -C_O1_I2(r, 0, rz) C_O1_I2(r, r, r) C_O1_I2(r, r, rA) C_O1_I2(r, r, rAL) C_O1_I2(r, r, rC) C_O1_I2(r, r, ri) C_O1_I2(r, r, rL) +C_O1_I2(r, rZ, rA) +C_O1_I2(r, rz, rMZ) C_O1_I2(r, rz, rz) +C_O1_I2(r, rZ, rZ) C_O1_I2(w, 0, w) C_O1_I2(w, w, w) C_O1_I2(w, w, wN) @@ -34,4 +36,3 @@ C_O1_I2(w, w, wZ) C_O1_I3(w, w, w, w) C_O1_I4(r, r, rC, rz, rz) C_O2_I1(r, r, r) -C_O2_I4(r, r, rz, rz, rA, rMZ) diff --git a/tcg/aarch64/tcg-target-has.h b/tcg/aarch64/tcg-target-has.h index 39f01c1..69e83ef 100644 --- a/tcg/aarch64/tcg-target-has.h +++ b/tcg/aarch64/tcg-target-has.h @@ -13,64 +13,7 @@ #define have_lse2 (cpuinfo & CPUINFO_LSE2) /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_eqv_i32 1 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 1 /* * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4645242..4cb647c 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -508,7 +508,9 @@ typedef enum { /* Add/subtract with carry instructions. */ I3503_ADC = 0x1a000000, + I3503_ADCS = 0x3a000000, I3503_SBC = 0x5a000000, + I3503_SBCS = 0x7a000000, /* Conditional select instructions. 
*/ I3506_CSEL = 0x1a800000, @@ -1347,70 +1349,37 @@ static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd, tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a); } -static inline void tcg_out_shl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) +static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond, + TCGReg a, TCGReg b) { - int bits = ext ? 64 : 32; - int max = bits - 1; - tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max); -} - -static inline void tcg_out_shr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_ubfm(s, ext, rd, rn, m & max, max); -} - -static inline void tcg_out_sar(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_sbfm(s, ext, rd, rn, m & max, max); -} - -static inline void tcg_out_rotr(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_extr(s, ext, rd, rn, rn, m & max); -} - -static inline void tcg_out_rotl(TCGContext *s, TCGType ext, - TCGReg rd, TCGReg rn, unsigned int m) -{ - int max = ext ? 63 : 31; - tcg_out_extr(s, ext, rd, rn, rn, -m & max); + if (is_tst_cond(cond)) { + tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b); + } else { + tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); + } } -static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd, - TCGReg rn, unsigned lsb, unsigned width) +static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond, + TCGReg a, tcg_target_long b) { - unsigned size = ext ? 64 : 32; - unsigned a = (size - lsb) & (size - 1); - unsigned b = width - 1; - tcg_out_bfm(s, ext, rd, rn, a, b); + if (is_tst_cond(cond)) { + tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b); + } else if (b >= 0) { + tcg_debug_assert(is_aimm(b)); + tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); + } else { + tcg_debug_assert(is_aimm(-b)); + tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); + } } static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a, tcg_target_long b, bool const_b) { - if (is_tst_cond(cond)) { - if (!const_b) { - tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b); - } else { - tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b); - } + if (const_b) { + tgen_cmpi(s, ext, cond, a, b); } else { - if (!const_b) { - tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b); - } else if (b >= 0) { - tcg_debug_assert(is_aimm(b)); - tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b); - } else { - tcg_debug_assert(is_aimm(-b)); - tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b); - } + tgen_cmp(s, ext, cond, a, b); } } @@ -1438,7 +1407,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, tcg_out_call_int(s, target); } -static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) +static void tcg_out_br(TCGContext *s, TCGLabel *l) { if (!l->has_value) { tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0); @@ -1448,8 +1417,16 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) } } -static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, - TCGArg b, bool b_const, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg a, TCGReg b, TCGLabel *l) +{ + tgen_cmp(s, type, c, a, b); + tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); + tcg_out_insn(s, 3202, B_C, c, 0); +} + +static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c, + TCGReg a, tcg_target_long b, TCGLabel *l) { 
int tbit = -1; bool need_cmp = true; @@ -1458,14 +1435,14 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, case TCG_COND_EQ: case TCG_COND_NE: /* cmp xN,0; b.ne L -> cbnz xN,L */ - if (b_const && b == 0) { + if (b == 0) { need_cmp = false; } break; case TCG_COND_LT: case TCG_COND_GE: /* cmp xN,0; b.mi L -> tbnz xN,63,L */ - if (b_const && b == 0) { + if (b == 0) { c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ); tbit = ext ? 63 : 31; need_cmp = false; @@ -1474,14 +1451,14 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, case TCG_COND_TSTEQ: case TCG_COND_TSTNE: /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */ - if (b_const && b == UINT32_MAX) { + if (b == UINT32_MAX) { c = tcg_tst_eqne_cond(c); ext = TCG_TYPE_I32; need_cmp = false; break; } /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */ - if (b_const && is_power_of_2(b)) { + if (is_power_of_2(b)) { tbit = ctz64(b); need_cmp = false; } @@ -1491,7 +1468,7 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, } if (need_cmp) { - tcg_out_cmp(s, ext, c, a, b, b_const); + tgen_cmpi(s, ext, c, a, b); tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0); tcg_out_insn(s, 3202, B_C, c, 0); return; @@ -1524,6 +1501,12 @@ static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, } } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rC), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits, TCGReg rd, TCGReg rn) { @@ -1592,67 +1575,7 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) tcg_out_mov(s, TCG_TYPE_I32, rd, rn); } -static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, - TCGReg rn, int64_t aimm) -{ - if (aimm >= 0) { - tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm); - } else { - tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm); - } -} - -static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, - TCGReg rh, TCGReg al, TCGReg ah, - tcg_target_long bl, tcg_target_long bh, - bool const_bl, bool const_bh, bool sub) -{ - TCGReg orig_rl = rl; - AArch64Insn insn; - - if (rl == ah || (!const_bh && rl == bh)) { - rl = TCG_REG_TMP0; - } - - if (const_bl) { - if (bl < 0) { - bl = -bl; - insn = sub ? I3401_ADDSI : I3401_SUBSI; - } else { - insn = sub ? I3401_SUBSI : I3401_ADDSI; - } - - if (unlikely(al == TCG_REG_XZR)) { - /* ??? We want to allow al to be zero for the benefit of - negation via subtraction. However, that leaves open the - possibility of adding 0+const in the low part, and the - immediate add instructions encode XSP not XZR. Don't try - anything more elaborate here than loading another zero. */ - al = TCG_REG_TMP0; - tcg_out_movi(s, ext, al, 0); - } - tcg_out_insn_3401(s, insn, ext, rl, al, bl); - } else { - tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl); - } - - insn = I3503_ADC; - if (const_bh) { - /* Note that the only two constants we support are 0 and -1, and - that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */ - if ((bh != 0) ^ sub) { - insn = I3503_SBC; - } - bh = TCG_REG_XZR; - } else if (sub) { - insn = I3503_SBC; - } - tcg_out_insn_3503(s, insn, ext, rh, ah, bh); - - tcg_out_mov(s, ext, orig_rl, rl); -} - -static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { static const uint32_t sync[] = { [0 ... 
TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, @@ -1664,37 +1587,6 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, sync[a0 & TCG_MO_ALL]); } -static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, - TCGReg a0, TCGArg b, bool const_b, bool is_ctz) -{ - TCGReg a1 = a0; - if (is_ctz) { - a1 = TCG_REG_TMP0; - tcg_out_insn(s, 3507, RBIT, ext, a1, a0); - } - if (const_b && b == (ext ? 64 : 32)) { - tcg_out_insn(s, 3507, CLZ, ext, d, a1); - } else { - AArch64Insn sel = I3506_CSEL; - - tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1); - tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1); - - if (const_b) { - if (b == -1) { - b = TCG_REG_XZR; - sel = I3506_CSINV; - } else if (b == 0) { - b = TCG_REG_XZR; - } else { - tcg_out_movi(s, ext, d, b); - b = d; - } - } - tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE); - } -} - typedef struct { TCGReg base; TCGReg index; @@ -1914,8 +1806,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType data_type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -1930,8 +1822,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType data_type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -1946,6 +1843,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addr_reg, MemOpIdx oi, bool is_ld) { @@ -2048,6 +1950,28 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, true); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(r, r, r), + .out = tgen_qemu_ld2, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, false); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(rz, rz, r), + .out = tgen_qemu_st2, +}; + static const tcg_insn_unit *tb_ret_addr; static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) @@ -2094,6 +2018,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) tcg_out_bti(s, BTI_J); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_insn(s, 3207, BR, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2115,402 +2044,859 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + +static void tgen_add(TCGContext *s, 
TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - /* Hoist the loads of the most common arguments. */ - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - int c2 = const_args[2]; + tcg_out_insn(s, 3502, ADD, type, a0, a1, a2); +} - switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_insn(s, 3207, BR, a0); - break; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 >= 0) { + tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2); + } else { + tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2); + } +} - case INDEX_op_br: - tcg_out_goto_label(s, arg_label(a0)); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rA), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0); - break; - case INDEX_op_ld8s_i32: - tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0); - break; - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1); - break; - case INDEX_op_ld16s_i32: - tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1); - break; - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2); - break; - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3); - break; +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, ADDS, type, a0, a1, a2); +} - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, I3312_STRH, a0, a1, a2, 1); +static void tgen_addco_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 >= 0) { + tcg_out_insn(s, 3401, ADDSI, type, a0, a1, a2); + } else { + tcg_out_insn(s, 3401, SUBSI, type, a0, a1, -a2); + } +} + +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_O1_I2(r, r, rA), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, +}; + +static void tgen_addci_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, ADC, type, a0, a1, a2); +} + +static void tgen_addci_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* + * Note that the only two constants we support are 0 and -1, and + * that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. + */ + if (a2) { + tcg_out_insn(s, 3503, SBC, type, a0, a1, TCG_REG_XZR); + } else { + tcg_out_insn(s, 3503, ADC, type, a0, a1, TCG_REG_XZR); + } +} + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, +}; + +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, ADCS, type, a0, a1, a2); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Use SBCS w/0 for ADCS w/-1 -- see above. 
*/ + if (a2) { + tcg_out_insn(s, 3503, SBCS, type, a0, a1, TCG_REG_XZR); + } else { + tcg_out_insn(s, 3503, ADCS, type, a0, a1, TCG_REG_XZR); + } +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + tcg_out_insn(s, 3502, SUBS, TCG_TYPE_I32, + TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR); +} + +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, AND, type, a0, a1, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rL), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, BIC, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); + tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1); + tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == (type == TCG_TYPE_I32 ? 32 : 64)) { + tcg_out_insn(s, 3507, CLZ, type, a0, a1); + return; + } + + tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true); + tcg_out_insn(s, 3507, CLZ, type, a0, a1); + + switch (a2) { + case -1: + tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, I3312_STRW, a0, a1, a2, 2); + case 0: + tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE); break; - case INDEX_op_st_i64: - tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3); + default: + tcg_out_movi(s, type, TCG_REG_TMP0, a2); + tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE); break; + } +} - case INDEX_op_add_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_add_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rAL), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; - case INDEX_op_sub_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_sub_i64: - if (c2) { - tcg_out_addsubi(s, ext, a0, a1, -a2); - } else { - tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2); - } - break; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_neg_i64: - case INDEX_op_neg_i32: - tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1); - break; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1); + tgen_clz(s, type, a0, TCG_REG_TMP0, a2); +} - case INDEX_op_and_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_and_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, AND, ext, a0, a1, a2); - } - break; +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_insn(s, 3507, RBIT, type, 
TCG_REG_TMP0, a1); + tgen_clzi(s, type, a0, TCG_REG_TMP0, a2); +} - case INDEX_op_andc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_andc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, rAL), + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; - case INDEX_op_or_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_or_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2); - } - break; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2); +} - case INDEX_op_orc_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_orc_i64: - if (c2) { - tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; - case INDEX_op_xor_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_xor_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2); - } - break; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_eqv_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_eqv_i64: - if (c2) { - tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2); - } else { - tcg_out_insn(s, 3510, EON, ext, a0, a1, a2); - } - break; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2); +} - case INDEX_op_not_i64: - case INDEX_op_not_i32: - tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1); - break; +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; - case INDEX_op_mul_i64: - case INDEX_op_mul_i32: - tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR); - break; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_div_i64: - case INDEX_op_div_i32: - tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2); - break; - case INDEX_op_divu_i64: - case INDEX_op_divu_i32: - tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2); - break; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, EON, type, a0, a1, a2); +} - case INDEX_op_rem_i64: - case INDEX_op_rem_i32: - tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); - break; - case INDEX_op_remu_i64: - case INDEX_op_remu_i32: - tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); - break; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_eqv, +}; - case INDEX_op_shl_i64: - case INDEX_op_shl_i32: - if (c2) { - tcg_out_shl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2); - } - break; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_ubfm(s, TCG_TYPE_I64, a0, a1, 32, 63); +} - case INDEX_op_shr_i64: - case INDEX_op_shr_i32: - if (c2) { - tcg_out_shr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, LSRV, ext, a0, a1, 
a2); - } - break; +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; - case INDEX_op_sar_i64: - case INDEX_op_sar_i32: - if (c2) { - tcg_out_sar(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2); - } - break; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR); +} - case INDEX_op_rotr_i64: - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_rotr(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; - case INDEX_op_rotl_i64: - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_rotl(s, ext, a0, a1, a2); - } else { - tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); - tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); - } - break; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_clz_i64: - case INDEX_op_clz_i32: - tcg_out_cltz(s, ext, a0, a1, a2, c2, false); - break; - case INDEX_op_ctz_i64: - case INDEX_op_ctz_i32: - tcg_out_cltz(s, ext, a0, a1, a2, c2, true); - break; +static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented; +} - case INDEX_op_brcond_i32: - a1 = (int32_t)a1; - /* FALLTHRU */ - case INDEX_op_brcond_i64: - tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3])); - break; +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); +} - case INDEX_op_setcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_setcond_i64: - tcg_out_cmp(s, ext, args[3], a1, a2, c2); - /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */ - tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR, - TCG_REG_XZR, tcg_invert_cond(args[3])); - break; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_mulsh, +}; - case INDEX_op_negsetcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_negsetcond_i64: - tcg_out_cmp(s, ext, args[3], a1, a2, c2); - /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). 
*/ - tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR, - TCG_REG_XZR, tcg_invert_cond(args[3])); - break; +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_movcond_i32: - a2 = (int32_t)a2; - /* FALLTHRU */ - case INDEX_op_movcond_i64: - tcg_out_cmp(s, ext, args[5], a1, a2, c2); - tcg_out_insn(s, 3506, CSEL, ext, a0, args[3], args[4], args[5]); - break; +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); +} - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, ext); - break; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, ext); - break; - case INDEX_op_qemu_ld_i128: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); - break; - case INDEX_op_qemu_st_i128: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false); - break; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_muluh, +}; - case INDEX_op_bswap64_i64: - tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); - break; - case INDEX_op_bswap32_i64: - tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext32s(s, a0, a0); - } - break; - case INDEX_op_bswap32_i32: - tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); - break; - case INDEX_op_bswap16_i64: - case INDEX_op_bswap16_i32: - tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); - if (a2 & TCG_BSWAP_OS) { - /* Output must be sign-extended. */ - tcg_out_ext16s(s, ext, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* Output must be zero-extended, but input isn't. */ - tcg_out_ext16u(s, a0, a0); - } - break; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_deposit_i64: - case INDEX_op_deposit_i32: - tcg_out_dep(s, ext, a0, a2, args[3], args[4]); - break; +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_extract_i64: - case INDEX_op_extract_i32: - if (a2 == 0) { - uint64_t mask = MAKE_64BIT_MASK(0, args[3]); - tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask); - } else { - tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1); - } - break; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, ORR, type, a0, a1, a2); +} - case INDEX_op_sextract_i64: - case INDEX_op_sextract_i32: - tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1); - break; +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2); +} - case INDEX_op_extract2_i64: - case INDEX_op_extract2_i32: - tcg_out_extr(s, ext, a0, a2, a1, args[3]); - break; +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rL), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; - case INDEX_op_add2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], - (int32_t)args[4], args[5], const_args[4], - const_args[5], false); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4], - args[5], const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3], - (int32_t)args[4], args[5], const_args[4], - const_args[5], true); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2(s, 
TCG_TYPE_I64, a0, a1, a2, args[3], args[4], - args[5], const_args[4], const_args[5], true); - break; +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, ORN, type, a0, a1, a2); +} - case INDEX_op_muluh_i64: - tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2); - break; - case INDEX_op_mulsh_i64: - tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); - break; +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, a1, a2); + tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); +} - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2); + tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, RORV, type, a0, a1, a2); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_extr(s, type, a0, a1, a1, a2 & max); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_sbfm(s, type, a0, a1, a2 & max, max); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 
31 : 63; + tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int max = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_ubfm(s, type, a0, a1, a2 & max, max); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, SUB, type, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3502, SUBS, type, a0, a1, a2); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 >= 0) { + tcg_out_insn(s, 3401, SUBSI, type, a0, a1, a2); + } else { + tcg_out_insn(s, 3401, ADDSI, type, a0, a1, -a2); + } +} + +static void tgen_subbo_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, a2); +} + +static void tgen_subbo_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + if (a2 == 0) { + tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, TCG_REG_XZR); + return; + } + + /* + * We want to allow a1 to be zero for the benefit of negation via + * subtraction. However, that leaves open the possibility of + * adding 0 +/- const, and the immediate add/sub instructions + * encode XSP not XZR. Since we have 0 - non-zero, borrow is + * always set. 
+ */ + tcg_out_movi(s, type, a0, -a2); + tcg_out_set_borrow(s); +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_O1_I2(r, rZ, rA), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, + .out_rir = tgen_subbo_rir, + .out_rii = tgen_subbo_rii, +}; + +static void tgen_subbi_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, SBC, type, a0, a1, a2); +} + +static void tgen_subbi_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_addci_rri(s, type, a0, a1, ~a2); +} + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_subbi_rrr, + .out_rri = tgen_subbi_rri, +}; + +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3503, SBCS, type, a0, a1, a2); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_addcio_imm(s, type, a0, a1, ~a2); +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_O1_I2(r, rz, rMZ), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + tcg_out_insn(s, 3502, ADDS, TCG_TYPE_I32, + TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR); +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_insn(s, 3510, EOR, type, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rL), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1); + if (flags & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + tcg_out_ext16s(s, type, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_ext16u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_XZR, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_orc(s, type, a0, TCG_REG_XZR, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret) +{ + /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). 
*/ + tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(cond)); +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_cmp(s, type, cond, a1, a2); + tgen_cset(s, cond, a0); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_cmpi(s, type, cond, a1, a2); + tgen_cset(s, cond, a0); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret) +{ + /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond). */ + tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR, + TCG_REG_XZR, tcg_invert_cond(cond)); +} + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_cmp(s, type, cond, a1, a2); + tgen_csetm(s, type, cond, a0); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_cmpi(s, type, cond, a1, a2); + tgen_csetm(s, type, cond, a0); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool const_vf) +{ + tcg_out_cmp(s, type, cond, c1, c2, const_c2); + tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rC, rz, rz), + .out = tgen_movcond, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + + /* + * Since we can't support "0Z" as a constraint, we allow a1 in + * any register. Fix things up as if a matching constraint. + */ + if (a0 != a1) { + if (a0 == a2) { + tcg_out_mov(s, type, TCG_REG_TMP0, a2); + a2 = TCG_REG_TMP0; + } + tcg_out_mov(s, type, a0, a1); } + tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1); +} + +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len)); } +static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, + unsigned ofs, unsigned len) +{ + int max = type == TCG_TYPE_I32 ? 
31 : 63; + tcg_out_ubfm(s, type, a0, a2, -ofs & max, len - 1); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, rZ, rZ), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, + .out_rzr = tgen_depositz, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + uint64_t mask = MAKE_64BIT_MASK(0, len); + tcg_out_logicali(s, I3404_ANDI, type, a0, a1, mask); + } else { + tcg_out_ubfm(s, type, a0, a1, ofs, ofs + len - 1); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + tcg_out_sbfm(s, type, a0, a1, ofs, ofs + len - 1); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a1, TCGReg a2, unsigned shr) +{ + tcg_out_extr(s, type, a0, a2, a1, shr); +} + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, rz, rz), + .out_rrr = tgen_extract2, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRB, dest, base, offset, 0); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + AArch64Insn insn = type == TCG_TYPE_I32 ? I3312_LDRSBW : I3312_LDRSBX; + tcg_out_ldst(s, insn, dest, base, offset, 0); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRH, dest, base, offset, 1); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + AArch64Insn insn = type == TCG_TYPE_I32 ? 
I3312_LDRSHW : I3312_LDRSHX; + tcg_out_ldst(s, insn, dest, base, offset, 1); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRW, dest, base, offset, 2); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_LDRSWX, dest, base, offset, 2); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_STRB, data, base, offset, 0); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, I3312_STRH, data, base, offset, 1); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2955,148 +3341,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_ext8u_i32: - case INDEX_op_ext16u_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(rz, r); - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - return C_O1_I2(r, r, rA); - - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rC); - - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_div_i32: - case INDEX_op_div_i64: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - case INDEX_op_muluh_i64: - case INDEX_op_mulsh_i64: - 
return C_O1_I2(r, r, r); - - case INDEX_op_and_i32: - case INDEX_op_and_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - return C_O1_I2(r, r, rL); - - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - - case INDEX_op_clz_i32: - case INDEX_op_ctz_i32: - case INDEX_op_clz_i64: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, rAL); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, rC); - - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rC, rz, rz); - - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i128: - return C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - return C_O0_I2(rz, r); - case INDEX_op_qemu_st_i128: - return C_O0_I3(rz, rz, r); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, rz); - - case INDEX_op_extract2_i32: - case INDEX_op_extract2_i64: - return C_O1_I2(r, rz, rz); - - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, rz, rz, rA, rMZ); - case INDEX_op_add_vec: case INDEX_op_sub_vec: case INDEX_op_mul_vec: diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h index 229ae25..16b1193 100644 --- a/tcg/arm/tcg-target-con-set.h +++ b/tcg/arm/tcg-target-con-set.h @@ -30,6 +30,9 @@ C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) C_O1_I2(r, r, rIN) C_O1_I2(r, r, ri) +C_O1_I2(r, rI, r) +C_O1_I2(r, rI, rIK) +C_O1_I2(r, rI, rIN) C_O1_I2(r, rZ, rZ) C_O1_I2(w, 0, w) C_O1_I2(w, w, w) @@ -42,5 +45,3 @@ C_O1_I4(r, r, rIN, rIK, 0) C_O2_I1(e, p, q) C_O2_I2(e, p, q, q) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, r, r, rIN, rIK) -C_O2_I4(r, r, rI, rI, rIN, rIK) diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h index e3510a8..3bbbde5 100644 --- a/tcg/arm/tcg-target-has.h +++ b/tcg/arm/tcg-target-has.h @@ -24,34 +24,7 @@ extern bool use_neon_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */ -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_div_i32 use_idiv_instructions -#define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 0 - #define TCG_TARGET_HAS_tst 1 #define TCG_TARGET_HAS_v64 use_neon_instructions diff --git a/tcg/arm/tcg-target.c.inc 
b/tcg/arm/tcg-target.c.inc index cec3d76..447e435 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -178,6 +178,8 @@ typedef enum { INSN_DMB_ISH = 0xf57ff05b, INSN_DMB_MCR = 0xee070fba, + INSN_MSRI_CPSR = 0x0360f000, + /* Architected nop introduced in v6k. */ /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this also Just So Happened to do nothing on pre-v6k so that we @@ -874,22 +876,39 @@ static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc, * Emit either the reg,imm or reg,reg form of a data-processing insn. * rhs must satisfy the "rIK" constraint. */ +static void tcg_out_dat_IK(TCGContext *s, ARMCond cond, ARMInsn opc, + ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs) +{ + int imm12 = encode_imm(rhs); + if (imm12 < 0) { + imm12 = encode_imm_nofail(~rhs); + opc = opinv; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); +} + static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc, ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs, bool rhs_is_const) { if (rhs_is_const) { - int imm12 = encode_imm(rhs); - if (imm12 < 0) { - imm12 = encode_imm_nofail(~rhs); - opc = opinv; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); + tcg_out_dat_IK(s, cond, opc, opinv, dst, lhs, rhs); } else { tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } } +static void tcg_out_dat_IN(TCGContext *s, ARMCond cond, ARMInsn opc, + ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs) +{ + int imm12 = encode_imm(rhs); + if (imm12 < 0) { + imm12 = encode_imm_nofail(-rhs); + opc = opneg; + } + tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); +} + static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs, bool rhs_is_const) @@ -898,52 +917,12 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc, * rhs must satisfy the "rIN" constraint. 
*/ if (rhs_is_const) { - int imm12 = encode_imm(rhs); - if (imm12 < 0) { - imm12 = encode_imm_nofail(-rhs); - opc = opneg; - } - tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12); + tcg_out_dat_IN(s, cond, opc, opneg, dst, lhs, rhs); } else { tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0)); } } -static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd, - TCGReg rn, TCGReg rm) -{ - /* mul */ - tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn); -} - -static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* umull */ - tcg_out32(s, (cond << 28) | 0x00800090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - -static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0, - TCGReg rd1, TCGReg rn, TCGReg rm) -{ - /* smull */ - tcg_out32(s, (cond << 28) | 0x00c00090 | - (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); -} - -static void tcg_out_sdiv(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, TCGReg rm) -{ - tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - -static void tcg_out_udiv(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, TCGReg rm) -{ - tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8)); -} - static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn) { /* sxtb */ @@ -992,54 +971,40 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn) g_assert_not_reached(); } -static void tcg_out_bswap16(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int flags) +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) { - if (flags & TCG_BSWAP_OS) { - /* revsh */ - tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn); - return; - } - - /* rev16 */ - tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); - if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd); - } + /* bfi/bfc */ + tcg_out32(s, 0x07c00010 | (COND_AL << 28) | (a0 << 12) | a1 + | (ofs << 7) | ((ofs + len - 1) << 16)); } -static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn) +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) { - /* rev */ - tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); + /* bfi becomes bfc with rn == 15. */ + tgen_deposit(s, type, a0, a1, 15, ofs, len); } -static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd, - TCGArg a1, int ofs, int len, bool const_a1) -{ - if (const_a1) { - /* bfi becomes bfc with rn == 15. */ - a1 = 15; - } - /* bfi/bfc */ - tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1 - | (ofs << 7) | ((ofs + len - 1) << 16)); -} +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rZ), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; -static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd, - TCGReg rn, int ofs, int len) +static void tgen_extract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn, + unsigned ofs, unsigned len) { /* According to gcc, AND can be faster. 
*/ if (ofs == 0 && len <= 8) { - tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, + tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, encode_imm_nofail((1 << len) - 1)); return; } if (use_armv7_instructions) { /* ubfx */ - tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn + tcg_out32(s, 0x07e00050 | (COND_AL << 28) | (rd << 12) | rn | (ofs << 7) | ((len - 1) << 16)); return; } @@ -1048,23 +1013,30 @@ static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd, switch (len) { case 8: /* uxtb */ - tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06ef0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; case 16: /* uxth */ - tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; default: g_assert_not_reached(); } } -static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd, - TCGReg rn, int ofs, int len) +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn, + unsigned ofs, unsigned len) { if (use_armv7_instructions) { /* sbfx */ - tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn + tcg_out32(s, 0x07a00050 | (COND_AL << 28) | (rd << 12) | rn | (ofs << 7) | ((len - 1) << 16)); return; } @@ -1073,17 +1045,24 @@ static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd, switch (len) { case 8: /* sxtb */ - tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06af0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; case 16: /* sxth */ - tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn); + tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | + (rd << 12) | (ofs << 7) | rn); break; default: g_assert_not_reached(); } } +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + static void tcg_out_ld32u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn, int32_t offset) @@ -1105,66 +1084,6 @@ static void tcg_out_st32(TCGContext *s, ARMCond cond, tcg_out_st32_12(s, cond, rd, rn, offset); } -static void tcg_out_ld16u(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16u_8(s, cond, rd, rn, offset); -} - -static void tcg_out_ld16s(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld16s_8(s, cond, rd, rn, offset); -} - -static void tcg_out_st16(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st16_8(s, cond, rd, rn, offset); -} - -static void tcg_out_ld8u(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8_12(s, cond, rd, rn, offset); -} - -static void tcg_out_ld8s(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 
0xff || offset < -0xff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_ld8s_8(s, cond, rd, rn, offset); -} - -static void tcg_out_st8(TCGContext *s, ARMCond cond, - TCGReg rd, TCGReg rn, int32_t offset) -{ - if (offset > 0xfff || offset < -0xfff) { - tcg_out_movi32(s, cond, TCG_REG_TMP, offset); - tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP); - } else - tcg_out_st8_12(s, cond, rd, rn, offset); -} - /* * The _goto case is normally between TBs within the same code buffer, and * with the code buffer limited to 16MB we wouldn't need the long case. @@ -1224,7 +1143,12 @@ static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l) } } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_goto_label(s, COND_AL, l); +} + +static void tcg_out_mb(TCGContext *s, unsigned a0) { if (use_armv7_instructions) { tcg_out32(s, INSN_DMB_ISH); @@ -1233,44 +1157,53 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) } } -static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a, - TCGArg b, int b_const) +static TCGCond tgen_cmp(TCGContext *s, TCGCond cond, TCGReg a, TCGReg b) +{ + if (is_tst_cond(cond)) { + tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0)); + return tcg_tst_eqne_cond(cond); + } + tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, a, b, SHIFT_IMM_LSL(0)); + return cond; +} + +static TCGCond tgen_cmpi(TCGContext *s, TCGCond cond, TCGReg a, TCGArg b) { + int imm12; + if (!is_tst_cond(cond)) { - tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const); + tcg_out_dat_IN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b); return cond; } - cond = tcg_tst_eqne_cond(cond); - if (b_const) { - int imm12 = encode_imm(b); - - /* - * The compare constraints allow rIN, but TST does not support N. - * Be prepared to load the constant into a scratch register. - */ - if (imm12 >= 0) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12); - return cond; - } + /* + * The compare constraints allow rIN, but TST does not support N. + * Be prepared to load the constant into a scratch register. 
+ */ + imm12 = encode_imm(b); + if (imm12 >= 0) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12); + } else { tcg_out_movi32(s, COND_AL, TCG_REG_TMP, b); - b = TCG_REG_TMP; + tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, + a, TCG_REG_TMP, SHIFT_IMM_LSL(0)); } - tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0)); - return cond; + return tcg_tst_eqne_cond(cond); } -static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args, - const int *const_args) +static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a, + TCGArg b, int b_const) { - TCGReg al = args[0]; - TCGReg ah = args[1]; - TCGArg bl = args[2]; - TCGArg bh = args[3]; - TCGCond cond = args[4]; - int const_bl = const_args[2]; - int const_bh = const_args[3]; + if (b_const) { + return tgen_cmpi(s, cond, a, b); + } else { + return tgen_cmp(s, cond, a, b); + } +} +static TCGCond tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh) +{ switch (cond) { case TCG_COND_EQ: case TCG_COND_NE: @@ -1653,8 +1586,42 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, true); + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + + /* + * This a conditional BL only to load a pointer within this + * opcode into LR for the slow path. We will not be using + * the value for a tail call. + */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + } + + tcg_out_qemu_ld_direct(s, opc, data, -1, h); + + if (ldst) { + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, q), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; @@ -1662,7 +1629,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, ldst = prepare_host_addr(s, &h, addr, oi, true); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; @@ -1673,14 +1640,20 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, */ ldst->label_ptr[0] = s->code_ptr; tcg_out_bl_imm(s, COND_NE, 0); + } + + tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); + if (ldst) { ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); - } else { - tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h); } } +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(e, p, q), + .out = tgen_qemu_ld2, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, TCGReg datahi, HostAddress h) { @@ -1738,8 +1711,38 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, 
oi, false); + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + + h.cond = COND_EQ; + tcg_out_qemu_st_direct(s, opc, data, -1, h); + + /* The conditional call is last, as we're going to return here. */ + ldst->label_ptr[0] = s->code_ptr; + tcg_out_bl_imm(s, COND_NE, 0); + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } else { + tcg_out_qemu_st_direct(s, opc, data, -1, h); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(q, q), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; @@ -1747,7 +1750,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, ldst = prepare_host_addr(s, &h, addr, oi, false); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; @@ -1763,6 +1766,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(Q, p, q), + .out = tgen_qemu_st2, +}; + static void tcg_out_epilogue(TCGContext *s); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) @@ -1802,6 +1810,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_b_reg(s, COND_AL, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -1821,393 +1834,816 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - TCGArg a0, a1, a2, a3, a4, a5; - int c; + tcg_out_dat_reg(s, COND_AL, ARITH_ADD, a0, a1, a2, SHIFT_IMM_LSL(0)); +} - switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_b_reg(s, COND_AL, args[0]); - break; - case INDEX_op_br: - tcg_out_goto_label(s, COND_AL, arg_label(args[0])); - break; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IN(s, COND_AL, ARITH_ADD, ARITH_SUB, a0, a1, a2); +} - case INDEX_op_ld8u_i32: - tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16u_i32: - tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - tcg_out_st8(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - tcg_out_st16(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - tcg_out_st32(s, COND_AL, args[0], args[1], args[2]); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; - case INDEX_op_movcond_i32: - /* Constraints mean that v2 is always in the same register as dest, - * so we only need to do "if condition passed, move v1 to dest". 
- */ - c = tcg_out_cmp(s, args[5], args[1], args[2], const_args[2]); - tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV, - ARITH_MVN, args[0], 0, args[3], const_args[3]); - break; - case INDEX_op_add_i32: - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_sub_i32: - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]); - } else { - tcg_out_dat_rI(s, COND_AL, ARITH_RSB, - args[0], args[2], args[1], 1); - } - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, - args[0], args[1], args[2], const_args[2]); - } - break; - case INDEX_op_and_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_andc_i32: - tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, - args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_or_i32: - c = ARITH_ORR; - goto gen_arith; - case INDEX_op_xor_i32: - c = ARITH_EOR; - /* Fall through. */ - gen_arith: - tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]); - break; - case INDEX_op_add2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if (a0 == a3 || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, - a0, a2, a4, const_args[4]); - tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC, - a1, a3, a5, const_args[5]); - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_sub2_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - a3 = args[3], a4 = args[4], a5 = args[5]; - if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) { - a0 = TCG_REG_TMP; - } - if (const_args[2]) { - if (const_args[4]) { - tcg_out_movi32(s, COND_AL, a0, a4); - a4 = a0; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1); - } else { - tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR, - ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]); - } - if (const_args[3]) { - if (const_args[5]) { - tcg_out_movi32(s, COND_AL, a1, a5); - a5 = a1; - } - tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1); - } else { - tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC, - a1, a3, a5, const_args[5]); - } - tcg_out_mov_reg(s, COND_AL, args[0], a0); - break; - case INDEX_op_neg_i32: - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0); - break; - case INDEX_op_not_i32: - tcg_out_dat_reg(s, COND_AL, - ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0)); - break; - case INDEX_op_mul_i32: - tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_mulu2_i32: - tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_muls2_i32: - tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - /* XXX: Perhaps args[2] & 0x1f is wrong */ - case INDEX_op_shl_i32: - c = const_args[2] ? - SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]); - goto gen_shift32; - case INDEX_op_shr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]); - goto gen_shift32; - case INDEX_op_sar_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]); - goto gen_shift32; - case INDEX_op_rotr_i32: - c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) : - SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]); - /* Fall through. 
*/ - gen_shift32: - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c); - break; +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADD | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - ((0x20 - args[2]) & 0x1f) ? - SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) : - SHIFT_IMM_LSL(0)); - } else { - tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20); - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], - SHIFT_REG_ROR(TCG_REG_TMP)); - } - break; +static void tgen_addco_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR, + a0, a1, a2); +} - case INDEX_op_ctz_i32: - tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0); - a1 = TCG_REG_TMP; - goto do_clz; - - case INDEX_op_clz_i32: - a1 = args[1]; - do_clz: - a0 = args[0]; - a2 = args[2]; - c = const_args[2]; - if (c && a2 == 32) { - tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0); - break; - } +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, +}; + +static void tgen_addci(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADC, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_addci_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_ADC, ARITH_SBC, a0, a1, a2); +} + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_addci, + .out_rri = tgen_addci_imm, +}; + +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ADC | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_ADC | TO_CPSR, ARITH_SBC | TO_CPSR, + a0, a1, a2); +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, +}; + +/* Set C to @c; NZVQ all set to 0. 
*/ +static void tcg_out_movi_apsr_c(TCGContext *s, bool c) +{ + int imm12 = encode_imm_nofail(c << 29); + tcg_out32(s, (COND_AL << 28) | INSN_MSRI_CPSR | 0x80000 | imm12); +} + +static void tcg_out_set_carry(TCGContext *s) +{ + tcg_out_movi_apsr_c(s, 1); +} + +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_AND, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_AND, ARITH_BIC, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_BIC, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0); + tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0); + tcg_out_mov_reg(s, COND_EQ, a0, a2); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == 32) { + tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0); + } else { tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0); tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0); - if (c || a0 != a2) { - tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c); - } - break; + tcg_out_movi32(s, COND_EQ, a0, a2); + } +} - case INDEX_op_brcond_i32: - c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], - ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], - ARITH_MOV, args[0], 0, 0); - break; - case INDEX_op_negsetcond_i32: - c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], - ARITH_MVN, args[0], 0, 0); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], - ARITH_MOV, args[0], 0, 0); - break; +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rIK), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; - case INDEX_op_brcond2_i32: - c = tcg_out_cmp2(s, args, const_args); - tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5])); - break; - case INDEX_op_setcond2_i32: - c = tcg_out_cmp2(s, args + 1, const_args + 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1); - tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)], - ARITH_MOV, args[0], 0, 0); - break; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); - break; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0); + tgen_clz(s, TCG_TYPE_I32, a0, TCG_REG_TMP, a2); +} - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], 
TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args[0], args[1], args[2], args[3], TCG_TYPE_I64); - break; +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0); + tgen_clzi(s, TCG_TYPE_I32, a0, TCG_REG_TMP, a2); +} - case INDEX_op_bswap16_i32: - tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, COND_AL, args[0], args[1]); - break; +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) +{ + return use_armv7_instructions ? C_O1_I2(r, r, rIK) : C_NotImplemented; +} - case INDEX_op_deposit_i32: - tcg_out_deposit(s, COND_AL, args[0], args[2], - args[3], args[4], const_args[2]); - break; - case INDEX_op_extract_i32: - tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_sextract_i32: - tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]); - break; - case INDEX_op_extract2_i32: - /* ??? These optimization vs zero should be generic. */ - /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */ - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_REG, args[0], 0); - } else { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, - args[2], SHIFT_IMM_LSL(32 - args[3])); - } - } else if (const_args[2]) { - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, - args[1], SHIFT_IMM_LSR(args[3])); - } else { - /* We can do extract2 in 2 insns, vs the 3 required otherwise. */ - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, - args[2], SHIFT_IMM_LSL(32 - args[3])); - tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP, - args[1], SHIFT_IMM_LSR(args[3])); - } - break; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; - case INDEX_op_div_i32: - tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]); - break; - case INDEX_op_divu_i32: - tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); - break; +static TCGConstraintSetIndex cset_idiv(TCGType type, unsigned flags) +{ + return use_idiv_instructions ? C_O1_I2(r, r, r) : C_NotImplemented; +} - case INDEX_op_mb: - tcg_out_mb(s, args[0]); - break; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* sdiv */ + tcg_out32(s, 0x0710f010 | (COND_AL << 28) | (a0 << 16) | a1 | (a2 << 8)); +} - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
*/ - case INDEX_op_ext8u_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16u_i32: - default: - g_assert_not_reached(); +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_idiv, + .out_rrr = tgen_divs, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* udiv */ + tcg_out32(s, 0x0730f010 | (COND_AL << 28) | (a0 << 16) | a1 | (a2 << 8)); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_idiv, + .out_rrr = tgen_divu, +}; + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* mul */ + tcg_out32(s, (COND_AL << 28) | 0x90 | (a0 << 16) | (a1 << 8) | a2); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* smull */ + tcg_out32(s, (COND_AL << 28) | 0x00c00090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_O2_I2(r, r, r, r), + .out_rrrr = tgen_muls2, +}; + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg rd0, TCGReg rd1, TCGReg rn, TCGReg rm) +{ + /* umull */ + tcg_out32(s, (COND_AL << 28) | 0x00800090 | + (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_O2_I2(r, r, r, r), + .out_rrrr = tgen_mulu2, +}; + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_ORR, a0, a1, encode_imm_nofail(a2)); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_ROR(a2)); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_IMM_ROR(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = 
C_O1_I2(r, r, ri), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_ASR(a2)); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, + SHIFT_IMM_ASR(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_LSL(a2)); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, + SHIFT_IMM_LSL(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, SHIFT_REG_LSR(a2)); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, + SHIFT_IMM_LSR(a2 & 0x1f)); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SUB, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subfi(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSB, a0, a2, encode_imm_nofail(a1)); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, rI, r), + .out_rrr = tgen_sub, + .out_rir = tgen_subfi, +}; + +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SUB | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IN(s, COND_AL, ARITH_SUB | TO_CPSR, ARITH_ADD | TO_CPSR, + a0, a1, a2); +} + +static void tgen_subbo_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSB | TO_CPSR, + a0, a2, encode_imm_nofail(a1)); +} + +static void tgen_subbo_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, a2); + tgen_subbo_rir(s, TCG_TYPE_I32, a0, a1, TCG_REG_TMP); +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_O1_I2(r, rI, rIN), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, + .out_rir = tgen_subbo_rir, + .out_rii = tgen_subbo_rii, +}; + +static void tgen_subbi_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SBC, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subbi_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_SBC, ARITH_ADC, a0, a1, a2); +} + +static void tgen_subbi_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, 
ARITH_RSC, a0, a2, encode_imm_nofail(a1)); +} + +static void tgen_subbi_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, a2); + tgen_subbi_rir(s, TCG_TYPE_I32, a0, a1, TCG_REG_TMP); +} + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_O1_I2(r, rI, rIK), + .out_rrr = tgen_subbi_rrr, + .out_rri = tgen_subbi_rri, + .out_rir = tgen_subbi_rir, + .out_rii = tgen_subbi_rii, +}; + +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_SBC | TO_CPSR, + a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_IK(s, COND_AL, ARITH_SBC | TO_CPSR, ARITH_ADC | TO_CPSR, + a0, a1, a2); +} + +static void tgen_subbio_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_RSC | TO_CPSR, + a0, a2, encode_imm_nofail(a1)); +} + +static void tgen_subbio_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, a2); + tgen_subbio_rir(s, TCG_TYPE_I32, a0, a1, TCG_REG_TMP); +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_O1_I2(r, rI, rIK), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, + .out_rir = tgen_subbio_rir, + .out_rii = tgen_subbio_rii, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + tcg_out_movi_apsr_c(s, 0); /* borrow = !carry */ +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_EOR, a0, a1, a2, SHIFT_IMM_LSL(0)); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_dat_imm(s, COND_AL, ARITH_EOR, a0, a1, encode_imm_nofail(a2)); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg rd, TCGReg rn, unsigned flags) +{ + if (flags & TCG_BSWAP_OS) { + /* revsh */ + tcg_out32(s, 0x06ff0fb0 | (COND_AL << 28) | (rd << 12) | rn); + return; + } + + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (COND_AL << 28) | (rd << 12) | rn); + if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, rd, rd); } } +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg rd, TCGReg rn, unsigned flags) +{ + /* rev */ + tcg_out32(s, 0x06bf0f30 | (COND_AL << 28) | (rd << 12) | rn); +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_subfi(s, type, a0, 0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_dat_reg(s, COND_AL, ARITH_MVN, a0, 0, a1, SHIFT_IMM_LSL(0)); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static void 
tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGLabel *l) +{ + cond = tgen_cmp(s, cond, a0, a1); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[cond], l); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, tcg_target_long a1, TCGLabel *l) +{ + cond = tgen_cmpi(s, cond, a0, a1); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[cond], l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rIN), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + +static void finish_setcond(TCGContext *s, TCGCond cond, TCGReg ret, bool neg) +{ + tcg_out_movi32(s, tcg_cond_to_arm_cond[tcg_invert_cond(cond)], ret, 0); + tcg_out_movi32(s, tcg_cond_to_arm_cond[cond], ret, neg ? -1 : 1); +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + cond = tgen_cmp(s, cond, a1, a2); + finish_setcond(s, cond, a0, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + cond = tgen_cmpi(s, cond, a1, a2); + finish_setcond(s, cond, a0, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + cond = tgen_cmp(s, cond, a1, a2); + finish_setcond(s, cond, a0, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + cond = tgen_cmpi(s, cond, a1, a2); + finish_setcond(s, cond, a0, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rIN), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) +{ + cond = tcg_out_cmp(s, cond, c1, c2, const_c2); + tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[cond], ARITH_MOV, ARITH_MVN, + ret, 0, vt, const_vt); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rIN, rIK, 0), + .out = tgen_movcond, +}; + +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh, + TCGLabel *l) +{ + cond = tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); + tcg_out_goto_label(s, tcg_cond_to_arm_cond[cond], l); +} + +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, rI, rI), + .out = tgen_brcond2, +}; + +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) +{ + cond = tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); + finish_setcond(s, cond, ret, false); +} + +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, rI, rI), + .out = tgen_setcond2, +}; + +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a1, TCGReg a2, unsigned shr) +{ + /* We can do extract2 in 2 insns, vs the 3 required otherwise. 
*/ + tgen_shli(s, TCG_TYPE_I32, TCG_REG_TMP, a2, 32 - shr); + tcg_out_dat_reg(s, COND_AL, ARITH_ORR, a0, TCG_REG_TMP, + a1, SHIFT_IMM_LSR(shr)); +} + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_extract2, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld8_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld8_12(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld8s_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld8s_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld16u_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld16u_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_ld16s_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_ld16s_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_st8(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xfff || offset < -0xfff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_st8_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_st8_12(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rn, ptrdiff_t offset) +{ + if (offset > 0xff || offset < -0xff) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, offset); + tcg_out_st16_r(s, COND_AL, rd, rn, TCG_REG_TMP); + } else { + tcg_out_st16_8(s, COND_AL, rd, rn, offset); + } +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_neg_i32: - case INDEX_op_not_i32: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap32_i32: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16u_i32: - case INDEX_op_extract_i32: - case INDEX_op_sextract_i32: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - 
return C_O0_I2(r, r); - - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: - return C_O1_I2(r, r, rIN); - - case INDEX_op_and_i32: - case INDEX_op_andc_i32: - case INDEX_op_clz_i32: - case INDEX_op_ctz_i32: - return C_O1_I2(r, r, rIK); - - case INDEX_op_mul_i32: - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - return C_O1_I2(r, r, r); - - case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i32: - return C_O2_I2(r, r, r, r); - - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - return C_O1_I2(r, r, rI); - - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - return C_O1_I2(r, r, ri); - - case INDEX_op_brcond_i32: - return C_O0_I2(r, rIN); - case INDEX_op_deposit_i32: - return C_O1_I2(r, 0, rZ); - case INDEX_op_extract2_i32: - return C_O1_I2(r, rZ, rZ); - case INDEX_op_movcond_i32: - return C_O1_I4(r, r, rIN, rIK, 0); - case INDEX_op_add2_i32: - return C_O2_I4(r, r, r, r, rIN, rIK); - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rI, rI, rIN, rIK); - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, rI, rI); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, rI, rI); - - case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, q); - case INDEX_op_qemu_ld_i64: - return C_O2_I1(e, p, q); - case INDEX_op_qemu_st_i32: - return C_O0_I2(q, q); - case INDEX_op_qemu_st_i64: - return C_O0_I3(Q, p, q); - case INDEX_op_st_vec: return C_O0_I2(w, r); case INDEX_op_ld_vec: diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 06e6521..458d69c 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -42,9 +42,10 @@ C_O1_I2(r, 0, reZ) C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, L, L) +C_O1_I2(r, r, r) C_O1_I2(r, r, re) C_O1_I2(r, r, ri) -C_O1_I2(r, r, rI) +C_O1_I2(r, rO, re) C_O1_I2(x, x, x) C_N1_I2(r, r, r) C_N1_I2(r, r, rW) @@ -57,4 +58,3 @@ C_O2_I1(r, r, L) C_O2_I2(a, d, a, r) C_O2_I2(r, r, L, L) C_O2_I3(a, d, 0, 1, r) -C_N1_O1_I4(r, r, 0, 1, re, re) diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h index 52142ab..dbedff1 100644 --- a/tcg/i386/tcg-target-con-str.h +++ b/tcg/i386/tcg-target-con-str.h @@ -20,7 +20,7 @@ REGS('r', ALL_GENERAL_REGS) REGS('x', ALL_VECTOR_REGS) REGS('q', ALL_BYTEL_REGS) /* regs that can be used as a byte operand */ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_ld/st */ -REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */ +REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st MO_8 data */ /* * Define constraint letters for constants: diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h index 63768ff..42647fa 100644 --- a/tcg/i386/tcg-target-has.h +++ b/tcg/i386/tcg-target-has.h @@ -26,66 +26,9 @@ #define have_avx512vbmi2 ((cpuinfo & CPUINFO_AVX512VBMI2) && have_avx512vl) /* optional instructions */ -#define TCG_TARGET_HAS_div2_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 have_bmi1 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 have_popcnt -#define 
TCG_TARGET_HAS_extract2_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 - #if TCG_TARGET_REG_BITS == 64 /* Keep 32-bit values zero-extended in a register. */ #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_div2_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 have_bmi1 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 have_popcnt -#define TCG_TARGET_HAS_extract2_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 1 -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 -#else -#define TCG_TARGET_HAS_qemu_st8_i32 1 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 33d303a..09fce27 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -424,6 +424,7 @@ static bool tcg_target_const_match(int64_t val, int ct, #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_SHRD_Ib (0xac | P_EXT) +#define OPC_STC (0xf9) #define OPC_TESTB (0x84) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) @@ -1092,7 +1093,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, { tcg_target_long diff; - if (arg == 0) { + if (arg == 0 && !s->carry_live) { tgen_arithr(s, ARITH_XOR, ret, ret); return; } @@ -1167,7 +1168,7 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } -static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Given the strength of x86 memory ordering, we only need care for store-load ordering. Experimentally, "lock orl $0,0(%esp)" is @@ -1545,6 +1546,11 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small) } } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_jxx(s, JCC_JMP, l, 0); +} + static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1, TCGArg arg2, int const_arg2, int rexw) { @@ -1642,47 +1648,78 @@ static void tcg_out_brcond(TCGContext *s, int rexw, TCGCond cond, tcg_out_jxx(s, jcc, label, small); } -#if TCG_TARGET_REG_BITS == 32 -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args, bool small) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *label) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_brcond(s, rexw, cond, arg1, arg2, false, label, false); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, tcg_target_long arg2, TCGLabel *label) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_brcond(s, rexw, cond, arg1, arg2, true, label, false); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, reT), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, + TCGReg ah, TCGArg bl, bool blconst, + TCGArg bh, bool bhconst, + TCGLabel *label_this, bool small) { TCGLabel *label_next = gen_new_label(); - TCGLabel *label_this = arg_label(args[5]); - TCGCond cond = args[4]; switch (cond) { case TCG_COND_EQ: case TCG_COND_TSTEQ: tcg_out_brcond(s, 0, tcg_invert_cond(cond), - args[0], args[2], const_args[2], label_next, 1); - tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], - label_this, small); + al, bl, blconst, label_next, true); + tcg_out_brcond(s, 0, cond, ah, bh, bhconst, label_this, small); break; case TCG_COND_NE: case TCG_COND_TSTNE: - tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2], - label_this, small); - tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3], - label_this, small); + tcg_out_brcond(s, 0, cond, al, bl, blconst, label_this, small); + tcg_out_brcond(s, 0, cond, ah, bh, bhconst, label_this, small); break; default: - tcg_out_brcond(s, 0, tcg_high_cond(cond), args[1], - args[3], const_args[3], label_this, small); + tcg_out_brcond(s, 0, tcg_high_cond(cond), + ah, bh, bhconst, label_this, small); tcg_out_jxx(s, JCC_JNE, label_next, 1); - tcg_out_brcond(s, 0, tcg_unsigned_cond(cond), args[0], - args[2], const_args[2], label_this, small); + tcg_out_brcond(s, 0, tcg_unsigned_cond(cond), + al, bl, blconst, label_this, small); break; } tcg_out_label(s, label_next); } + +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, + TCGReg ah, TCGArg bl, bool blconst, + TCGArg bh, bool bhconst, TCGLabel *l) +{ + tcg_out_brcond2(s, cond, al, ah, bl, blconst, bh, bhconst, l, false); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) #endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, ri, ri), + .out = tgen_brcond2, +}; -static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, - TCGArg dest, TCGArg arg1, TCGArg arg2, - int const_arg2, bool neg) +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGArg arg2, + bool const_arg2, bool neg) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; int cmp_rexw = rexw; bool inv = false; bool cleared; @@ -1757,7 +1794,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, case TCG_COND_LT: /* If arg2 is 0, extract the sign bit. */ if (const_arg2 && arg2 == 0) { - tcg_out_mov(s, rexw ? 
TCG_TYPE_I64 : TCG_TYPE_I32, dest, arg1); + tcg_out_mov(s, type, dest, arg1); if (inv) { tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, dest); } @@ -1793,49 +1830,89 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond, } } -#if TCG_TARGET_REG_BITS == 32 -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(q, r, reT), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) { - TCGArg new_args[6]; - TCGLabel *label_true, *label_over; + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(q, r, reT), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; - memcpy(new_args, args+1, 5*sizeof(TCGArg)); +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) +{ + TCGLabel *label_over = gen_new_label(); - if (args[0] == args[1] || args[0] == args[2] - || (!const_args[3] && args[0] == args[3]) - || (!const_args[4] && args[0] == args[4])) { - /* When the destination overlaps with one of the argument - registers, don't do anything tricky. */ - label_true = gen_new_label(); - label_over = gen_new_label(); + if (ret == al || ret == ah + || (!const_bl && ret == bl) + || (!const_bh && ret == bh)) { + /* + * When the destination overlaps with one of the argument + * registers, don't do anything tricky. + */ + TCGLabel *label_true = gen_new_label(); - new_args[5] = label_arg(label_true); - tcg_out_brcond2(s, new_args, const_args+1, 1); + tcg_out_brcond2(s, cond, al, ah, bl, const_bl, + bh, const_bh, label_true, true); - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_movi(s, TCG_TYPE_I32, ret, 0); tcg_out_jxx(s, JCC_JMP, label_over, 1); tcg_out_label(s, label_true); - tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); - tcg_out_label(s, label_over); + tcg_out_movi(s, TCG_TYPE_I32, ret, 1); } else { - /* When the destination does not overlap one of the arguments, - clear the destination first, jump if cond false, and emit an - increment in the true case. This results in smaller code. */ - - tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + /* + * When the destination does not overlap one of the arguments, + * clear the destination first, jump if cond false, and emit an + * increment in the true case. This results in smaller code. 
+ */ + tcg_out_movi(s, TCG_TYPE_I32, ret, 0); - label_over = gen_new_label(); - new_args[4] = tcg_invert_cond(new_args[4]); - new_args[5] = label_arg(label_over); - tcg_out_brcond2(s, new_args, const_args+1, 1); + tcg_out_brcond2(s, tcg_invert_cond(cond), al, ah, bl, const_bl, + bh, const_bh, label_over, true); - tgen_arithi(s, ARITH_ADD, args[0], 1, 0); - tcg_out_label(s, label_over); + tgen_arithi(s, ARITH_ADD, ret, 1, 0); } + tcg_out_label(s, label_over); } + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) #endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, ri, ri), + .out = tgen_setcond2, +}; static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, TCGReg dest, TCGReg v1) @@ -1843,57 +1920,20 @@ static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); } -static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, - TCGReg v1) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, + TCGArg vf, bool consf_vf) { + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; int jcc = tcg_out_cmp(s, cond, c1, c2, const_c2, rexw); - tcg_out_cmov(s, jcc, rexw, dest, v1); + tcg_out_cmov(s, jcc, rexw, dest, vt); } -static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, - TCGArg arg2, bool const_a2) -{ - if (have_bmi1) { - tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); - if (const_a2) { - tcg_debug_assert(arg2 == (rexw ? 64 : 32)); - } else { - tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); - } - } else { - tcg_debug_assert(dest != arg2); - tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); - tcg_out_cmov(s, JCC_JE, rexw, dest, arg2); - } -} - -static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, - TCGArg arg2, bool const_a2) -{ - if (have_lzcnt) { - tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1); - if (const_a2) { - tcg_debug_assert(arg2 == (rexw ? 64 : 32)); - } else { - tcg_debug_assert(dest != arg2); - tcg_out_cmov(s, JCC_JB, rexw, dest, arg2); - } - } else { - tcg_debug_assert(!const_a2); - tcg_debug_assert(dest != arg1); - tcg_debug_assert(dest != arg2); - - /* Recall that the output of BSR is the index not the count. */ - tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1); - tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); - - /* Since we have destroyed the flags from BSR, we have to re-test. 
*/ - int jcc = tcg_out_cmp(s, TCG_COND_EQ, arg1, 0, 1, rexw); - tcg_out_cmov(s, jcc, rexw, dest, arg2); - } -} +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, reT, r, 0), + .out = tgen_movcond, +}; static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest) { @@ -2382,23 +2422,50 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, true); + tcg_out_qemu_ld_direct(s, data, -1, h, type, get_memop(oi)); + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, L), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; ldst = prepare_host_addr(s, &h, addr, oi, true); - tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi)); + tcg_out_qemu_ld_direct(s, datalo, datahi, h, type, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(r, r, L), + .out = tgen_qemu_ld2, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, HostAddress h, MemOp memop) { @@ -2417,7 +2484,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, switch (memop & MO_SIZE) { case MO_8: - /* This is handled with constraints on INDEX_op_qemu_st8_i32. */ + /* This is handled with constraints in cset_qemu_st(). */ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4); tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + h.seg, datalo, h.base, h.index, 0, h.ofs); @@ -2509,8 +2576,38 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, false); + tcg_out_qemu_st_direct(s, data, -1, h, get_memop(oi)); + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = -1; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static TCGConstraintSetIndex cset_qemu_st(TCGType type, unsigned flags) +{ + return flags == MO_8 ? C_O0_I2(s, L) : C_O0_I2(L, L); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 32 ? C_Dynamic : C_O0_I2(L, L), + .base.dynamic_constraint = + TCG_TARGET_REG_BITS == 32 ? 
cset_qemu_st : NULL, + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -2519,13 +2616,18 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(L, L, L), + .out = tgen_qemu_st2, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { /* Reuse the zeroing that exists for goto_ptr. */ @@ -2553,6 +2655,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + /* Jump to the given host address (could be epilogue) */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2562,480 +2670,938 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* no need to flush icache explicitly */ } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - TCGArg a0, a1, a2; - int c, const_a2, vexop, rexw; + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; -#if TCG_TARGET_REG_BITS == 64 -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - case glue(glue(INDEX_op_, x), _i32) -#else -# define OP_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32) -#endif + if (a0 == a1) { + tgen_arithr(s, ARITH_ADD + rexw, a0, a2); + } else if (a0 == a2) { + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + } else { + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, 0); + } +} - /* Hoist the loads of the most common arguments. */ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - const_a2 = const_args[2]; - rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; - switch (opc) { - case INDEX_op_goto_ptr: - /* jmp to the given host address (could be epilogue) */ - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); - break; - case INDEX_op_br: - tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0); - break; - OP_32_64(ld8u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. */ - tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2); - break; - OP_32_64(ld8s): - tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2); - break; - OP_32_64(ld16u): - /* Note that we can ignore REXW for the zero-extend to 64-bit. 
*/ - tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2); - break; - OP_32_64(ld16s): - tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2); - break; -#if TCG_TARGET_REG_BITS == 64 - case INDEX_op_ld32u_i64: -#endif - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2); - break; + if (a0 == a1) { + tgen_arithi(s, ARITH_ADD + rexw, a0, a2, false); + } else { + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, -1, 0, a2); + } +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, re), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; + +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_ADD + rexw, a0, a2); +} + +static void tgen_addco_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_ADD + rexw, a0, a2, true); +} + +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addco, + .out_rri = tgen_addco_imm, +}; + +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_ADC + rexw, a0, a2); +} + +static void tgen_addcio_imm(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_ADC + rexw, a0, a2, true); +} + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_addcio, + .out_rri = tgen_addcio_imm, +}; + +static void tgen_addci_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* Because "0O" is not a valid constraint, we must match ourselves. */ + if (a0 == a2) { + tgen_addcio(s, type, a0, a0, a1); + } else { + tcg_out_mov(s, type, a0, a1); + tgen_addcio(s, type, a0, a0, a2); + } +} + +static void tgen_addci_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_addcio_imm(s, type, a0, a0, a2); +} + +static void tgen_addci_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tgen_addci_rri(s, type, a0, a2, a1); +} + +static void tgen_addci_rii(TCGContext *s, TCGType type, TCGReg a0, + tcg_target_long a1, tcg_target_long a2) +{ + if (a2 == 0) { + /* Implement 0 + 0 + C with -(x - x - c). */ + tgen_arithr(s, ARITH_SBB, a0, a0); + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, a0); + } else { + tcg_out_movi(s, type, a0, a2); + tgen_addcio_imm(s, type, a0, a0, a1); + } +} + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_O1_I2(r, rO, re), + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, + .out_rir = tgen_addci_rir, + .out_rii = tgen_addci_rii, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + tcg_out8(s, OPC_STC); +} + +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_AND + rexw, a0, a2); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tgen_arithi(s, ARITH_AND + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, 0, reZ), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); +} + +static TCGConstraintSetIndex cset_andc(TCGType type, unsigned flags) +{ + return have_bmi1 ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_andc, + .out_rrr = tgen_andc, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc; + + if (have_lzcnt) { + tcg_out_modrm(s, OPC_LZCNT + rexw, a0, a1); + jcc = JCC_JB; + } else { + /* Recall that the output of BSR is the index not the count. */ + tcg_out_modrm(s, OPC_BSR + rexw, a0, a1); + tgen_arithi(s, ARITH_XOR + rexw, a0, rexw ? 63 : 31, 0); + + /* Since we have destroyed the flags from BSR, we have to re-test. */ + jcc = tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, rexw); + } + tcg_out_cmov(s, jcc, rexw, a0, a2); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_LZCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) +{ + return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return have_popcnt ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + int jcc; + + if (have_bmi1) { + tcg_out_modrm(s, OPC_TZCNT + rexw, a0, a1); + jcc = JCC_JB; + } else { + tcg_out_modrm(s, OPC_BSF + rexw, a0, a1); + jcc = JCC_JE; + } + tcg_out_cmov(s, jcc, rexw, a0, a2); +} + +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_TZCNT + rexw, a0, a1); +} + +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) +{ + return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divs2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, a4); +} + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_O2_I3(a, d, 0, 1, r), + .out_rr01r = tgen_divs2, +}; + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, a4); +} + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_O2_I3(a, d, 0, 1, r), + .out_rr01r = tgen_divu2, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; - OP_32_64(st8): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2); - tcg_out8(s, a0); - } else { - tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2); - } - break; - OP_32_64(st16): - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2); - tcg_out16(s, a0); - } else { - tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2); - } - break; #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_st32_i64: -#endif - case INDEX_op_st_i32: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2); - tcg_out32(s, a0); - } else { - tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2); - } - break; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); +} - OP_32_64(add): - /* For 3-operand addition, use LEA. */ - if (a0 != a1) { - TCGArg c3 = 0; - if (const_a2) { - c3 = a2, a2 = -1; - } else if (a0 == a2) { - /* Watch out for dest = src + dest, since we've removed - the matching constraint on the add. */ - tgen_arithr(s, ARITH_ADD + rexw, a0, a1); - break; - } +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_extrh_i64_i32, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); - break; - } - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - gen_arith: - if (const_a2) { - tgen_arithi(s, c + rexw, a0, a2, 0); - } else { - tgen_arithr(s, c + rexw, a0, a2); - } - break; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); +} - OP_32_64(andc): - if (const_a2) { - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); - tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0); - } else { - tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1); - } - break; +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; - OP_32_64(mul): - if (const_a2) { - int32_t val; - val = a2; - if (val == (int8_t)val) { - tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); - tcg_out8(s, val); - } else { - tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); - tcg_out32(s, val); - } - } else { - tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2); - } - break; + if (a2 == (int8_t)a2) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0); + tcg_out8(s, a2); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0); + tcg_out32(s, a2); + } +} - OP_32_64(div2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); - break; - OP_32_64(divu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); - break; +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; - OP_32_64(shl): - /* For small constant 3-operand shift, use LEA. */ - if (const_a2 && a0 != a1 && (a2 - 1) < 3) { - if (a2 - 1 == 0) { - /* shl $1,a1,a0 -> lea (a1,a1),a0 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); - } else { - /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ - tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); - } - break; - } - c = SHIFT_SHL; - vexop = OPC_SHLX; - goto gen_shift_maybe_vex; - OP_32_64(shr): - c = SHIFT_SHR; - vexop = OPC_SHRX; - goto gen_shift_maybe_vex; - OP_32_64(sar): - c = SHIFT_SAR; - vexop = OPC_SARX; - goto gen_shift_maybe_vex; - OP_32_64(rotl): - c = SHIFT_ROL; - goto gen_shift; - OP_32_64(rotr): - c = SHIFT_ROR; - goto gen_shift; - gen_shift_maybe_vex: - if (have_bmi2) { - if (!const_a2) { - tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1); - break; - } - tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1); - } - /* FALLTHRU */ - gen_shift: - if (const_a2) { - tcg_out_shifti(s, c + rexw, a0, a2); - } else { - tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0); - } - break; +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, a3); +} - OP_32_64(ctz): - tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); - break; - OP_32_64(clz): - tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); - break; - OP_32_64(ctpop): - tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1); - break; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_O2_I2(a, d, a, r), + .out_rrrr = tgen_muls2, +}; - OP_32_64(brcond): - tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1], - arg_label(args[3]), 0); - break; - OP_32_64(setcond): - tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, false); - break; - OP_32_64(negsetcond): - tcg_out_setcond(s, rexw, args[3], a0, a1, a2, const_a2, true); - break; - OP_32_64(movcond): - tcg_out_movcond(s, rexw, args[5], a0, a1, a2, const_a2, args[3]); - break; +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; - OP_32_64(bswap16): - if (a2 & TCG_BSWAP_OS) { - /* Output must be sign-extended. */ - if (rexw) { - tcg_out_bswap64(s, a0); - tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); - } else { - tcg_out_bswap32(s, a0); - tcg_out_shifti(s, SHIFT_SAR, a0, 16); - } - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - /* Output must be zero-extended, but input isn't. 
*/ - tcg_out_bswap32(s, a0); - tcg_out_shifti(s, SHIFT_SHR, a0, 16); - } else { - tcg_out_rolw_8(s, a0); - } - break; - OP_32_64(bswap32): - tcg_out_bswap32(s, a0); - if (rexw && (a2 & TCG_BSWAP_OS)) { - tcg_out_ext32s(s, a0, a0); - } - break; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; - OP_32_64(neg): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); - break; - OP_32_64(not): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); - break; +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, a3); +} - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, a0, -1, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_ld_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I128); - break; +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_O2_I2(a, d, a, r), + .out_rrrr = tgen_mulu2, +}; - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st8_i32: - tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, a0, -1, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_st_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I128); - break; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; - OP_32_64(mulu2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); - break; - OP_32_64(muls2): - tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]); - break; - OP_32_64(add2): - if (const_args[4]) { - tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1); - } else { - tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1); - } else { - tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]); - } - break; - OP_32_64(sub2): - if (const_args[4]) { - tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1); - } else { - tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]); - } - if (const_args[5]) { - tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1); +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_OR + rexw, a0, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tgen_arithi(s, ARITH_OR + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_ROL, a0); +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_shifti(s, SHIFT_ROL + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, 0, ci), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_ROR, a0); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_shifti(s, SHIFT_ROR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, 0, ci), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + +static TCGConstraintSetIndex cset_shift(TCGType type, unsigned flags) +{ + return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); +} + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SARX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SAR, a0); + } +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SHLX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SHL, a0); + } +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + /* For small constant 3-operand shift, use LEA. 
*/ + if (a0 != a1 && a2 >= 1 && a2 <= 3) { + if (a2 == 1) { + /* shl $1,a1,a0 -> lea (a1,a1),a0 */ + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0); } else { - tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]); + /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */ + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0); } - break; + return; + } + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SHL + rexw, a0, a2); +} -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args, 0); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; -#else /* TCG_TARGET_REG_BITS == 64 */ - case INDEX_op_ld32s_i64: - tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2); - break; - case INDEX_op_st_i64: - if (const_args[0]) { - tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2); - tcg_out32(s, a0); +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + if (have_bmi2) { + tcg_out_vex_modrm(s, OPC_SHRX + rexw, a0, a2, a1); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, SHIFT_SHR, a0); + } +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + tcg_out_mov(s, type, a0, a1); + tcg_out_shifti(s, SHIFT_SHR + rexw, a0, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_shift, + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_SUB + rexw, a0, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_sub, +}; + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_SUB + rexw, a0, a2, 1); +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_sub, + .out_rri = tgen_subbo_rri, +}; + +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_SBB + rexw, a0, a2); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithi(s, ARITH_SBB + rexw, a0, a2, 1); +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, +}; + +#define outop_subbi outop_subbio + +static void tcg_out_set_borrow(TCGContext *s) +{ + tcg_out8(s, OPC_STC); +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tgen_arithr(s, ARITH_XOR + rexw, a0, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tgen_arithi(s, ARITH_XOR + rexw, a0, a2, false); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, 0, re), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + if (flags & TCG_BSWAP_OS) { + /* Output must be sign-extended. */ + if (rexw) { + tcg_out_bswap64(s, a0); + tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48); } else { - tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2); + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SAR, a0, 16); } - break; + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + /* Output must be zero-extended, but input isn't. */ + tcg_out_bswap32(s, a0); + tcg_out_shifti(s, SHIFT_SHR, a0, 16); + } else { + tcg_out_rolw_8(s, a0); + } +} - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, a0); - break; - case INDEX_op_extrh_i64_i32: - tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32); - break; +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_bswap32(s, a0); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap32, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_bswap64(s, a0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_bswap64, +}; #endif - OP_32_64(deposit): - if (args[3] == 0 && args[4] == 8) { - /* load bits 0..7 */ - if (const_a2) { - tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), - 0, a0, 0); - tcg_out8(s, a2); - } else { - tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); - } - } else if (TCG_TARGET_REG_BITS == 32 && args[3] == 8 && args[4] == 8) { - /* load bits 8..15 */ - if (const_a2) { - tcg_out8(s, OPC_MOVB_Ib + a0 + 4); - tcg_out8(s, a2); - } else { - tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); - } - } else if (args[3] == 0 && args[4] == 16) { - /* load bits 0..15 */ - if (const_a2) { - tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), - 0, a0, 0); - tcg_out16(s, a2); - } else { - tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); - } - } else { - g_assert_not_reached(); - } - break; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0); +} - case INDEX_op_extract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32u(s, a0, a1); - break; - } - /* This is a 32-bit zero-extending right shift. */ - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tcg_out_shifti(s, SHIFT_SHR, a0, a2); - break; - } - /* FALLTHRU */ - case INDEX_op_extract_i32: - if (a2 == 0 && args[3] == 8) { +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, 0), + .out_rr = tgen_not, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (ofs == 0 && len == 8) { + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0); + } else if (ofs == 0 && len == 16) { + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0); + } else if (TCG_TARGET_REG_BITS == 32 && ofs == 8 && len == 8) { + tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4); + } else { + g_assert_not_reached(); + } +} + +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + if (ofs == 0 && len == 8) { + tcg_out_opc(s, OPC_MOVB_Ib | P_REXB_RM | LOWREGMASK(a0), 0, a0, 0); + tcg_out8(s, a2); + } else if (ofs == 0 && len == 16) { + tcg_out_opc(s, OPC_MOVL_Iv | P_DATA16 | LOWREGMASK(a0), 0, a0, 0); + tcg_out16(s, a2); + } else if (TCG_TARGET_REG_BITS == 32 && ofs == 8 && len == 8) { + tcg_out8(s, OPC_MOVB_Ib + a0 + 4); + tcg_out8(s, a2); + } else { + g_assert_not_reached(); + } +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(q, 0, qi), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: tcg_out_ext8u(s, a0, a1); - } else if (a2 == 0 && args[3] == 16) { + return; + case 16: tcg_out_ext16u(s, a0, a1); - } else if (a2 == 8 && args[3] == 8) { - /* - * On the off-chance that we can use the high-byte registers. - * Otherwise we emit the same ext16 + shift pattern that we - * would have gotten from the normal tcg-op.c expansion. - */ - if (a1 < 4 && a0 < 8) { - tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); - } else { - tcg_out_ext16u(s, a0, a1); - tcg_out_shifti(s, SHIFT_SHR, a0, 8); - } + return; + case 32: + tcg_out_ext32u(s, a0, a1); + return; + } + } else if (TCG_TARGET_REG_BITS == 64 && ofs + len == 32) { + /* This is a 32-bit zero-extending right shift. */ + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + tcg_out_shifti(s, SHIFT_SHR, a0, ofs); + return; + } else if (ofs == 8 && len == 8) { + /* + * On the off-chance that we can use the high-byte registers. + * Otherwise we emit the same ext16 + shift pattern that we + * would have gotten from the normal tcg-op.c expansion. 
+ */ + if (a1 < 4 && (TCG_TARGET_REG_BITS == 32 || a0 < 8)) { + tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4); } else { - g_assert_not_reached(); + tcg_out_ext16u(s, a0, a1); + tcg_out_shifti(s, SHIFT_SHR, a0, 8); } - break; + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; - case INDEX_op_sextract_i64: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1); - } else if (a2 == 0 && args[3] == 32) { +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: tcg_out_ext32s(s, a0, a1); - } else { - g_assert_not_reached(); + return; } - break; - - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); - } else if (a2 == 8 && args[3] == 8) { - if (a1 < 4 && a0 < 8) { - tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); - } else { - tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1); - tcg_out_shifti(s, SHIFT_SAR, a0, 8); - } + } else if (ofs == 8 && len == 8) { + if (type == TCG_TYPE_I32 && a1 < 4 && a0 < 8) { + tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4); } else { - g_assert_not_reached(); + tcg_out_ext16s(s, type, a0, a1); + tgen_sari(s, type, a0, a0, 8); } - break; + return; + } + g_assert_not_reached(); +} - OP_32_64(extract2): - /* Note that SHRD outputs to the r/m operand. */ - tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); - tcg_out8(s, args[3]); - break; +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); - } +static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a1, TCGReg a2, unsigned shr) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + + /* Note that SHRD outputs to the r/m operand. */ + tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0); + tcg_out8(s, shr); +} -#undef OP_32_64 +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_extract2, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVZBL, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + int rexw = type == TCG_TYPE_I32 ? 
0 : P_REXW; + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVZWL, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, dest, base, offset); } +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVL_GvEv, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVSLQ, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif + +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, data, base, offset); +} + +static void tgen_st8_i(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, base, offset); + tcg_out8(s, data); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(qi, r), + .out_r = tgen_st8_r, + .out_i = tgen_st8_i, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, data, base, offset); +} + +static void tgen_st16_i(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, base, offset); + tcg_out16(s, data); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(ri, r), + .out_r = tgen_st16_r, + .out_i = tgen_st16_i, +}; + +static void tgen_st_i(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, ptrdiff_t offset) +{ + bool ok = tcg_out_sti(s, type, data, base, offset); + tcg_debug_assert(ok); +} + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(re, r), + .out_r = tcg_out_st, + .out_i = tgen_st_i, +}; + static int const umin_insn[4] = { OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ }; @@ -3581,182 +4147,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - return C_O0_I2(qi, r); - - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case 
INDEX_op_st32_i64: - return C_O0_I2(ri, r); - - case INDEX_op_st_i64: - return C_O0_I2(re, r); - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - return C_O1_I2(r, r, re); - - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - return C_O1_I2(r, 0, re); - - case INDEX_op_and_i32: - case INDEX_op_and_i64: - return C_O1_I2(r, 0, reZ); - - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - return C_O1_I2(r, r, rI); - - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci); - - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, 0, ci); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, reT); - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_extrh_i64_i32: - return C_O1_I1(r, 0); - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - return C_O1_I1(r, q); - - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: - return C_O1_I1(r, r); - - case INDEX_op_extract2_i32: - case INDEX_op_extract2_i64: - return C_O1_I2(r, 0, r); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(q, 0, qi); - - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(q, r, reT); - - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, reT, r, 0); - - case INDEX_op_div2_i32: - case INDEX_op_div2_i64: - case INDEX_op_divu2_i32: - case INDEX_op_divu2_i64: - return C_O2_I3(a, d, 0, 1, r); - - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: - return C_O2_I2(a, d, a, r); - - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, re, re); - - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r); - - case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, L); - - case INDEX_op_qemu_st_i32: - return C_O0_I2(L, L); - case INDEX_op_qemu_st8_i32: - return C_O0_I2(s, L); - - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L); - - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? 
C_O0_I2(L, L) : C_O0_I3(L, L, L); - - case INDEX_op_qemu_ld_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - return C_O2_I1(r, r, L); - case INDEX_op_qemu_st_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - return C_O0_I3(L, L, L); - - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, ri, ri); - - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, ri, ri); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: return C_O1_I1(x, r); diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h index 8afaee9..fd731c0 100644 --- a/tcg/loongarch64/tcg-target-con-set.h +++ b/tcg/loongarch64/tcg-target-con-set.h @@ -16,25 +16,22 @@ */ C_O0_I1(r) C_O0_I2(rz, r) -C_O0_I2(rz, rz) +C_O0_I2(r, rz) C_O0_I2(w, r) C_O0_I3(r, r, r) C_O1_I1(r, r) C_O1_I1(w, r) C_O1_I1(w, w) -C_O1_I2(r, r, rC) +C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rU) C_O1_I2(r, r, rW) C_O1_I2(r, 0, rz) -C_O1_I2(r, rz, ri) -C_O1_I2(r, rz, rJ) -C_O1_I2(r, rz, rz) C_O1_I2(w, w, w) C_O1_I2(w, w, wM) C_O1_I2(w, w, wA) C_O1_I3(w, w, w, w) -C_O1_I4(r, rz, rJ, rz, rz) +C_O1_I4(r, r, rJ, rz, rz) C_N2_I1(r, r, r) diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h index 9975912..e5e5745 100644 --- a/tcg/loongarch64/tcg-target-con-str.h +++ b/tcg/loongarch64/tcg-target-con-str.h @@ -23,7 +23,6 @@ REGS('w', ALL_VECTOR_REGS) CONST('I', TCG_CT_CONST_S12) CONST('J', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U12) -CONST('C', TCG_CT_CONST_C12) CONST('W', TCG_CT_CONST_WSZ) CONST('M', TCG_CT_CONST_VCMP) CONST('A', TCG_CT_CONST_VADD) diff --git a/tcg/loongarch64/tcg-target-has.h b/tcg/loongarch64/tcg-target-has.h index 188b007..32abc6f 100644 --- a/tcg/loongarch64/tcg-target-has.h +++ b/tcg/loongarch64/tcg-target-has.h @@ -9,68 +9,8 @@ #include "host/cpuinfo.h" -/* optional instructions */ -#define TCG_TARGET_HAS_negsetcond_i32 0 -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_div2_i32 0 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - /* 64-bit operations */ -#define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_div2_i64 0 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 
1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_qemu_ldst_i128 (cpuinfo & CPUINFO_LSX) diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index cbd7642..e5580d6 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -176,10 +176,9 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_S12 0x100 #define TCG_CT_CONST_S32 0x200 #define TCG_CT_CONST_U12 0x400 -#define TCG_CT_CONST_C12 0x800 -#define TCG_CT_CONST_WSZ 0x1000 -#define TCG_CT_CONST_VCMP 0x2000 -#define TCG_CT_CONST_VADD 0x4000 +#define TCG_CT_CONST_WSZ 0x800 +#define TCG_CT_CONST_VCMP 0x1000 +#define TCG_CT_CONST_VADD 0x2000 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -205,18 +204,27 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) { return true; } - if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) { - return true; - } if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return true; } - int64_t vec_val = sextract64(val, 0, 8 << vece); - if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) { - return true; - } - if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { - return true; + if (ct & (TCG_CT_CONST_VCMP | TCG_CT_CONST_VADD)) { + int64_t vec_val = sextract64(val, 0, 8 << vece); + if (ct & TCG_CT_CONST_VCMP) { + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_LE: + case TCG_COND_LT: + return -0x10 <= vec_val && vec_val <= 0x0f; + case TCG_COND_LEU: + case TCG_COND_LTU: + return 0x00 <= vec_val && vec_val <= 0x1f; + default: + return false; + } + } + if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) { + return true; + } } return false; } @@ -293,7 +301,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, * TCG intrinsics */ -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Baseline LoongArch only has the full barrier, unfortunately. */ tcg_out_opc_dbar(s, 0); @@ -538,28 +546,6 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg) tcg_out_ext32s(s, ret, arg); } -static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc, - TCGReg a0, TCGReg a1, TCGReg a2, - bool c2, bool is_32bit) -{ - if (c2) { - /* - * Fast path: semantics already satisfied due to constraint and - * insn behavior, single instruction is enough. - */ - tcg_debug_assert(a2 == (is_32bit ? 32 : 64)); - /* all clz/ctz insns belong to DJ-format */ - tcg_out32(s, encode_dj_insn(opc, a0, a1)); - return; - } - - tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1)); - /* a0 = a1 ? 
REG_TMP0 : a2 */ - tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); - tcg_out_opc_masknez(s, a0, a2, a1); - tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0); -} - #define SETCOND_INV TCG_TARGET_NB_REGS #define SETCOND_NEZ (SETCOND_INV << 1) #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) @@ -660,14 +646,29 @@ static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, } static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, tcg_target_long arg2, bool c2) + TCGReg arg1, tcg_target_long arg2, + bool c2, bool neg) { int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; - if (tmpflags != ret) { - TCGReg tmp = tmpflags & ~SETCOND_FLAGS; - + if (neg) { + /* If intermediate result is zero/non-zero: test != 0. */ + if (tmpflags & SETCOND_NEZ) { + tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp); + tmp = ret; + } + /* Produce the 0/-1 result. */ + if (tmpflags & SETCOND_INV) { + tcg_out_opc_addi_d(s, ret, tmp, -1); + } else { + tcg_out_opc_sub_d(s, ret, TCG_REG_ZERO, tmp); + } + } else { switch (tmpflags & SETCOND_FLAGS) { + case 0: + tcg_debug_assert(tmp == ret); + break; case SETCOND_INV: /* Intermediate result is boolean: simply invert. */ tcg_out_opc_xori(s, ret, tmp, 1); @@ -686,11 +687,47 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, tcg_target_long c2, bool const2, - TCGReg v1, TCGReg v2) +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) { - int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2); + int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const_c2); TCGReg t; /* Standardize the test below to t != 0. 
*/ @@ -710,10 +747,21 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rJ, rz, rz), + .out = tgen_movcond, +}; + /* * Branch helpers */ +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, l, 0); + tcg_out_opc_b(s, 0); +} + static const struct { LoongArchInsn op; bool swap; @@ -730,8 +778,8 @@ static const struct { [TCG_COND_GTU] = { OPC_BGTU, false } }; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) { LoongArchInsn op = tcg_brcond_to_loongarch[cond].op; @@ -748,6 +796,11 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0)); } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rz), + .out_rr = tgen_brcond, +}; + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; @@ -1114,22 +1167,27 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; ldst = prepare_host_addr(s, &h, addr_reg, oi, true); - tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h); + tcg_out_qemu_ld_indexed(s, get_memop(oi), type, data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, TCGReg rd, HostAddress h) { @@ -1154,8 +1212,8 @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -1164,12 +1222,17 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi, TCGReg addr_reg, MemOpIdx oi, bool is_ld) { @@ -1217,6 +1280,28 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi } } +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, true); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_N2_I1(r, r, r), + .out = tgen_qemu_ld2, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, 
datalo, datahi, addr_reg, oi, false); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(r, r, r), + .out = tgen_qemu_st2, +}; + /* * Entry-points */ @@ -1254,6 +1339,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -1274,445 +1364,684 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) -{ - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - TCGArg a3 = args[3]; - int c2 = const_args[2]; - - switch (opc) { - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_goto_ptr: - tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0); - break; +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_add_w(s, a0, a1, a2); + } else { + tcg_out_opc_add_d(s, a0, a1, a2); + } +} - case INDEX_op_br: - tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0), - 0); - tcg_out_opc_b(s, 0); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_add, + .out_rri = tcg_out_addi, +}; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_extrh_i64_i32: - tcg_out_opc_srai_d(s, a0, a1, 32); - break; +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO); - break; +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - if (c2) { - tcg_out_opc_ori(s, a0, a1, a2); - tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO); - } else { - tcg_out_opc_nor(s, a0, a1, a2); - } - break; +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - if (c2) { - /* guaranteed to fit due to constraint */ - tcg_out_opc_andi(s, a0, a1, ~a2); - } else { - tcg_out_opc_andn(s, a0, a1, a2); - } - break; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_and(s, a0, a1, a2); +} - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - if (c2) { - /* guaranteed to fit due to constraint */ - tcg_out_opc_ori(s, a0, a1, ~a2); - } else { - tcg_out_opc_orn(s, a0, a1, a2); - } - break; +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_andi(s, a0, a1, a2); +} - case INDEX_op_and_i32: - case INDEX_op_and_i64: - if (c2) { - tcg_out_opc_andi(s, a0, a1, a2); - } else { - tcg_out_opc_and(s, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - if (c2) { - tcg_out_opc_ori(s, a0, a1, a2); - } else { - tcg_out_opc_or(s, a0, a1, a2); - } - break; +static void tgen_andc(TCGContext 
*s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_andn(s, a0, a1, a2); +} - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - if (c2) { - tcg_out_opc_xori(s, a0, a1, a2); - } else { - tcg_out_opc_xor(s, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; - case INDEX_op_extract_i32: - if (a2 == 0 && args[3] <= 12) { - tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1); - } - break; - case INDEX_op_extract_i64: - if (a2 == 0 && args[3] <= 12) { - tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1); - } - break; +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* a2 is constrained to exactly the type width. */ + if (type == TCG_TYPE_I32) { + tcg_out_opc_clz_w(s, a0, a1); + } else { + tcg_out_opc_clz_d(s, a0, a1); + } +} - case INDEX_op_sextract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32s(s, a0, a1); - } else { - tcg_out_opc_srai_w(s, a0, a1, a2); - } - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1); - } else { - g_assert_not_reached(); - } - break; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_clzi(s, type, TCG_REG_TMP0, a1, /* ignored */ 0); + /* a0 = a1 ? REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, a0, TCG_REG_TMP0); +} - case INDEX_op_deposit_i32: - tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1); - break; - case INDEX_op_deposit_i64: - tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1); - break; +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rW), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_opc_revb_2h(s, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext16u(s, a0, a0); - } - break; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_bswap32_i32: - /* All 32-bit values are computed sign-extended in the register. */ - a2 = TCG_BSWAP_OS; - /* fallthrough */ - case INDEX_op_bswap32_i64: - tcg_out_opc_revb_2w(s, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext32s(s, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext32u(s, a0, a0); - } - break; +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* a2 is constrained to exactly the type width. */ + if (type == TCG_TYPE_I32) { + tcg_out_opc_ctz_w(s, a0, a1); + } else { + tcg_out_opc_ctz_d(s, a0, a1); + } +} - case INDEX_op_bswap64_i64: - tcg_out_opc_revb_d(s, a0, a1); - break; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_ctzi(s, type, TCG_REG_TMP0, a1, /* ignored */ 0); + /* a0 = a1 ? 
REG_TMP0 : a2 */ + tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1); + tcg_out_opc_masknez(s, a0, a2, a1); + tcg_out_opc_or(s, a0, a0, TCG_REG_TMP0); +} - case INDEX_op_clz_i32: - tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true); - break; - case INDEX_op_clz_i64: - tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false); - break; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, rW), + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; - case INDEX_op_ctz_i32: - tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true); - break; - case INDEX_op_ctz_i64: - tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false); - break; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_div_w(s, a0, a1, a2); + } else { + tcg_out_opc_div_d(s, a0, a1, a2); + } +} - case INDEX_op_shl_i32: - if (c2) { - tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_sll_w(s, a0, a1, a2); - } - break; - case INDEX_op_shl_i64: - if (c2) { - tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_sll_d(s, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; - case INDEX_op_shr_i32: - if (c2) { - tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_srl_w(s, a0, a1, a2); - } - break; - case INDEX_op_shr_i64: - if (c2) { - tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_srl_d(s, a0, a1, a2); - } - break; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_sar_i32: - if (c2) { - tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_sra_w(s, a0, a1, a2); - } - break; - case INDEX_op_sar_i64: - if (c2) { - tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_sra_d(s, a0, a1, a2); - } - break; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_div_wu(s, a0, a1, a2); + } else { + tcg_out_opc_div_du(s, a0, a1, a2); + } +} - case INDEX_op_rotl_i32: - /* transform into equivalent rotr/rotri */ - if (c2) { - tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f); - } else { - tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0); - } - break; - case INDEX_op_rotl_i64: - /* transform into equivalent rotr/rotri */ - if (c2) { - tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f); - } else { - tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0); - } - break; +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_rotr_w(s, a0, a1, a2); - } - break; - case INDEX_op_rotr_i64: - if (c2) { - tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_rotr_d(s, a0, a1, a2); - } - break; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_add_i32: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2); - } else { - tcg_out_opc_add_w(s, a0, a1, a2); - } - break; - case INDEX_op_add_i64: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2); - } else { - tcg_out_opc_add_d(s, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; - case 
INDEX_op_sub_i32: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2); - } else { - tcg_out_opc_sub_w(s, a0, a1, a2); - } - break; - case INDEX_op_sub_i64: - if (c2) { - tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2); - } else { - tcg_out_opc_sub_d(s, a0, a1, a2); - } - break; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_srai_d(s, a0, a1, 32); +} - case INDEX_op_neg_i32: - tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_neg_i64: - tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1); - break; +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; - case INDEX_op_mul_i32: +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { tcg_out_opc_mul_w(s, a0, a1, a2); - break; - case INDEX_op_mul_i64: + } else { tcg_out_opc_mul_d(s, a0, a1, a2); - break; + } +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_mulsh_i32: +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { tcg_out_opc_mulh_w(s, a0, a1, a2); - break; - case INDEX_op_mulsh_i64: + } else { tcg_out_opc_mulh_d(s, a0, a1, a2); - break; + } +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mulsh, +}; - case INDEX_op_muluh_i32: +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { tcg_out_opc_mulh_wu(s, a0, a1, a2); - break; - case INDEX_op_muluh_i64: + } else { tcg_out_opc_mulh_du(s, a0, a1, a2); - break; + } +} - case INDEX_op_div_i32: - tcg_out_opc_div_w(s, a0, a1, a2); - break; - case INDEX_op_div_i64: - tcg_out_opc_div_d(s, a0, a1, a2); - break; +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_muluh, +}; - case INDEX_op_divu_i32: - tcg_out_opc_div_wu(s, a0, a1, a2); - break; - case INDEX_op_divu_i64: - tcg_out_opc_div_du(s, a0, a1, a2); - break; +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_nor(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_or(s, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_ori(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_orn(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; - case INDEX_op_rem_i32: +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { tcg_out_opc_mod_w(s, a0, a1, a2); - break; - case 
INDEX_op_rem_i64: + } else { tcg_out_opc_mod_d(s, a0, a1, a2); - break; + } +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; - case INDEX_op_remu_i32: +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { tcg_out_opc_mod_wu(s, a0, a1, a2); - break; - case INDEX_op_remu_i64: + } else { tcg_out_opc_mod_du(s, a0, a1, a2); - break; + } +} - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2, c2); - break; +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]); - break; +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, OPC_LD_B, a0, a1, a2); - break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, OPC_LD_H, a0, a1, a2); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, OPC_LD_W, a0, a1, a2); - break; - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, OPC_LD_D, a0, a1, a2); - break; +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_rotr_w(s, a0, a1, a2); + } else { + tcg_out_opc_rotr_d(s, a0, a1, a2); + } +} - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, OPC_ST_B, a0, a1, a2); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, OPC_ST_H, a0, a1, a2); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, OPC_ST_W, a0, a1, a2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, OPC_ST_D, a0, a1, a2); - break; +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f); + } +} - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_ld_i128: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i128: - tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false); - break; +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
*/ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sra_w(s, a0, a1, a2); + } else { + tcg_out_opc_sra_d(s, a0, a1, a2); } } +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sll_w(s, a0, a1, a2); + } else { + tcg_out_opc_sll_d(s, a0, a1, a2); + } +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_srl_w(s, a0, a1, a2); + } else { + tcg_out_opc_srl_d(s, a0, a1, a2); + } +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f); + } +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_sub_w(s, a0, a1, a2); + } else { + tcg_out_opc_sub_d(s, a0, a1, a2); + } +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_xor(s, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_xori(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_revb_2h(s, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + 
tcg_out_ext16u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_revb_2w(s, a0, a1); + + /* All 32-bit values are computed sign-extended in the register. */ + if (type == TCG_TYPE_I32 || (flags & TCG_BSWAP_OS)) { + tcg_out_ext32s(s, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, a0, a0); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_revb_d(s, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, a1, TCG_REG_ZERO); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_bstrins_w(s, a0, a2, ofs, ofs + len - 1); + } else { + tcg_out_opc_bstrins_d(s, a0, a2, ofs, ofs + len - 1); + } +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rz), + .out_rrr = tgen_deposit, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0 && len <= 12) { + tcg_out_opc_andi(s, a0, a1, (1 << len) - 1); + } else if (type == TCG_TYPE_I32) { + tcg_out_opc_bstrpick_w(s, a0, a1, ofs, ofs + len - 1); + } else { + tcg_out_opc_bstrpick_d(s, a0, a1, ofs, ofs + len - 1); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs + len == 32) { + tcg_out_opc_srai_w(s, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_BU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_B, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, 
ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_HU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_H, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_WU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LD_W, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_ST_B, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_ST_H, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, TCGReg rs) { @@ -2027,28 +2356,22 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, * Try vseqi/vslei/vslti */ int64_t value = sextract64(a2, 0, 8 << vece); - if ((cond == TCG_COND_EQ || - cond == TCG_COND_LE || - cond == TCG_COND_LT) && - (-0x10 <= value && value <= 0x0f)) { + switch (cond) { + case TCG_COND_EQ: + case TCG_COND_LE: + case TCG_COND_LT: insn = cmp_vec_imm_insn[cond][lasx][vece]; tcg_out32(s, encode_vdvjsk5_insn(insn, a0, a1, value)); break; - } else if ((cond == TCG_COND_LEU || - cond == TCG_COND_LTU) && - (0x00 <= value && value <= 0x1f)) { + case TCG_COND_LEU: + case TCG_COND_LTU: insn = cmp_vec_imm_insn[cond][lasx][vece]; tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value)); break; + default: + g_assert_not_reached(); } - - /* - * Fallback to: - * dupi_vec temp, a2 - * cmp_vec a0, a1, temp, cond - */ - tcg_out_dupi_vec(s, type, vece, TCG_VEC_TMP0, a2); - a2 = TCG_VEC_TMP0; + break; } insn = cmp_vec_insn[cond][lasx][vece]; @@ -2213,150 +2536,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i32: - case INDEX_op_st_i64: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - return C_O0_I2(rz, r); - - case INDEX_op_qemu_ld_i128: - return C_N2_I1(r, r, r); - - case INDEX_op_qemu_st_i128: - return C_O0_I3(r, r, r); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rz); - - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case 
INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - case INDEX_op_extrh_i64_i32: - case INDEX_op_ext_i32_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, r); - - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - /* - * LoongArch insns for these ops don't have reg-imm forms, but we - * can express using andi/ori if ~constant satisfies - * TCG_CT_CONST_U12. - */ - return C_O1_I2(r, r, rC); - - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - - case INDEX_op_add_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_add_i64: - return C_O1_I2(r, r, rJ); - - case INDEX_op_and_i32: - case INDEX_op_and_i64: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - /* LoongArch reg-imm bitops have their imms ZERO-extended */ - return C_O1_I2(r, r, rU); - - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, rW); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - /* Must deposit into the same register as input */ - return C_O1_I2(r, 0, rz); - - case INDEX_op_sub_i32: - case INDEX_op_setcond_i32: - return C_O1_I2(r, rz, ri); - case INDEX_op_sub_i64: - case INDEX_op_setcond_i64: - return C_O1_I2(r, rz, rJ); - - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_mulsh_i32: - case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i32: - case INDEX_op_muluh_i64: - case INDEX_op_div_i32: - case INDEX_op_div_i64: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - return C_O1_I2(r, rz, rz); - - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, rz, rJ, rz, rz); - case INDEX_op_ld_vec: case INDEX_op_dupm_vec: case INDEX_op_dup_vec: diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h index 06ab04c..5304691 100644 --- a/tcg/mips/tcg-target-con-set.h +++ b/tcg/mips/tcg-target-con-set.h @@ -10,12 +10,10 @@ * tcg-target-con-str.h; the constraint combination is inclusive or. 
*/ C_O0_I1(r) +C_O0_I2(r, rz) C_O0_I2(rz, r) -C_O0_I2(rz, rz) -C_O0_I3(rz, r, r) C_O0_I3(rz, rz, r) -C_O0_I4(rz, rz, rz, rz) -C_O0_I4(rz, rz, r, r) +C_O0_I4(r, r, rz, rz) C_O1_I1(r, r) C_O1_I2(r, 0, rz) C_O1_I2(r, r, r) @@ -23,11 +21,10 @@ C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) C_O1_I2(r, r, rIK) C_O1_I2(r, r, rJ) +C_O1_I2(r, r, rz) C_O1_I2(r, r, rzW) -C_O1_I2(r, rz, rN) -C_O1_I2(r, rz, rz) -C_O1_I4(r, rz, rz, rz, 0) -C_O1_I4(r, rz, rz, rz, rz) +C_O1_I4(r, r, rz, rz, 0) +C_O1_I4(r, r, rz, rz, rz) +C_O1_I4(r, r, r, rz, rz) C_O2_I1(r, r, r) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, rz, rz, rN, rN) diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h index dfe2b15..db2b225 100644 --- a/tcg/mips/tcg-target-con-str.h +++ b/tcg/mips/tcg-target-con-str.h @@ -17,5 +17,4 @@ REGS('r', ALL_GENERAL_REGS) CONST('I', TCG_CT_CONST_U16) CONST('J', TCG_CT_CONST_S16) CONST('K', TCG_CT_CONST_P2M1) -CONST('N', TCG_CT_CONST_N16) CONST('W', TCG_CT_CONST_WSZ) diff --git a/tcg/mips/tcg-target-has.h b/tcg/mips/tcg-target-has.h index df6960f..b9eb338 100644 --- a/tcg/mips/tcg-target-has.h +++ b/tcg/mips/tcg-target-has.h @@ -39,77 +39,13 @@ extern bool use_mips32r2_instructions; #endif /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_andc_i32 0 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muluh_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 0 - #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions) -#define TCG_TARGET_HAS_muluh_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_ext32s_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 0 #endif /* optional instructions detected at runtime */ -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions -#define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - -#if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 -#endif - -/* 
optional instructions automatically implemented */ -#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */ -#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */ - -#if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */ -#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ -#endif - #define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_HAS_tst 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index f8c105b..2c0457e 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -187,8 +187,7 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, #define TCG_CT_CONST_U16 0x100 /* Unsigned 16-bit: 0 - 0xffff. */ #define TCG_CT_CONST_S16 0x200 /* Signed 16-bit: -32768 - 32767 */ #define TCG_CT_CONST_P2M1 0x400 /* Power of 2 minus 1. */ -#define TCG_CT_CONST_N16 0x800 /* "Negatable" 16-bit: -32767 - 32767 */ -#define TCG_CT_CONST_WSZ 0x1000 /* word size */ +#define TCG_CT_CONST_WSZ 0x800 /* word size */ #define ALL_GENERAL_REGS 0xffffffffu @@ -207,8 +206,6 @@ static bool tcg_target_const_match(int64_t val, int ct, return 1; } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { return 1; - } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) { - return 1; } else if ((ct & TCG_CT_CONST_P2M1) && use_mips32r2_instructions && is_p2m1(val)) { return 1; @@ -647,7 +644,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - tcg_debug_assert(TCG_TARGET_HAS_ext8s_i32); + tcg_debug_assert(use_mips32r2_instructions); tcg_out_opc_reg(s, OPC_SEB, rd, TCG_REG_ZERO, rs); } @@ -658,7 +655,7 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs) static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - tcg_debug_assert(TCG_TARGET_HAS_ext16s_i32); + tcg_debug_assert(use_mips32r2_instructions); tcg_out_opc_reg(s, OPC_SEH, rd, TCG_REG_ZERO, rs); } @@ -702,39 +699,6 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, g_assert_not_reached(); } -static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags) -{ - /* ret and arg can't be register tmp0 */ - tcg_debug_assert(ret != TCG_TMP0); - tcg_debug_assert(arg != TCG_TMP0); - - /* With arg = abcd: */ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */ - if (flags & TCG_BSWAP_OS) { - tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */ - } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */ - } - return; - } - - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */ - if (!(flags & TCG_BSWAP_IZ)) { - tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */ - } - if (flags & TCG_BSWAP_OS) { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */ - tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */ - } else { - tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */ - if (flags & TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */ - } - } - tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */ -} - static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) { if (!tcg_out_opc_jmp(s, OPC_JAL, sub)) { @@ -743,39 +707,6 @@ static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub) } } -static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, 
OPC_WSBH, ret, 0, arg); - tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); - if (flags & TCG_BSWAP_OZ) { - tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0); - } - } else { - if (flags & TCG_BSWAP_OZ) { - tcg_out_bswap_subr(s, bswap32u_addr); - } else { - tcg_out_bswap_subr(s, bswap32_addr); - } - /* delay slot -- never omit the insn, like tcg_out_mov might. */ - tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); - tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); - } -} - -static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg) -{ - if (use_mips32r2_instructions) { - tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); - tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); - } else { - tcg_out_bswap_subr(s, bswap64_addr); - /* delay slot -- never omit the insn, like tcg_out_mov might. */ - tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); - tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); - } -} - static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); @@ -831,55 +762,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, - TCGReg ah, TCGArg bl, TCGArg bh, bool cbl, - bool cbh, bool is_sub) -{ - TCGReg th = TCG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl); - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl); - tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl); - } - tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); - } else if (rl == al && rl == bl) { - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, TCG_TARGET_REG_BITS - 1); - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - } else { - tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? 
al : bl));
-        }
-        tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0);
-    }
-}
-
 #define SETCOND_INV TCG_TARGET_NB_REGS
 #define SETCOND_NEZ (SETCOND_INV << 1)
 #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ)
@@ -952,15 +834,44 @@ static void tcg_out_setcond_end(TCGContext *s, TCGReg ret, int tmpflags)
     }
 }
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2)
+static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
+                         TCGReg ret, TCGReg arg1, TCGReg arg2)
 {
     int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2);
     tcg_out_setcond_end(s, ret, tmpflags);
 }
-static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
-                           TCGReg arg2, TCGLabel *l)
+static const TCGOutOpSetcond outop_setcond = {
+    .base.static_constraint = C_O1_I2(r, r, rz),
+    .out_rrr = tgen_setcond,
+};
+
+static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
+                            TCGReg ret, TCGReg arg1, TCGReg arg2)
+{
+    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2);
+    TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
+
+    /* If intermediate result is zero/non-zero: test != 0. */
+    if (tmpflags & SETCOND_NEZ) {
+        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
+        tmp = ret;
+    }
+    /* Produce the 0/-1 result. */
+    if (tmpflags & SETCOND_INV) {
+        tcg_out_opc_imm(s, OPC_ADDIU, ret, tmp, -1);
+    } else {
+        tcg_out_opc_reg(s, OPC_SUBU, ret, TCG_REG_ZERO, tmp);
+    }
+}
+
+static const TCGOutOpSetcond outop_negsetcond = {
+    .base.static_constraint = C_O1_I2(r, r, rz),
+    .out_rrr = tgen_negsetcond,
+};
+
+static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
+                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
 {
     static const MIPSInsn b_zero[16] = {
         [TCG_COND_LT] = OPC_BLTZ,
@@ -1005,6 +916,16 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
     tcg_out_nop(s);
 }
+static const TCGOutOpBrcond outop_brcond = {
+    .base.static_constraint = C_O0_I2(r, rz),
+    .out_rr = tgen_brcond,
+};
+
+static void tcg_out_br(TCGContext *s, TCGLabel *l)
+{
+    tgen_brcond(s, TCG_TYPE_I32, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, l);
+}
+
 static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret,
                                 TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
 {
@@ -1022,25 +943,37 @@ static int tcg_out_setcond2_int(TCGContext *s, TCGCond cond, TCGReg ret,
         break;
     default:
-        tcg_out_setcond(s, TCG_COND_EQ, TCG_TMP0, ah, bh);
-        tcg_out_setcond(s, tcg_unsigned_cond(cond), TCG_TMP1, al, bl);
+        tgen_setcond(s, TCG_TYPE_I32, TCG_COND_EQ, TCG_TMP0, ah, bh);
+        tgen_setcond(s, TCG_TYPE_I32, tcg_unsigned_cond(cond),
+                     TCG_TMP1, al, bl);
         tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP0);
-        tcg_out_setcond(s, tcg_high_cond(cond), TCG_TMP0, ah, bh);
+        tgen_setcond(s, TCG_TYPE_I32, tcg_high_cond(cond), TCG_TMP0, ah, bh);
         tcg_out_opc_reg(s, OPC_OR, ret, TCG_TMP0, TCG_TMP1);
         break;
     }
     return ret | flags;
 }
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
-                             TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+                          TCGReg al, TCGReg ah,
+                          TCGArg bl, bool const_bl,
+                          TCGArg bh, bool const_bh)
 {
     int tmpflags = tcg_out_setcond2_int(s, cond, ret, al, ah, bl, bh);
     tcg_out_setcond_end(s, ret, tmpflags);
 }
-static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
-                            TCGReg bl, TCGReg bh, TCGLabel *l)
+#if TCG_TARGET_REG_BITS != 32
+__attribute__((unused))
+#endif
+static const TCGOutOpSetcond2 outop_setcond2 = {
+    .base.static_constraint = C_O1_I4(r, r, r, rz, rz),
+    .out = tgen_setcond2,
+};
+
+static void 
tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l) { int tmpflags = tcg_out_setcond2_int(s, cond, TCG_TMP0, al, ah, bl, bh); TCGReg tmp = tmpflags & ~SETCOND_FLAGS; @@ -1051,8 +984,17 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, tcg_out_nop(s); } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2) +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, rz, rz), + .out = tgen_brcond2, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) { int tmpflags; bool eqz; @@ -1098,6 +1040,13 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = (use_mips32r6_instructions + ? C_O1_I4(r, r, rz, rz, rz) + : C_O1_I4(r, r, rz, rz, 0)), + .out = tgen_movcond, +}; + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { /* @@ -1438,29 +1387,66 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, true); + + if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { + tcg_out_qemu_ld_direct(s, data, 0, h.base, opc, type); + } else { + tcg_out_qemu_ld_unalign(s, data, 0, h.base, opc, type); + } + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = 0; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; HostAddress h; + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); ldst = prepare_host_addr(s, &h, addr, oi, true); if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { - tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type); + tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, type); } else { - tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type); + tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, type); } if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + /* Ensure that the mips32 code is compiled but discarded for mips64. */ + .base.static_constraint = + TCG_TARGET_REG_BITS == 32 ? C_O2_I1(r, r, r) : C_NotImplemented, + .out = + TCG_TARGET_REG_BITS == 32 ? 
tgen_qemu_ld2 : NULL, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { @@ -1524,13 +1510,42 @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, - TCGReg addr, MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + MemOp opc = get_memop(oi); + TCGLabelQemuLdst *ldst; + HostAddress h; + + ldst = prepare_host_addr(s, &h, addr, oi, false); + + if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { + tcg_out_qemu_st_direct(s, data, 0, h.base, opc); + } else { + tcg_out_qemu_st_unalign(s, data, 0, h.base, opc); + } + + if (ldst) { + ldst->type = type; + ldst->datalo_reg = data; + ldst->datahi_reg = 0; + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); + } +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) { MemOp opc = get_memop(oi); TCGLabelQemuLdst *ldst; HostAddress h; + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); ldst = prepare_host_addr(s, &h, addr, oi, false); if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) { @@ -1540,14 +1555,22 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, } if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = datalo; ldst->datahi_reg = datahi; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + /* Ensure that the mips32 code is compiled but discarded for mips64. */ + .base.static_constraint = + TCG_TARGET_REG_BITS == 32 ? C_O0_I3(rz, rz, r) : C_NotImplemented, + .out = + TCG_TARGET_REG_BITS == 32 ? 
tgen_qemu_st2 : NULL, +}; + +static void tcg_out_mb(TCGContext *s, unsigned a0) { static const MIPSInsn sync[] = { /* Note that SYNC_MB is a slightly weaker than SYNC 0, @@ -1563,33 +1586,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, sync[a0 & TCG_MO_ALL]); } -static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6, - int width, TCGReg a0, TCGReg a1, TCGArg a2) -{ - if (use_mips32r6_instructions) { - if (a2 == width) { - tcg_out_opc_reg(s, opcv6, a0, a1, 0); - } else { - tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0); - tcg_out_movcond(s, TCG_COND_EQ, a0, a1, 0, a2, TCG_TMP0); - } - } else { - if (a2 == width) { - tcg_out_opc_reg(s, opcv2, a0, a1, a1); - } else if (a0 == a2) { - tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); - tcg_out_opc_reg(s, OPC_MOVN, a0, TCG_TMP0, a1); - } else if (a0 != a1) { - tcg_out_opc_reg(s, opcv2, a0, a1, a1); - tcg_out_opc_reg(s, OPC_MOVZ, a0, a2, a1); - } else { - tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); - tcg_out_opc_reg(s, OPC_MOVZ, TCG_TMP0, a2, a1); - tcg_out_mov(s, TCG_TYPE_REG, a0, TCG_TMP0); - } - } -} - static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { TCGReg base = TCG_REG_ZERO; @@ -1649,616 +1645,809 @@ static void tcg_out_goto_tb(TCGContext *s, int which) } } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0); + } else { + tcg_out_nop(s); + } +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { /* Always indirect, nothing to do */ } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - MIPSInsn i1, i2; - TCGArg a0, a1, a2; - int c2; + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_ADDU : OPC_DADDU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? 
OPC_ADDIU : OPC_DADDIU;
+    tcg_out_opc_imm(s, insn, a0, a1, a2);
+}
-    switch (opc) {
-    case INDEX_op_goto_ptr:
-        /* jmp to the given host address (could be epilogue) */
-        tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
+static const TCGOutOpBinary outop_add = {
+    .base.static_constraint = C_O1_I2(r, r, rJ),
+    .out_rrr = tgen_add,
+    .out_rri = tgen_addi,
+};
+
+static const TCGOutOpBinary outop_addco = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_addci = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_addcio = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static void tcg_out_set_carry(TCGContext *s)
+{
+    g_assert_not_reached();
+}
+
+static void tgen_and(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
+}
+
+static void tgen_andi(TCGContext *s, TCGType type,
+                      TCGReg a0, TCGReg a1, tcg_target_long a2)
+{
+    int msb;
+
+    if (a2 == (uint16_t)a2) {
+        tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
+        return;
+    }
+
+    tcg_debug_assert(use_mips32r2_instructions);
+    tcg_debug_assert(is_p2m1(a2));
+    msb = ctz64(~a2) - 1;
+    if (type == TCG_TYPE_I32) {
+        tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0);
+    } else {
+        tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0);
+    }
+}
+
+static const TCGOutOpBinary outop_and = {
+    .base.static_constraint = C_O1_I2(r, r, rIK),
+    .out_rrr = tgen_and,
+    .out_rri = tgen_andi,
+};
+
+static const TCGOutOpBinary outop_andc = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static void tgen_clz(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    if (use_mips32r6_instructions) {
+        MIPSInsn opcv6 = type == TCG_TYPE_I32 ? OPC_CLZ_R6 : OPC_DCLZ_R6;
+        tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0);
+        tgen_movcond(s, TCG_TYPE_REG, TCG_COND_EQ, a0, a1, TCG_REG_ZERO,
+                     false, a2, false, TCG_TMP0, false);
+    } else {
+        MIPSInsn opcv2 = type == TCG_TYPE_I32 ? 
OPC_CLZ : OPC_DCLZ; + if (a0 == a2) { + tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); + tcg_out_opc_reg(s, OPC_MOVN, a0, TCG_TMP0, a1); + } else if (a0 != a1) { + tcg_out_opc_reg(s, opcv2, a0, a1, a1); + tcg_out_opc_reg(s, OPC_MOVZ, a0, a2, a1); } else { - tcg_out_nop(s); + tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1); + tcg_out_opc_reg(s, OPC_MOVZ, TCG_TMP0, a2, a1); + tcg_out_mov(s, type, a0, TCG_TMP0); } - break; - case INDEX_op_br: - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, - arg_label(a0)); - break; + } +} - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - i1 = OPC_LBU; - goto do_ldst; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - i1 = OPC_LB; - goto do_ldst; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - i1 = OPC_LHU; - goto do_ldst; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - i1 = OPC_LH; - goto do_ldst; - case INDEX_op_ld_i32: - case INDEX_op_ld32s_i64: - i1 = OPC_LW; - goto do_ldst; - case INDEX_op_ld32u_i64: - i1 = OPC_LWU; - goto do_ldst; - case INDEX_op_ld_i64: - i1 = OPC_LD; - goto do_ldst; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - i1 = OPC_SB; - goto do_ldst; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - i1 = OPC_SH; - goto do_ldst; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - i1 = OPC_SW; - goto do_ldst; - case INDEX_op_st_i64: - i1 = OPC_SD; - do_ldst: - tcg_out_ldst(s, i1, a0, a1, a2); - break; +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == 0) { + tgen_clz(s, type, a0, a1, TCG_REG_ZERO); + } else if (use_mips32r6_instructions) { + MIPSInsn opcv6 = type == TCG_TYPE_I32 ? OPC_CLZ_R6 : OPC_DCLZ_R6; + tcg_out_opc_reg(s, opcv6, a0, a1, 0); + } else { + MIPSInsn opcv2 = type == TCG_TYPE_I32 ? OPC_CLZ : OPC_DCLZ; + tcg_out_opc_reg(s, opcv2, a0, a1, a1); + } +} - case INDEX_op_add_i32: - i1 = OPC_ADDU, i2 = OPC_ADDIU; - goto do_binary; - case INDEX_op_add_i64: - i1 = OPC_DADDU, i2 = OPC_DADDIU; - goto do_binary; - case INDEX_op_or_i32: - case INDEX_op_or_i64: - i1 = OPC_OR, i2 = OPC_ORI; - goto do_binary; - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - i1 = OPC_XOR, i2 = OPC_XORI; - do_binary: - if (c2) { - tcg_out_opc_imm(s, i2, a0, a1, a2); - break; - } - do_binaryv: - tcg_out_opc_reg(s, i1, a0, a1, a2); - break; +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) +{ + return use_mips32r2_instructions ? 
C_O1_I2(r, r, rzW) : C_NotImplemented; +} - case INDEX_op_sub_i32: - i1 = OPC_SUBU, i2 = OPC_ADDIU; - goto do_subtract; - case INDEX_op_sub_i64: - i1 = OPC_DSUBU, i2 = OPC_DADDIU; - do_subtract: - if (c2) { - tcg_out_opc_imm(s, i2, a0, a1, -a2); - break; - } - goto do_binaryv; - case INDEX_op_and_i32: - if (c2 && a2 != (uint16_t)a2) { - int msb = ctz32(~a2) - 1; - tcg_debug_assert(use_mips32r2_instructions); - tcg_debug_assert(is_p2m1(a2)); - tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0); - break; - } - i1 = OPC_AND, i2 = OPC_ANDI; - goto do_binary; - case INDEX_op_and_i64: - if (c2 && a2 != (uint16_t)a2) { - int msb = ctz64(~a2) - 1; - tcg_debug_assert(use_mips32r2_instructions); - tcg_debug_assert(is_p2m1(a2)); - tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0); - break; - } - i1 = OPC_AND, i2 = OPC_ANDI; - goto do_binary; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - i1 = OPC_NOR; - goto do_binaryv; - - case INDEX_op_mul_i32: - if (use_mips32_instructions) { - tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_mulsh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2); - break; - } - i1 = OPC_MULT, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_muluh_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2); - break; - } - i1 = OPC_MULTU, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_div_i32: - if (use_mips32r6_instructions) { +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2); - break; + } else { + tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2); } - i1 = OPC_DIV, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_divu_i32: - if (use_mips32r6_instructions) { + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_DIV : OPC_DDIV; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2); - break; - } - i1 = OPC_DIVU, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_rem_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); - break; + } else { + tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2); } - i1 = OPC_DIV, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_remu_i32: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); - break; + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? 
OPC_DIVU : OPC_DDIVU; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_dsra(s, a0, a1, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; +#endif + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + MIPSInsn insn; + + if (type == TCG_TYPE_I32) { + if (use_mips32_instructions) { + tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); + return; } - i1 = OPC_DIVU, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_mul_i64: + insn = OPC_MULT; + } else { if (use_mips32r6_instructions) { tcg_out_opc_reg(s, OPC_DMUL, a0, a1, a2); - break; - } - i1 = OPC_DMULT, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_mulsh_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMUH, a0, a1, a2); - break; - } - i1 = OPC_DMULT, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_muluh_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DMUHU, a0, a1, a2); - break; - } - i1 = OPC_DMULTU, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_div_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2); - break; - } - i1 = OPC_DDIV, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_divu_i64: - if (use_mips32r6_instructions) { - tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2); - break; + return; } - i1 = OPC_DDIVU, i2 = OPC_MFLO; - goto do_hilo1; - case INDEX_op_rem_i64: - if (use_mips32r6_instructions) { + insn = OPC_DMULT; + } + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULT : OPC_DMULT; + tcg_out_opc_reg(s, insn, 0, a2, a3); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); +} + +static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) +{ + return use_mips32r6_instructions ? C_NotImplemented : C_O2_I2(r, r, r, r); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_muls2, +}; + +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MUH : OPC_DMUH; + tcg_out_opc_reg(s, insn, a0, a1, a2); + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULT : OPC_DMULT; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mulsh, +}; + +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + MIPSInsn insn = type == TCG_TYPE_I32 ? 
OPC_MULTU : OPC_DMULTU; + tcg_out_opc_reg(s, insn, 0, a2, a3); + tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0); + tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_mulu2, +}; + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MUHU : OPC_DMUHU; + tcg_out_opc_reg(s, insn, a0, a1, a2); + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_MULTU : OPC_DMULTU; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_muluh, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_NOR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { + tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2); + } else { tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2); - break; } - i1 = OPC_DDIV, i2 = OPC_MFHI; - goto do_hilo1; - case INDEX_op_remu_i64: - if (use_mips32r6_instructions) { + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_DIV : OPC_DDIV; + tcg_out_opc_reg(s, insn, 0, a1, a2); + tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0); + } +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (use_mips32r6_instructions) { + if (type == TCG_TYPE_I32) { + tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2); + } else { tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2); - break; } - i1 = OPC_DDIVU, i2 = OPC_MFHI; - do_hilo1: - tcg_out_opc_reg(s, i1, 0, a1, a2); - tcg_out_opc_reg(s, i2, a0, 0, 0); - break; + } else { + MIPSInsn insn = type == TCG_TYPE_I32 ? 
OPC_DIVU : OPC_DDIVU;
+        tcg_out_opc_reg(s, insn, 0, a1, a2);
+        tcg_out_opc_reg(s, OPC_MFHI, a0, 0, 0);
+    }
+}
-    case INDEX_op_muls2_i32:
-        i1 = OPC_MULT;
-        goto do_hilo2;
-    case INDEX_op_mulu2_i32:
-        i1 = OPC_MULTU;
-        goto do_hilo2;
-    case INDEX_op_muls2_i64:
-        i1 = OPC_DMULT;
-        goto do_hilo2;
-    case INDEX_op_mulu2_i64:
-        i1 = OPC_DMULTU;
-    do_hilo2:
-        tcg_out_opc_reg(s, i1, 0, a2, args[3]);
-        tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0);
-        tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0);
-        break;
+static const TCGOutOpBinary outop_remu = {
+    .base.static_constraint = C_O1_I2(r, r, r),
+    .out_rrr = tgen_remu,
+};
-    case INDEX_op_neg_i32:
-        i1 = OPC_SUBU;
-        goto do_unary;
-    case INDEX_op_neg_i64:
-        i1 = OPC_DSUBU;
-        goto do_unary;
-    case INDEX_op_not_i32:
-    case INDEX_op_not_i64:
-        i1 = OPC_NOR;
-        goto do_unary;
-    do_unary:
-        tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1);
-        break;
+static const TCGOutOpBinary outop_rotl = {
+    .base.static_constraint = C_NotImplemented,
+};
-    case INDEX_op_bswap16_i32:
-    case INDEX_op_bswap16_i64:
-        tcg_out_bswap16(s, a0, a1, a2);
-        break;
-    case INDEX_op_bswap32_i32:
-        tcg_out_bswap32(s, a0, a1, 0);
-        break;
-    case INDEX_op_bswap32_i64:
-        tcg_out_bswap32(s, a0, a1, a2);
-        break;
-    case INDEX_op_bswap64_i64:
-        tcg_out_bswap64(s, a0, a1);
-        break;
-    case INDEX_op_extrh_i64_i32:
-        tcg_out_dsra(s, a0, a1, 32);
-        break;
+static TCGConstraintSetIndex cset_rotr(TCGType type, unsigned flags)
+{
+    return use_mips32r2_instructions ? C_O1_I2(r, r, ri) : C_NotImplemented;
+}
-    case INDEX_op_sar_i32:
-        i1 = OPC_SRAV, i2 = OPC_SRA;
-        goto do_shift;
-    case INDEX_op_shl_i32:
-        i1 = OPC_SLLV, i2 = OPC_SLL;
-        goto do_shift;
-    case INDEX_op_shr_i32:
-        i1 = OPC_SRLV, i2 = OPC_SRL;
-        goto do_shift;
-    case INDEX_op_rotr_i32:
-        i1 = OPC_ROTRV, i2 = OPC_ROTR;
-    do_shift:
-        if (c2) {
-            tcg_out_opc_sa(s, i2, a0, a1, a2);
-            break;
-        }
-    do_shiftv:
-        tcg_out_opc_reg(s, i1, a0, a2, a1);
-        break;
-    case INDEX_op_rotl_i32:
-        if (c2) {
-            tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2);
-        } else {
-            tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2);
-            tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1);
-        }
-        break;
-    case INDEX_op_sar_i64:
-        if (c2) {
-            tcg_out_dsra(s, a0, a1, a2);
-            break;
-        }
-        i1 = OPC_DSRAV;
-        goto do_shiftv;
-    case INDEX_op_shl_i64:
-        if (c2) {
-            tcg_out_dsll(s, a0, a1, a2);
-            break;
+static void tgen_rotr(TCGContext *s, TCGType type,
+                      TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_ROTRV : OPC_DROTRV;
+    tcg_out_opc_reg(s, insn, a0, a2, a1);
+}
+
+static void tgen_rotri(TCGContext *s, TCGType type,
+                       TCGReg a0, TCGReg a1, tcg_target_long a2)
+{
+    if (type == TCG_TYPE_I32) {
+        tcg_out_opc_sa(s, OPC_ROTR, a0, a1, a2);
+    } else {
+        tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2);
+    }
+}
+
+static const TCGOutOpBinary outop_rotr = {
+    .base.static_constraint = C_Dynamic,
+    .base.dynamic_constraint = cset_rotr,
+    .out_rrr = tgen_rotr,
+    .out_rri = tgen_rotri,
+};
+
+static void tgen_sar(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    MIPSInsn insn = type == TCG_TYPE_I32 ? 
OPC_SRAV : OPC_DSRAV;
+    tcg_out_opc_reg(s, insn, a0, a2, a1);
+}
+
+static void tgen_sari(TCGContext *s, TCGType type,
+                      TCGReg a0, TCGReg a1, tcg_target_long a2)
+{
+    if (type == TCG_TYPE_I32) {
+        tcg_out_opc_sa(s, OPC_SRA, a0, a1, a2);
+    } else {
+        tcg_out_dsra(s, a0, a1, a2);
+    }
+}
+
+static const TCGOutOpBinary outop_sar = {
+    .base.static_constraint = C_O1_I2(r, r, ri),
+    .out_rrr = tgen_sar,
+    .out_rri = tgen_sari,
+};
+
+static void tgen_shl(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SLLV : OPC_DSLLV;
+    tcg_out_opc_reg(s, insn, a0, a2, a1);
+}
+
+static void tgen_shli(TCGContext *s, TCGType type,
+                      TCGReg a0, TCGReg a1, tcg_target_long a2)
+{
+    if (type == TCG_TYPE_I32) {
+        tcg_out_opc_sa(s, OPC_SLL, a0, a1, a2);
+    } else {
+        tcg_out_dsll(s, a0, a1, a2);
+    }
+}
+
+static const TCGOutOpBinary outop_shl = {
+    .base.static_constraint = C_O1_I2(r, r, ri),
+    .out_rrr = tgen_shl,
+    .out_rri = tgen_shli,
+};
+
+static void tgen_shr(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SRLV : OPC_DSRLV;
+    tcg_out_opc_reg(s, insn, a0, a2, a1);
+}
+
+static void tgen_shri(TCGContext *s, TCGType type,
+                      TCGReg a0, TCGReg a1, tcg_target_long a2)
+{
+    if (type == TCG_TYPE_I32) {
+        tcg_out_opc_sa(s, OPC_SRL, a0, a1, a2);
+    } else {
+        tcg_out_dsrl(s, a0, a1, a2);
+    }
+}
+
+static const TCGOutOpBinary outop_shr = {
+    .base.static_constraint = C_O1_I2(r, r, ri),
+    .out_rrr = tgen_shr,
+    .out_rri = tgen_shri,
+};
+
+static void tgen_sub(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    MIPSInsn insn = type == TCG_TYPE_I32 ? OPC_SUBU : OPC_DSUBU;
+    tcg_out_opc_reg(s, insn, a0, a1, a2);
+}
+
+static const TCGOutOpSubtract outop_sub = {
+    .base.static_constraint = C_O1_I2(r, r, r),
+    .out_rrr = tgen_sub,
+};
+
+static const TCGOutOpAddSubCarry outop_subbo = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_subbi = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_subbio = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static void tcg_out_set_borrow(TCGContext *s)
+{
+    g_assert_not_reached();
+}
+
+static void tgen_xor(TCGContext *s, TCGType type,
+                     TCGReg a0, TCGReg a1, TCGReg a2)
+{
+    tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
+}
+
+static void tgen_xori(TCGContext *s, TCGType type,
+                      TCGReg a0, TCGReg a1, tcg_target_long a2)
+{
+    tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
+}
+
+static const TCGOutOpBinary outop_xor = {
+    .base.static_constraint = C_O1_I2(r, r, rI),
+    .out_rrr = tgen_xor,
+    .out_rri = tgen_xori,
+};
+
+static void tgen_bswap16(TCGContext *s, TCGType type,
+                         TCGReg ret, TCGReg arg, unsigned flags)
+{
+    /* With arg = abcd: */
+    if (use_mips32r2_instructions) {
+        tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);              /* badc */
+        if (flags & TCG_BSWAP_OS) {
+            tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);           /* ssdc */
+        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+            tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff);     /* 00dc */
         }
-        i1 = OPC_DSLLV;
-        goto do_shiftv;
-    case INDEX_op_shr_i64:
-        if (c2) {
-            tcg_out_dsrl(s, a0, a1, a2);
-            break;
+        return;
+    }
+
+    tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);               /* 0abc */
+    if (!(flags & TCG_BSWAP_IZ)) {
+        tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);  /* 000c */
+    }
+    if (flags & TCG_BSWAP_OS) {
+        tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);               /* d000 */
+        tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);               /* 
ssd0 */ + } else { + tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */ + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */ } - i1 = OPC_DSRLV; - goto do_shiftv; - case INDEX_op_rotr_i64: - if (c2) { - tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2); - break; + } + tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */ +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg ret, TCGReg arg, unsigned flags) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); + tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0); } - i1 = OPC_DROTRV; - goto do_shiftv; - case INDEX_op_rotl_i64: - if (c2) { - tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, 64 - a2); + } else { + if (flags & TCG_BSWAP_OZ) { + tcg_out_bswap_subr(s, bswap32u_addr); } else { - tcg_out_opc_reg(s, OPC_DSUBU, TCG_TMP0, TCG_REG_ZERO, a2); - tcg_out_opc_reg(s, OPC_DROTRV, a0, TCG_TMP0, a1); + tcg_out_bswap_subr(s, bswap32_addr); } - break; + /* delay slot -- never omit the insn, like tcg_out_mov might. */ + tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); + tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); + } +} - case INDEX_op_clz_i32: - tcg_out_clz(s, OPC_CLZ, OPC_CLZ_R6, 32, a0, a1, a2); - break; - case INDEX_op_clz_i64: - tcg_out_clz(s, OPC_DCLZ, OPC_DCLZ_R6, 64, a0, a1, a2); - break; +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; - case INDEX_op_deposit_i32: - tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]); - break; - case INDEX_op_deposit_i64: +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + if (use_mips32r2_instructions) { + tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg); + tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret); + } else { + tcg_out_bswap_subr(s, bswap64_addr); + /* delay slot -- never omit the insn, like tcg_out_mov might. 
*/ + tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO); + tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3); + } +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (type == TCG_TYPE_I32) { + tcg_out_opc_bf(s, OPC_INS, a0, a2, ofs + len - 1, ofs); + } else { tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2, - args[3] + args[4] - 1, args[3]); - break; + ofs + len - 1, ofs); + } +} - case INDEX_op_extract_i32: - if (a2 == 0 && args[3] <= 16) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2); - } - break; - case INDEX_op_extract_i64: - if (a2 == 0 && args[3] <= 16) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << args[3]) - 1); - } else { - tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, - a0, a1, args[3] - 1, a2); - } - break; +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rz), + .out_rrr = tgen_deposit, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0 && len <= 16) { + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, (1 << len) - 1); + } else if (type == TCG_TYPE_I32) { + tcg_out_opc_bf(s, OPC_EXT, a0, a1, len - 1, ofs); + } else { + tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, + a0, a1, len - 1, ofs); + } +} - case INDEX_op_sextract_i64: - if (a2 == 0 && args[3] == 32) { +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: tcg_out_ext32s(s, a0, a1); - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1); - } else { - g_assert_not_reached(); + return; } - break; + } + g_assert_not_reached(); +} - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5])); - break; +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]); - break; +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2); - break; - case 
INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]); - break; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LBU, dest, base, offset); +} - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, a0, 0, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } - break; +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, 0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, a0, 0, a1, a2, TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, a0, a1, a2, args[3], TCG_TYPE_I64); - } - break; +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LB, dest, base, offset); +} - case INDEX_op_add2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true); - break; +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
*/ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); - } +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LHU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LH, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LWU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LW, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif + +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SB, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SH, data, base, offset); } +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + + static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { - switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_neg_i32: - case INDEX_op_not_i32: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap32_i32: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_extract_i32: - case INDEX_op_sextract_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i64: - case INDEX_op_neg_i64: - case INDEX_op_not_i64: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - case INDEX_op_extrh_i64_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(rz, r); - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - return C_O1_I2(r, r, rJ); - case INDEX_op_sub_i32: - case 
INDEX_op_sub_i64: - return C_O1_I2(r, rz, rN); - case INDEX_op_mul_i32: - case INDEX_op_mulsh_i32: - case INDEX_op_muluh_i32: - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - case INDEX_op_rem_i32: - case INDEX_op_remu_i32: - case INDEX_op_nor_i32: - case INDEX_op_setcond_i32: - case INDEX_op_mul_i64: - case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i64: - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - case INDEX_op_rem_i64: - case INDEX_op_remu_i64: - case INDEX_op_nor_i64: - case INDEX_op_setcond_i64: - return C_O1_I2(r, rz, rz); - case INDEX_op_muls2_i32: - case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i64: - case INDEX_op_mulu2_i64: - return C_O2_I2(r, r, r, r); - case INDEX_op_and_i32: - case INDEX_op_and_i64: - return C_O1_I2(r, r, rIK); - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - return C_O1_I2(r, r, rI); - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotr_i32: - case INDEX_op_rotl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotr_i64: - case INDEX_op_rotl_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - return C_O1_I2(r, r, rzW); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, rz); - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rz); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return (use_mips32r6_instructions - ? C_O1_I4(r, rz, rz, rz, rz) - : C_O1_I4(r, rz, rz, rz, 0)); - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rz, rz, rN, rN); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, rz, rz, rz, rz); - case INDEX_op_brcond2_i32: - return C_O0_I4(rz, rz, rz, rz); - - case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, r); - case INDEX_op_qemu_st_i32: - return C_O0_I2(rz, r); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rz, r) : C_O0_I3(rz, rz, r); - - default: - return C_NotImplemented; - } + return C_NotImplemented; } static const int tcg_target_callee_save_regs[] = { diff --git a/tcg/optimize.c b/tcg/optimize.c index f922f86..10a76c5 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -30,14 +30,6 @@ #include "tcg-internal.h" #include "tcg-has.h" -#define CASE_OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) - -#define CASE_OP_32_64_VEC(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64): \ - glue(glue(case INDEX_op_, x), _vec) typedef struct MemCopyInfo { IntervalTreeNode itree; @@ -66,6 +58,7 @@ typedef struct OptContext { /* In flight values from optimization. 
*/ TCGType type; + int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */ } OptContext; static inline TempOptInfo *ts_info(TCGTemp *ts) @@ -344,6 +337,18 @@ static TCGArg arg_new_temp(OptContext *ctx) return temp_arg(ts); } +static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg); +} + +static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op, + TCGOpcode opc, unsigned narg) +{ + return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg); +} + static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) { TCGTemp *dst_ts = arg_temp(dst); @@ -363,10 +368,8 @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) switch (ctx->type) { case TCG_TYPE_I32: - new_op = INDEX_op_mov_i32; - break; case TCG_TYPE_I64: - new_op = INDEX_op_mov_i64; + new_op = INDEX_op_mov; break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -409,162 +412,163 @@ static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val)); } -static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) +static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type, + uint64_t x, uint64_t y) { uint64_t l64, h64; switch (op) { - CASE_OP_32_64(add): + case INDEX_op_add: return x + y; - CASE_OP_32_64(sub): + case INDEX_op_sub: return x - y; - CASE_OP_32_64(mul): + case INDEX_op_mul: return x * y; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: return x & y; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: return x | y; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: return x ^ y; - case INDEX_op_shl_i32: - return (uint32_t)x << (y & 31); - - case INDEX_op_shl_i64: + case INDEX_op_shl: + if (type == TCG_TYPE_I32) { + return (uint32_t)x << (y & 31); + } return (uint64_t)x << (y & 63); - case INDEX_op_shr_i32: - return (uint32_t)x >> (y & 31); - - case INDEX_op_shr_i64: + case INDEX_op_shr: + if (type == TCG_TYPE_I32) { + return (uint32_t)x >> (y & 31); + } return (uint64_t)x >> (y & 63); - case INDEX_op_sar_i32: - return (int32_t)x >> (y & 31); - - case INDEX_op_sar_i64: + case INDEX_op_sar: + if (type == TCG_TYPE_I32) { + return (int32_t)x >> (y & 31); + } return (int64_t)x >> (y & 63); - case INDEX_op_rotr_i32: - return ror32(x, y & 31); - - case INDEX_op_rotr_i64: + case INDEX_op_rotr: + if (type == TCG_TYPE_I32) { + return ror32(x, y & 31); + } return ror64(x, y & 63); - case INDEX_op_rotl_i32: - return rol32(x, y & 31); - - case INDEX_op_rotl_i64: + case INDEX_op_rotl: + if (type == TCG_TYPE_I32) { + return rol32(x, y & 31); + } return rol64(x, y & 63); - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: return ~x; - CASE_OP_32_64(neg): + case INDEX_op_neg: return -x; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: return x & ~y; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: return x | ~y; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: return ~(x ^ y); - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: + case INDEX_op_nand_vec: return ~(x & y); - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: return ~(x | y); - case INDEX_op_clz_i32: - return (uint32_t)x ? clz32(x) : y; - - case INDEX_op_clz_i64: + case INDEX_op_clz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? clz32(x) : y; + } return x ? 
clz64(x) : y; - case INDEX_op_ctz_i32: - return (uint32_t)x ? ctz32(x) : y; - - case INDEX_op_ctz_i64: + case INDEX_op_ctz: + if (type == TCG_TYPE_I32) { + return (uint32_t)x ? ctz32(x) : y; + } return x ? ctz64(x) : y; - case INDEX_op_ctpop_i32: - return ctpop32(x); - - case INDEX_op_ctpop_i64: - return ctpop64(x); - - CASE_OP_32_64(ext8s): - return (int8_t)x; + case INDEX_op_ctpop: + return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x); - CASE_OP_32_64(ext16s): - return (int16_t)x; - - CASE_OP_32_64(ext8u): - return (uint8_t)x; - - CASE_OP_32_64(ext16u): - return (uint16_t)x; - - CASE_OP_32_64(bswap16): + case INDEX_op_bswap16: x = bswap16(x); return y & TCG_BSWAP_OS ? (int16_t)x : x; - CASE_OP_32_64(bswap32): + case INDEX_op_bswap32: x = bswap32(x); return y & TCG_BSWAP_OS ? (int32_t)x : x; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: return bswap64(x); case INDEX_op_ext_i32_i64: - case INDEX_op_ext32s_i64: return (int32_t)x; case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: - case INDEX_op_ext32u_i64: return (uint32_t)x; case INDEX_op_extrh_i64_i32: return (uint64_t)x >> 32; - case INDEX_op_muluh_i32: - return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; - case INDEX_op_mulsh_i32: - return ((int64_t)(int32_t)x * (int32_t)y) >> 32; - - case INDEX_op_muluh_i64: + case INDEX_op_muluh: + if (type == TCG_TYPE_I32) { + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + } mulu64(&l64, &h64, x, y); return h64; - case INDEX_op_mulsh_i64: + + case INDEX_op_mulsh: + if (type == TCG_TYPE_I32) { + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + } muls64(&l64, &h64, x, y); return h64; - case INDEX_op_div_i32: + case INDEX_op_divs: /* Avoid crashing on divide by zero, otherwise undefined. */ - return (int32_t)x / ((int32_t)y ? : 1); - case INDEX_op_divu_i32: - return (uint32_t)x / ((uint32_t)y ? : 1); - case INDEX_op_div_i64: + if (type == TCG_TYPE_I32) { + return (int32_t)x / ((int32_t)y ? : 1); + } return (int64_t)x / ((int64_t)y ? : 1); - case INDEX_op_divu_i64: + + case INDEX_op_divu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x / ((uint32_t)y ? : 1); + } return (uint64_t)x / ((uint64_t)y ? : 1); - case INDEX_op_rem_i32: - return (int32_t)x % ((int32_t)y ? : 1); - case INDEX_op_remu_i32: - return (uint32_t)x % ((uint32_t)y ? : 1); - case INDEX_op_rem_i64: + case INDEX_op_rems: + if (type == TCG_TYPE_I32) { + return (int32_t)x % ((int32_t)y ? : 1); + } return (int64_t)x % ((int64_t)y ? : 1); - case INDEX_op_remu_i64: + + case INDEX_op_remu: + if (type == TCG_TYPE_I32) { + return (uint32_t)x % ((uint32_t)y ? : 1); + } return (uint64_t)x % ((uint64_t)y ? : 1); default: @@ -575,7 +579,7 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) static uint64_t do_constant_folding(TCGOpcode op, TCGType type, uint64_t x, uint64_t y) { - uint64_t res = do_constant_folding_2(op, x, y); + uint64_t res = do_constant_folding_2(op, type, x, y); if (type == TCG_TYPE_I32) { res = (int32_t)res; } @@ -725,12 +729,18 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, #define NO_DEST temp_arg(NULL) +static int pref_commutative(TempOptInfo *ti) +{ + /* Slight preference for non-zero constants second. */ + return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 
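The divs/divu/rems/remu cases above use the GNU "?:" shorthand so that constant folding never performs a host division by zero; at the TCG level the result of such an operation is undefined anyway, so any substitute divisor is acceptable. In standard C the signed 32-bit case amounts to the following sketch (the helper name is illustrative, not part of the patch):

    #include <stdint.h>

    static int32_t fold_divs_i32(int32_t x, int32_t y)
    {
        /* Substitute 1 for a zero divisor: the result of dividing by
           zero is undefined at the TCG level, so any value will do. */
        return x / (y ? y : 1);
    }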
3 : 2; +} + static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += arg_is_const(a1); - sum -= arg_is_const(a2); + sum += pref_commutative(arg_info(a1)); + sum -= pref_commutative(arg_info(a2)); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. */ @@ -745,10 +755,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += arg_is_const(p1[0]); - sum += arg_is_const(p1[1]); - sum -= arg_is_const(p2[0]); - sum -= arg_is_const(p2[1]); + sum += pref_commutative(arg_info(p1[0])); + sum += pref_commutative(arg_info(p1[1])); + sum -= pref_commutative(arg_info(p2[0])); + sum -= pref_commutative(arg_info(p2[1])); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -806,9 +816,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest, /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst) { - TCGOpcode and_opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_and_i32 : INDEX_op_and_i64); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, and_opc, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg tmp = arg_new_temp(ctx); op2->args[0] = tmp; @@ -901,8 +909,8 @@ static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args) /* Expand to AND with a temporary if no backend support. */ if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) { - TCGOp *op1 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); - TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_and_i32, 3); + TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3); TCGArg t1 = arg_new_temp(ctx); TCGArg t2 = arg_new_temp(ctx); @@ -1101,12 +1109,9 @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx) switch (ctx->type) { case TCG_TYPE_I32: - not_op = INDEX_op_not_i32; - have_not = TCG_TARGET_HAS_not_i32; - break; case TCG_TYPE_I64: - not_op = INDEX_op_not_i64; - have_not = TCG_TARGET_HAS_not_i64; + not_op = INDEX_op_not; + have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0); break; case TCG_TYPE_V64: case TCG_TYPE_V128: @@ -1197,8 +1202,10 @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op) * 3) those that produce information about the result value. 
*/ +static bool fold_addco(OptContext *ctx, TCGOp *op); static bool fold_or(OptContext *ctx, TCGOp *op); static bool fold_orc(OptContext *ctx, TCGOp *op); +static bool fold_subbo(OptContext *ctx, TCGOp *op); static bool fold_xor(OptContext *ctx, TCGOp *op); static bool fold_add(OptContext *ctx, TCGOp *op) @@ -1220,80 +1227,168 @@ static bool fold_add_vec(OptContext *ctx, TCGOp *op) return finish_folding(ctx, op); } -static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) +static void squash_prev_carryout(OptContext *ctx, TCGOp *op) { - bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]); - bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]); + TempOptInfo *t2; - if (a_const && b_const) { - uint64_t al = arg_info(op->args[2])->val; - uint64_t ah = arg_info(op->args[3])->val; - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; - TCGArg rl, rh; - TCGOp *op2; + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_addco: + op->opc = INDEX_op_add; + fold_add(ctx, op); + break; + case INDEX_op_addcio: + op->opc = INDEX_op_addci; + break; + case INDEX_op_addc1o: + op->opc = INDEX_op_add; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, 1); + } + break; + default: + g_assert_not_reached(); + } +} - if (ctx->type == TCG_TYPE_I32) { - uint64_t a = deposit64(al, 32, 32, ah); - uint64_t b = deposit64(bl, 32, 32, bh); +static bool fold_addci(OptContext *ctx, TCGOp *op) +{ + fold_commutative(ctx, op); - if (add) { - a += b; - } else { - a -= b; - } + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_carryout(ctx, op); + op->opc = INDEX_op_add; + + if (ctx->carry_state > 0) { + TempOptInfo *t2 = arg_info(op->args[2]); - al = sextract64(a, 0, 32); - ah = sextract64(a, 32, 32); + /* + * Propagate the known carry-in into a constant, if possible. + * Otherwise emit a second add +1. + */ + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1); } else { - Int128 a = int128_make128(al, ah); - Int128 b = int128_make128(bl, bh); + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3); - if (add) { - a = int128_add(a, b); - } else { - a = int128_sub(a, b); - } + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_add(ctx, op2); - al = int128_getlo(a); - ah = int128_gethi(a); + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, 1); } + } - rl = op->args[0]; - rh = op->args[1]; + ctx->carry_state = -1; + return fold_add(ctx, op); +} - /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); +static bool fold_addcio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t sum, max; - tcg_opt_gen_movi(ctx, op, rl, al); - tcg_opt_gen_movi(ctx, op2, rh, ah); - return true; + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. 
+ */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) && + !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) { + carry_out = 0; } - /* Fold sub2 r,x,i to add2 r,x,-i */ - if (!add && b_const) { - uint64_t bl = arg_info(op->args[4])->val; - uint64_t bh = arg_info(op->args[5])->val; + if (ctx->carry_state < 0) { + ctx->carry_state = carry_out; + return finish_folding(ctx, op); + } - /* Negate the two parts without assembling and disassembling. */ - bl = -bl; - bh = ~bh + !bl; + squash_prev_carryout(ctx, op); + if (ctx->carry_state == 0) { + goto do_addco; + } - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add2_i32 : INDEX_op_add2_i64); - op->args[4] = arg_new_constant(ctx, bl); - op->args[5] = arg_new_constant(ctx, bh); + /* Propagate the known carry-in into a constant, if possible. */ + max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + if (ti_is_const(t2)) { + uint64_t v = ti_const_val(t2) & max; + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + /* max + known carry in produces known carry out. */ + carry_out = 1; } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1) & max; + if (v < max) { + op->args[1] = arg_new_constant(ctx, v + 1); + goto do_addco; + } + carry_out = 1; + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_addc1o; + ctx->carry_state = carry_out; return finish_folding(ctx, op); + + do_addco: + op->opc = INDEX_op_addco; + return fold_addco(ctx, op); } -static bool fold_add2(OptContext *ctx, TCGOp *op) +static bool fold_addco(OptContext *ctx, TCGOp *op) { - /* Note that the high and low parts may be independently swapped. */ - swap_commutative(op->args[0], &op->args[2], &op->args[4]); - swap_commutative(op->args[1], &op->args[3], &op->args[5]); + TempOptInfo *t1, *t2; + int carry_out = -1; + uint64_t ign; - return fold_addsub2(ctx, op, true); + fold_commutative(ctx, op); + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + + if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + /* Given sign-extension of z_mask for I32, we need not truncate. */ + carry_out = uadd64_overflow(v1, v2, &ign); + } else if (v2 == 0) { + carry_out = 0; + } + } else { + /* + * The z_mask value is >= the maximum value that can be represented + * with the known zero bits. So adding the z_mask values will not + * overflow if and only if the true values cannot overflow. + */ + if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) { + carry_out = 0; + } + } + ctx->carry_state = carry_out; + return finish_folding(ctx, op); } static bool fold_and(OptContext *ctx, TCGOp *op) @@ -1348,6 +1443,25 @@ static bool fold_andc(OptContext *ctx, TCGOp *op) t2 = arg_info(op->args[2]); z_mask = t1->z_mask; + if (ti_is_const(t2)) { + /* Fold andc r,x,i to and r,x,~i. */ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_and; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_and_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_and(ctx, op); + } + /* * Known-zeros does not imply known-ones. Therefore unless * arg2 is constant, we can't infer anything from it. 
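The carry-state machinery above replaces the old add2/sub2 folding: a double-word addition now arrives as a chain in which addco produces a carry, addcio consumes and regenerates it, and addci consumes the final carry, with ctx->carry_state tracking whether the in-flight carry is known to be 0 or 1. A rough model of the arithmetic these folds reason about, together with the known-zero-bits argument used in fold_addcio and fold_addco, is sketched below (names are illustrative; this is not the TCG implementation):

    #include <stdint.h>
    #include <stdbool.h>

    /* addco: add, producing a carry-out. */
    static uint64_t model_addco(uint64_t a, uint64_t b, bool *carry)
    {
        uint64_t t = a + b;
        *carry = t < a;
        return t;
    }

    /* addcio: add with carry-in, producing a carry-out. */
    static uint64_t model_addcio(uint64_t a, uint64_t b, bool *carry)
    {
        uint64_t t = a + b + *carry;
        *carry = *carry ? t <= a : t < a;
        return t;
    }

    /*
     * Each operand has set bits only within its z_mask, so its value is
     * <= the z_mask.  If the z_masks (plus the possible carry-in) sum
     * without wrapping, the true values cannot produce a carry-out.
     */
    static bool carry_out_impossible(uint64_t z1, uint64_t z2, bool carry_in)
    {
        uint64_t sum;
        return !__builtin_add_overflow(z1, z2, &sum) &&
               !__builtin_add_overflow(sum, (uint64_t)carry_in, &sum);
    }

When the carry-in becomes known, squash_prev_carryout rewrites the preceding op so the carry is no longer produced at all: addco degrades to a plain add, addcio to addci, and addc1o to an add of the constant plus 1.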
@@ -1504,14 +1618,14 @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op) break; do_brcond_low: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[1] = op->args[2]; op->args[2] = cond; op->args[3] = label; return fold_brcond(ctx, op); do_brcond_high: - op->opc = INDEX_op_brcond_i32; + op->opc = INDEX_op_brcond; op->args[0] = op->args[1]; op->args[1] = op->args[3]; op->args[2] = cond; @@ -1547,17 +1661,15 @@ static bool fold_bswap(OptContext *ctx, TCGOp *op) z_mask = t1->z_mask; switch (op->opc) { - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + case INDEX_op_bswap16: z_mask = bswap16(z_mask); sign = INT16_MIN; break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + case INDEX_op_bswap32: z_mask = bswap32(z_mask); sign = INT32_MIN; break; - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: z_mask = bswap64(z_mask); sign = INT64_MIN; break; @@ -1713,8 +1825,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) TempOptInfo *t2 = arg_info(op->args[2]); int ofs = op->args[3]; int len = op->args[4]; - int width; - TCGOpcode and_opc; + int width = 8 * tcg_type_size(ctx->type); uint64_t z_mask, s_mask; if (ti_is_const(t1) && ti_is_const(t2)) { @@ -1723,24 +1834,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) ti_const_val(t2))); } - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - width = 32; - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - width = 64; - break; - default: - g_assert_not_reached(); - } - /* Inserting a value into zero at offset 0. */ if (ti_is_const_val(t1, 0) && ofs == 0) { uint64_t mask = MAKE_64BIT_MASK(0, len); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = op->args[2]; op->args[2] = arg_new_constant(ctx, mask); return fold_and(ctx, op); @@ -1750,7 +1848,7 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op) if (ti_is_const_val(t2, 0)) { uint64_t mask = deposit64(-1, ofs, len, 0); - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[2] = arg_new_constant(ctx, mask); return fold_and(ctx, op); } @@ -1803,6 +1901,7 @@ static bool fold_dup2(OptContext *ctx, TCGOp *op) static bool fold_eqv(OptContext *ctx, TCGOp *op) { uint64_t s_mask; + TempOptInfo *t1, *t2; if (fold_const2_commutative(ctx, op) || fold_xi_to_x(ctx, op, -1) || @@ -1810,8 +1909,28 @@ static bool fold_eqv(OptContext *ctx, TCGOp *op) return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold eqv r,x,i to xor r,x,~i. 
*/ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_xor; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_xor_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_xor(ctx, op); + } + + t1 = arg_info(op->args[1]); + s_mask = t1->s_mask & t2->s_mask; return fold_masks_s(ctx, op, s_mask); } @@ -1843,12 +1962,12 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op) uint64_t v2 = arg_info(op->args[2])->val; int shr = op->args[3]; - if (op->opc == INDEX_op_extract2_i64) { - v1 >>= shr; - v2 <<= 64 - shr; - } else { + if (ctx->type == TCG_TYPE_I32) { v1 = (uint32_t)v1 >> shr; v2 = (uint64_t)((int32_t)v2 << (32 - shr)); + } else { + v1 >>= shr; + v2 <<= 64 - shr; } return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); } @@ -1857,8 +1976,7 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op) static bool fold_exts(OptContext *ctx, TCGOp *op) { - uint64_t s_mask_old, s_mask, z_mask; - bool type_change = false; + uint64_t s_mask, z_mask; TempOptInfo *t1; if (fold_const1(ctx, op)) { @@ -1868,72 +1986,38 @@ static bool fold_exts(OptContext *ctx, TCGOp *op) t1 = arg_info(op->args[1]); z_mask = t1->z_mask; s_mask = t1->s_mask; - s_mask_old = s_mask; switch (op->opc) { - CASE_OP_32_64(ext8s): - s_mask |= INT8_MIN; - z_mask = (int8_t)z_mask; - break; - CASE_OP_32_64(ext16s): - s_mask |= INT16_MIN; - z_mask = (int16_t)z_mask; - break; case INDEX_op_ext_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32s_i64: s_mask |= INT32_MIN; z_mask = (int32_t)z_mask; break; default: g_assert_not_reached(); } - - if (!type_change && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) { - return true; - } - return fold_masks_zs(ctx, op, z_mask, s_mask); } static bool fold_extu(OptContext *ctx, TCGOp *op) { - uint64_t z_mask_old, z_mask; - bool type_change = false; + uint64_t z_mask; if (fold_const1(ctx, op)) { return true; } - z_mask_old = z_mask = arg_info(op->args[1])->z_mask; - + z_mask = arg_info(op->args[1])->z_mask; switch (op->opc) { - CASE_OP_32_64(ext8u): - z_mask = (uint8_t)z_mask; - break; - CASE_OP_32_64(ext16u): - z_mask = (uint16_t)z_mask; - break; case INDEX_op_extrl_i64_i32: case INDEX_op_extu_i32_i64: - type_change = true; - QEMU_FALLTHROUGH; - case INDEX_op_ext32u_i64: z_mask = (uint32_t)z_mask; break; case INDEX_op_extrh_i64_i32: - type_change = true; z_mask >>= 32; break; default: g_assert_not_reached(); } - - if (!type_change && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) { - return true; - } - return fold_masks_z(ctx, op, z_mask); } @@ -1999,42 +2083,20 @@ static bool fold_movcond(OptContext *ctx, TCGOp *op) if (ti_is_const(tt) && ti_is_const(ft)) { uint64_t tv = ti_const_val(tt); uint64_t fv = ti_const_val(ft); - TCGOpcode opc, negopc = 0; TCGCond cond = op->args[5]; - switch (ctx->type) { - case TCG_TYPE_I32: - opc = INDEX_op_setcond_i32; - if (TCG_TARGET_HAS_negsetcond_i32) { - negopc = INDEX_op_negsetcond_i32; - } - tv = (int32_t)tv; - fv = (int32_t)fv; - break; - case TCG_TYPE_I64: - opc = INDEX_op_setcond_i64; - if (TCG_TARGET_HAS_negsetcond_i64) { - negopc = INDEX_op_negsetcond_i64; - } - break; - default: - g_assert_not_reached(); - } - if (tv == 1 && fv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; op->args[3] = cond; } else if (fv == 1 && tv == 0) { - op->opc = opc; + op->opc = INDEX_op_setcond; + op->args[3] = tcg_invert_cond(cond); + } else if (tv == -1 && fv == 0) { + op->opc = 
INDEX_op_negsetcond; + op->args[3] = cond; + } else if (fv == -1 && tv == 0) { + op->opc = INDEX_op_negsetcond; op->args[3] = tcg_invert_cond(cond); - } else if (negopc) { - if (tv == -1 && fv == 0) { - op->opc = negopc; - op->args[3] = cond; - } else if (fv == -1 && tv == 0) { - op->opc = negopc; - op->args[3] = tcg_invert_cond(cond); - } } } @@ -2072,21 +2134,23 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) TCGOp *op2; switch (op->opc) { - case INDEX_op_mulu2_i32: - l = (uint64_t)(uint32_t)a * (uint32_t)b; - h = (int32_t)(l >> 32); - l = (int32_t)l; - break; - case INDEX_op_muls2_i32: - l = (int64_t)(int32_t)a * (int32_t)b; - h = l >> 32; - l = (int32_t)l; - break; - case INDEX_op_mulu2_i64: - mulu64(&l, &h, a, b); + case INDEX_op_mulu2: + if (ctx->type == TCG_TYPE_I32) { + l = (uint64_t)(uint32_t)a * (uint32_t)b; + h = (int32_t)(l >> 32); + l = (int32_t)l; + } else { + mulu64(&l, &h, a, b); + } break; - case INDEX_op_muls2_i64: - muls64(&l, &h, a, b); + case INDEX_op_muls2: + if (ctx->type == TCG_TYPE_I32) { + l = (int64_t)(int32_t)a * (int32_t)b; + h = l >> 32; + l = (int32_t)l; + } else { + muls64(&l, &h, a, b); + } break; default: g_assert_not_reached(); @@ -2096,7 +2160,7 @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op) rh = op->args[1]; /* The proper opcode is supplied by tcg_opt_gen_mov. */ - op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); + op2 = opt_insert_before(ctx, op, 0, 2); tcg_opt_gen_movi(ctx, op, rl, l); tcg_opt_gen_movi(ctx, op2, rh, h); @@ -2176,6 +2240,7 @@ static bool fold_or(OptContext *ctx, TCGOp *op) static bool fold_orc(OptContext *ctx, TCGOp *op) { uint64_t s_mask; + TempOptInfo *t1, *t2; if (fold_const2(ctx, op) || fold_xx_to_i(ctx, op, -1) || @@ -2184,8 +2249,28 @@ static bool fold_orc(OptContext *ctx, TCGOp *op) return true; } - s_mask = arg_info(op->args[1])->s_mask - & arg_info(op->args[2])->s_mask; + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + /* Fold orc r,x,i to or r,x,~i. 
*/ + switch (ctx->type) { + case TCG_TYPE_I32: + case TCG_TYPE_I64: + op->opc = INDEX_op_or; + break; + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + op->opc = INDEX_op_or_vec; + break; + default: + g_assert_not_reached(); + } + op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2)); + return fold_or(ctx, op); + } + + t1 = arg_info(op->args[1]); + s_mask = t1->s_mask & t2->s_mask; return fold_masks_s(ctx, op, s_mask); } @@ -2300,34 +2385,17 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) break; } if (convert) { - TCGOpcode add_opc, xor_opc, neg_opc; - if (!inv && !neg) { return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); } - switch (ctx->type) { - case TCG_TYPE_I32: - add_opc = INDEX_op_add_i32; - neg_opc = INDEX_op_neg_i32; - xor_opc = INDEX_op_xor_i32; - break; - case TCG_TYPE_I64: - add_opc = INDEX_op_add_i64; - neg_opc = INDEX_op_neg_i64; - xor_opc = INDEX_op_xor_i64; - break; - default: - g_assert_not_reached(); - } - if (!inv) { - op->opc = neg_opc; + op->opc = INDEX_op_neg; } else if (neg) { - op->opc = add_opc; + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -1); } else { - op->opc = xor_opc; + op->opc = INDEX_op_xor; op->args[2] = arg_new_constant(ctx, 1); } return -1; @@ -2338,8 +2406,6 @@ static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg) static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) { - TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc; - TCGOpcode uext_opc = 0, sext_opc = 0; TCGCond cond = op->args[3]; TCGArg ret, src1, src2; TCGOp *op2; @@ -2358,77 +2424,46 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg) } sh = ctz64(val); - switch (ctx->type) { - case TCG_TYPE_I32: - and_opc = INDEX_op_and_i32; - sub_opc = INDEX_op_sub_i32; - xor_opc = INDEX_op_xor_i32; - shr_opc = INDEX_op_shr_i32; - neg_opc = INDEX_op_neg_i32; - if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) { - uext_opc = INDEX_op_extract_i32; - } - if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) { - sext_opc = INDEX_op_sextract_i32; - } - break; - case TCG_TYPE_I64: - and_opc = INDEX_op_and_i64; - sub_opc = INDEX_op_sub_i64; - xor_opc = INDEX_op_xor_i64; - shr_opc = INDEX_op_shr_i64; - neg_opc = INDEX_op_neg_i64; - if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) { - uext_opc = INDEX_op_extract_i64; - } - if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) { - sext_opc = INDEX_op_sextract_i64; - } - break; - default: - g_assert_not_reached(); - } - ret = op->args[0]; src1 = op->args[1]; inv = cond == TCG_COND_TSTEQ; - if (sh && sext_opc && neg && !inv) { - op->opc = sext_opc; + if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_sextract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; return; - } else if (sh && uext_opc) { - op->opc = uext_opc; + } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) { + op->opc = INDEX_op_extract; op->args[1] = src1; op->args[2] = sh; op->args[3] = 1; } else { if (sh) { - op2 = tcg_op_insert_before(ctx->tcg, op, shr_opc, 3); + op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3); op2->args[0] = ret; op2->args[1] = src1; op2->args[2] = arg_new_constant(ctx, sh); src1 = ret; } - op->opc = and_opc; + op->opc = INDEX_op_and; op->args[1] = src1; op->args[2] = arg_new_constant(ctx, 1); } if (neg && inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, sub_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_add, 3); op2->args[0] = ret; op2->args[1] = ret; - op2->args[2] = arg_new_constant(ctx, 1); + 
op2->args[2] = arg_new_constant(ctx, -1); } else if (inv) { - op2 = tcg_op_insert_after(ctx->tcg, op, xor_opc, 3); + op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3); op2->args[0] = ret; op2->args[1] = ret; op2->args[2] = arg_new_constant(ctx, 1); } else if (neg) { - op2 = tcg_op_insert_after(ctx->tcg, op, neg_opc, 2); + op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2); op2->args[0] = ret; op2->args[1] = ret; } @@ -2540,14 +2575,14 @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op) do_setcond_low: op->args[2] = op->args[3]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); do_setcond_high: op->args[1] = op->args[2]; op->args[2] = op->args[4]; op->args[3] = cond; - op->opc = INDEX_op_setcond_i32; + op->opc = INDEX_op_setcond; return fold_setcond(ctx, op); } @@ -2607,13 +2642,13 @@ static bool fold_shift(OptContext *ctx, TCGOp *op) } switch (op->opc) { - CASE_OP_32_64(sar): + case INDEX_op_sar: /* * Arithmetic right shift will not reduce the number of * input sign repetitions. */ return fold_masks_s(ctx, op, s_mask); - CASE_OP_32_64(shr): + case INDEX_op_shr: /* * If the sign bit is known zero, then logical right shift * will not reduce the number of input sign repetitions. @@ -2640,11 +2675,8 @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) switch (ctx->type) { case TCG_TYPE_I32: - neg_op = INDEX_op_neg_i32; - have_neg = true; - break; case TCG_TYPE_I64: - neg_op = INDEX_op_neg_i64; + neg_op = INDEX_op_neg; have_neg = true; break; case TCG_TYPE_V64: @@ -2689,16 +2721,149 @@ static bool fold_sub(OptContext *ctx, TCGOp *op) if (arg_is_const(op->args[2])) { uint64_t val = arg_info(op->args[2])->val; - op->opc = (ctx->type == TCG_TYPE_I32 - ? INDEX_op_add_i32 : INDEX_op_add_i64); + op->opc = INDEX_op_add; op->args[2] = arg_new_constant(ctx, -val); } return finish_folding(ctx, op); } -static bool fold_sub2(OptContext *ctx, TCGOp *op) +static void squash_prev_borrowout(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + + op = QTAILQ_PREV(op, link); + switch (op->opc) { + case INDEX_op_subbo: + op->opc = INDEX_op_sub; + fold_sub(ctx, op); + break; + case INDEX_op_subbio: + op->opc = INDEX_op_subbi; + break; + case INDEX_op_subb1o: + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->opc = INDEX_op_add; + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + /* Perform other constant folding, if needed. */ + fold_add(ctx, op); + } else { + TCGArg ret = op->args[0]; + op->opc = INDEX_op_sub; + op = opt_insert_after(ctx, op, INDEX_op_add, 3); + op->args[0] = ret; + op->args[1] = ret; + op->args[2] = arg_new_constant(ctx, -1); + } + break; + default: + g_assert_not_reached(); + } +} + +static bool fold_subbi(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t2; + int borrow_in = ctx->carry_state; + + if (borrow_in < 0) { + return finish_folding(ctx, op); + } + ctx->carry_state = -1; + + squash_prev_borrowout(ctx, op); + if (borrow_in == 0) { + op->opc = INDEX_op_sub; + return fold_sub(ctx, op); + } + + /* + * Propagate the known carry-in into any constant, then negate to + * transform from sub to add. If there is no constant, emit a + * separate add -1. 
+ */ + t2 = arg_info(op->args[2]); + if (ti_is_const(t2)) { + op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1)); + } else { + TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3); + + op2->args[0] = op->args[0]; + op2->args[1] = op->args[1]; + op2->args[2] = op->args[2]; + fold_sub(ctx, op2); + + op->args[1] = op->args[0]; + op->args[2] = arg_new_constant(ctx, -1); + } + op->opc = INDEX_op_add; + return fold_add(ctx, op); +} + +static bool fold_subbio(OptContext *ctx, TCGOp *op) +{ + TempOptInfo *t1, *t2; + int borrow_out = -1; + + if (ctx->carry_state < 0) { + return finish_folding(ctx, op); + } + + squash_prev_borrowout(ctx, op); + if (ctx->carry_state == 0) { + goto do_subbo; + } + + t1 = arg_info(op->args[1]); + t2 = arg_info(op->args[2]); + + /* Propagate the known borrow-in into a constant, if possible. */ + if (ti_is_const(t2)) { + uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX; + uint64_t v = ti_const_val(t2) & max; + + if (v < max) { + op->args[2] = arg_new_constant(ctx, v + 1); + goto do_subbo; + } + /* subtracting max + 1 produces known borrow out. */ + borrow_out = 1; + } + if (ti_is_const(t1)) { + uint64_t v = ti_const_val(t1); + if (v != 0) { + op->args[2] = arg_new_constant(ctx, v - 1); + goto do_subbo; + } + } + + /* Adjust the opcode to remember the known carry-in. */ + op->opc = INDEX_op_subb1o; + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); + + do_subbo: + op->opc = INDEX_op_subbo; + return fold_subbo(ctx, op); +} + +static bool fold_subbo(OptContext *ctx, TCGOp *op) { - return fold_addsub2(ctx, op, false); + TempOptInfo *t1 = arg_info(op->args[1]); + TempOptInfo *t2 = arg_info(op->args[2]); + int borrow_out = -1; + + if (ti_is_const(t2)) { + uint64_t v2 = ti_const_val(t2); + if (v2 == 0) { + borrow_out = 0; + } else if (ti_is_const(t1)) { + uint64_t v1 = ti_const_val(t1); + borrow_out = v1 < v2; + } + } + ctx->carry_state = borrow_out; + return finish_folding(ctx, op); } static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) @@ -2707,22 +2872,22 @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) /* We can't do any folding with a load, but we can record bits. */ switch (op->opc) { - CASE_OP_32_64(ld8s): + case INDEX_op_ld8s: s_mask = INT8_MIN; break; - CASE_OP_32_64(ld8u): + case INDEX_op_ld8u: z_mask = MAKE_64BIT_MASK(0, 8); break; - CASE_OP_32_64(ld16s): + case INDEX_op_ld16s: s_mask = INT16_MIN; break; - CASE_OP_32_64(ld16u): + case INDEX_op_ld16u: z_mask = MAKE_64BIT_MASK(0, 16); break; - case INDEX_op_ld32s_i64: + case INDEX_op_ld32s: s_mask = INT32_MIN; break; - case INDEX_op_ld32u_i64: + case INDEX_op_ld32u: z_mask = MAKE_64BIT_MASK(0, 32); break; default: @@ -2765,19 +2930,16 @@ static bool fold_tcg_st(OptContext *ctx, TCGOp *op) } switch (op->opc) { - CASE_OP_32_64(st8): + case INDEX_op_st8: lm1 = 0; break; - CASE_OP_32_64(st16): + case INDEX_op_st16: lm1 = 1; break; - case INDEX_op_st32_i64: - case INDEX_op_st_i32: + case INDEX_op_st32: lm1 = 3; break; - case INDEX_op_st_i64: - lm1 = 7; - break; + case INDEX_op_st: case INDEX_op_st_vec: lm1 = tcg_type_size(ctx->type) - 1; break; @@ -2881,44 +3043,52 @@ void tcg_optimize(TCGContext *s) * Sorted alphabetically by opcode as much as possible. 
*/ switch (opc) { - CASE_OP_32_64(add): + case INDEX_op_add: done = fold_add(&ctx, op); break; case INDEX_op_add_vec: done = fold_add_vec(&ctx, op); break; - CASE_OP_32_64(add2): - done = fold_add2(&ctx, op); + case INDEX_op_addci: + done = fold_addci(&ctx, op); + break; + case INDEX_op_addcio: + done = fold_addcio(&ctx, op); + break; + case INDEX_op_addco: + done = fold_addco(&ctx, op); break; - CASE_OP_32_64_VEC(and): + case INDEX_op_and: + case INDEX_op_and_vec: done = fold_and(&ctx, op); break; - CASE_OP_32_64_VEC(andc): + case INDEX_op_andc: + case INDEX_op_andc_vec: done = fold_andc(&ctx, op); break; - CASE_OP_32_64(brcond): + case INDEX_op_brcond: done = fold_brcond(&ctx, op); break; case INDEX_op_brcond2_i32: done = fold_brcond2(&ctx, op); break; - CASE_OP_32_64(bswap16): - CASE_OP_32_64(bswap32): - case INDEX_op_bswap64_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: done = fold_bswap(&ctx, op); break; - CASE_OP_32_64(clz): - CASE_OP_32_64(ctz): + case INDEX_op_clz: + case INDEX_op_ctz: done = fold_count_zeros(&ctx, op); break; - CASE_OP_32_64(ctpop): + case INDEX_op_ctpop: done = fold_ctpop(&ctx, op); break; - CASE_OP_32_64(deposit): + case INDEX_op_deposit: done = fold_deposit(&ctx, op); break; - CASE_OP_32_64(div): - CASE_OP_32_64(divu): + case INDEX_op_divs: + case INDEX_op_divu: done = fold_divide(&ctx, op); break; case INDEX_op_dup_vec: @@ -2927,123 +3097,114 @@ void tcg_optimize(TCGContext *s) case INDEX_op_dup2_vec: done = fold_dup2(&ctx, op); break; - CASE_OP_32_64_VEC(eqv): + case INDEX_op_eqv: + case INDEX_op_eqv_vec: done = fold_eqv(&ctx, op); break; - CASE_OP_32_64(extract): + case INDEX_op_extract: done = fold_extract(&ctx, op); break; - CASE_OP_32_64(extract2): + case INDEX_op_extract2: done = fold_extract2(&ctx, op); break; - CASE_OP_32_64(ext8s): - CASE_OP_32_64(ext16s): - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: done = fold_exts(&ctx, op); break; - CASE_OP_32_64(ext8u): - CASE_OP_32_64(ext16u): - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: done = fold_extu(&ctx, op); break; - CASE_OP_32_64(ld8s): - CASE_OP_32_64(ld8u): - CASE_OP_32_64(ld16s): - CASE_OP_32_64(ld16u): - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: + case INDEX_op_ld8s: + case INDEX_op_ld8u: + case INDEX_op_ld16s: + case INDEX_op_ld16u: + case INDEX_op_ld32s: + case INDEX_op_ld32u: done = fold_tcg_ld(&ctx, op); break; - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: + case INDEX_op_ld: case INDEX_op_ld_vec: done = fold_tcg_ld_memcopy(&ctx, op); break; - CASE_OP_32_64(st8): - CASE_OP_32_64(st16): - case INDEX_op_st32_i64: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: done = fold_tcg_st(&ctx, op); break; - case INDEX_op_st_i32: - case INDEX_op_st_i64: + case INDEX_op_st: case INDEX_op_st_vec: done = fold_tcg_st_memcopy(&ctx, op); break; case INDEX_op_mb: done = fold_mb(&ctx, op); break; - CASE_OP_32_64_VEC(mov): + case INDEX_op_mov: + case INDEX_op_mov_vec: done = fold_mov(&ctx, op); break; - CASE_OP_32_64(movcond): + case INDEX_op_movcond: done = fold_movcond(&ctx, op); break; - CASE_OP_32_64(mul): + case INDEX_op_mul: done = fold_mul(&ctx, op); break; - CASE_OP_32_64(mulsh): - CASE_OP_32_64(muluh): + case INDEX_op_mulsh: + case INDEX_op_muluh: done = fold_mul_highpart(&ctx, op); break; - CASE_OP_32_64(muls2): - CASE_OP_32_64(mulu2): + case INDEX_op_muls2: + case INDEX_op_mulu2: done = fold_multiply2(&ctx, op); break; - CASE_OP_32_64_VEC(nand): + case INDEX_op_nand: 
+ case INDEX_op_nand_vec: done = fold_nand(&ctx, op); break; - CASE_OP_32_64(neg): + case INDEX_op_neg: done = fold_neg(&ctx, op); break; - CASE_OP_32_64_VEC(nor): + case INDEX_op_nor: + case INDEX_op_nor_vec: done = fold_nor(&ctx, op); break; - CASE_OP_32_64_VEC(not): + case INDEX_op_not: + case INDEX_op_not_vec: done = fold_not(&ctx, op); break; - CASE_OP_32_64_VEC(or): + case INDEX_op_or: + case INDEX_op_or_vec: done = fold_or(&ctx, op); break; - CASE_OP_32_64_VEC(orc): + case INDEX_op_orc: + case INDEX_op_orc_vec: done = fold_orc(&ctx, op); break; - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: done = fold_qemu_ld_1reg(&ctx, op); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - done = fold_qemu_ld_1reg(&ctx, op); - break; - } - QEMU_FALLTHROUGH; - case INDEX_op_qemu_ld_i128: + case INDEX_op_qemu_ld2: done = fold_qemu_ld_2reg(&ctx, op); break; - case INDEX_op_qemu_st8_i32: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_st: + case INDEX_op_qemu_st2: done = fold_qemu_st(&ctx, op); break; - CASE_OP_32_64(rem): - CASE_OP_32_64(remu): + case INDEX_op_rems: + case INDEX_op_remu: done = fold_remainder(&ctx, op); break; - CASE_OP_32_64(rotl): - CASE_OP_32_64(rotr): - CASE_OP_32_64(sar): - CASE_OP_32_64(shl): - CASE_OP_32_64(shr): + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: done = fold_shift(&ctx, op); break; - CASE_OP_32_64(setcond): + case INDEX_op_setcond: done = fold_setcond(&ctx, op); break; - CASE_OP_32_64(negsetcond): + case INDEX_op_negsetcond: done = fold_negsetcond(&ctx, op); break; case INDEX_op_setcond2_i32: @@ -3058,19 +3219,26 @@ void tcg_optimize(TCGContext *s) case INDEX_op_bitsel_vec: done = fold_bitsel_vec(&ctx, op); break; - CASE_OP_32_64(sextract): + case INDEX_op_sextract: done = fold_sextract(&ctx, op); break; - CASE_OP_32_64(sub): + case INDEX_op_sub: done = fold_sub(&ctx, op); break; + case INDEX_op_subbi: + done = fold_subbi(&ctx, op); + break; + case INDEX_op_subbio: + done = fold_subbio(&ctx, op); + break; + case INDEX_op_subbo: + done = fold_subbo(&ctx, op); + break; case INDEX_op_sub_vec: done = fold_sub_vec(&ctx, op); break; - CASE_OP_32_64(sub2): - done = fold_sub2(&ctx, op); - break; - CASE_OP_32_64_VEC(xor): + case INDEX_op_xor: + case INDEX_op_xor_vec: done = fold_xor(&ctx, op); break; case INDEX_op_set_label: diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h index 453abde..da7a383 100644 --- a/tcg/ppc/tcg-target-con-set.h +++ b/tcg/ppc/tcg-target-con-set.h @@ -15,29 +15,29 @@ C_O0_I2(r, rC) C_O0_I2(v, r) C_O0_I3(r, r, r) C_O0_I3(o, m, r) -C_O0_I4(r, r, ri, ri) +C_O0_I4(r, r, rU, rC) C_O0_I4(r, r, r, r) C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) C_O1_I1(v, vr) C_O1_I2(r, 0, rZ) -C_O1_I2(r, rI, ri) -C_O1_I2(r, rI, rT) +C_O1_I2(r, rI, r) C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rT) C_O1_I2(r, r, rU) +C_O1_I2(r, r, rZM) C_O1_I2(r, r, rZW) +C_O1_I2(r, rI, rN) +C_O1_I2(r, rZM, rZM) C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(v, v, v, vZM, v) C_O1_I4(r, r, rC, rZ, rZ) -C_O1_I4(r, r, r, ri, ri) +C_O1_I4(r, r, r, rU, rC) C_O2_I1(r, r, r) C_N1O1_I1(o, m, r) C_O2_I2(r, r, r, r) -C_O2_I4(r, r, rI, rZM, r, r) -C_O2_I4(r, r, r, r, rI, rZM) diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h index 16b6872..faf92da 100644 --- a/tcg/ppc/tcg-target-con-str.h +++ b/tcg/ppc/tcg-target-con-str.h @@ -19,6 +19,7 @@ REGS('v', ALL_VECTOR_REGS) 
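The constraint-string hunk just below adds a new 'N' letter (TCG_CT_CONST_N16) for PowerPC, matching a constant whose negation fits in a signed 16-bit immediate; together with the new C_O1_I2(r, rI, rN) constraint set above, this presumably lets a subtraction by such a constant be emitted as an addi of the negated value. A reference check mirroring the tcg_target_const_match test added further below (a sketch, ignoring the INT64_MIN corner case):

    #include <stdint.h>
    #include <stdbool.h>

    static bool match_N16(int64_t sval)
    {
        /* True when -sval can be encoded in a signed 16-bit immediate. */
        return -sval == (int16_t)-sval;
    }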
CONST('C', TCG_CT_CONST_CMP) CONST('I', TCG_CT_CONST_S16) CONST('M', TCG_CT_CONST_MONE) +CONST('N', TCG_CT_CONST_N16) CONST('T', TCG_CT_CONST_S32) CONST('U', TCG_CT_CONST_U32) CONST('W', TCG_CT_CONST_WSZ) diff --git a/tcg/ppc/tcg-target-has.h b/tcg/ppc/tcg-target-has.h index 6db91f7..81ec5ae 100644 --- a/tcg/ppc/tcg-target-has.h +++ b/tcg/ppc/tcg-target-has.h @@ -16,68 +16,9 @@ #define have_altivec (cpuinfo & CPUINFO_ALTIVEC) #define have_vsx (cpuinfo & CPUINFO_VSX) -/* optional instructions automatically implemented */ -#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ -#define TCG_TARGET_HAS_ext16u_i32 0 - /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 have_isa_3_00 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_eqv_i32 1 -#define TCG_TARGET_HAS_nand_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00 -#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06 -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 1 -#define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_add2_i32 0 -#define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 have_isa_3_00 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 0 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 -#define TCG_TARGET_HAS_nand_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00 -#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 1 #endif #define TCG_TARGET_HAS_qemu_ldst_i128 \ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 822925a..2e94778 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -89,14 +89,15 @@ /* Shorthand for size of a register. 
*/ #define SZR (TCG_TARGET_REG_BITS / 8) -#define TCG_CT_CONST_S16 0x100 -#define TCG_CT_CONST_U16 0x200 -#define TCG_CT_CONST_S32 0x400 -#define TCG_CT_CONST_U32 0x800 -#define TCG_CT_CONST_ZERO 0x1000 -#define TCG_CT_CONST_MONE 0x2000 -#define TCG_CT_CONST_WSZ 0x4000 -#define TCG_CT_CONST_CMP 0x8000 +#define TCG_CT_CONST_S16 0x00100 +#define TCG_CT_CONST_U16 0x00200 +#define TCG_CT_CONST_N16 0x00400 +#define TCG_CT_CONST_S32 0x00800 +#define TCG_CT_CONST_U32 0x01000 +#define TCG_CT_CONST_ZERO 0x02000 +#define TCG_CT_CONST_MONE 0x04000 +#define TCG_CT_CONST_WSZ 0x08000 +#define TCG_CT_CONST_CMP 0x10000 #define ALL_GENERAL_REGS 0xffffffffu #define ALL_VECTOR_REGS 0xffffffff00000000ull @@ -342,6 +343,9 @@ static bool tcg_target_const_match(int64_t sval, int ct, if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) { return 1; } + if ((ct & TCG_CT_CONST_N16) && -sval == (int16_t)-sval) { + return 1; + } if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) { return 1; } @@ -1012,111 +1016,6 @@ static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm) tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2); } -static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags) -{ - TCGReg tmp = dst == src ? TCG_REG_R0 : dst; - - if (have_isa_3_10) { - tcg_out32(s, BRH | RA(dst) | RS(src)); - if (flags & TCG_BSWAP_OS) { - tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); - } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext16u(s, dst, dst); - } - return; - } - - /* - * In the following, - * dep(a, b, m) -> (a & ~m) | (b & m) - * - * Begin with: src = xxxxabcd - */ - /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ - tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); - /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ - tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); - - if (flags & TCG_BSWAP_OS) { - tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); - } else { - tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); - } -} - -static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags) -{ - TCGReg tmp = dst == src ? TCG_REG_R0 : dst; - - if (have_isa_3_10) { - tcg_out32(s, BRW | RA(dst) | RS(src)); - if (flags & TCG_BSWAP_OS) { - tcg_out_ext32s(s, dst, dst); - } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext32u(s, dst, dst); - } - return; - } - - /* - * Stolen from gcc's builtin_bswap32. - * In the following, - * dep(a, b, m) -> (a & ~m) | (b & m) - * - * Begin with: src = xxxxabcd - */ - /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ - tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); - /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ - tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); - /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ - tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); - - if (flags & TCG_BSWAP_OS) { - tcg_out_ext32s(s, dst, tmp); - } else { - tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); - } -} - -static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src) -{ - TCGReg t0 = dst == src ? TCG_REG_R0 : dst; - TCGReg t1 = dst == src ? 
dst : TCG_REG_R0; - - if (have_isa_3_10) { - tcg_out32(s, BRD | RA(dst) | RS(src)); - return; - } - - /* - * In the following, - * dep(a, b, m) -> (a & ~m) | (b & m) - * - * Begin with: src = abcdefgh - */ - /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ - tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); - /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ - tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); - /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ - tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); - - /* t0 = rol64(t0, 32) = hgfe0000 */ - tcg_out_rld(s, RLDICL, t0, t0, 32, 0); - /* t1 = rol64(src, 32) = efghabcd */ - tcg_out_rld(s, RLDICL, t1, src, 32, 0); - - /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ - tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); - /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ - tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); - /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ - tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); - - tcg_out_mov(s, TCG_TYPE_REG, dst, t0); -} - /* Emit a move into ret of arg, if it can be done in one insn. */ static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) { @@ -1777,9 +1676,8 @@ static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2, } static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, - int const_arg2, int cr, TCGType type) + bool const_arg2, int cr, TCGType type) { - int imm; uint32_t op; tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); @@ -1796,18 +1694,15 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, case TCG_COND_EQ: case TCG_COND_NE: if (const_arg2) { - if ((int16_t) arg2 == arg2) { + if ((int16_t)arg2 == arg2) { op = CMPI; - imm = 1; - break; - } else if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; break; } + tcg_debug_assert((uint16_t)arg2 == arg2); + op = CMPLI; + break; } op = CMPL; - imm = 0; break; case TCG_COND_TSTEQ: @@ -1821,14 +1716,11 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, case TCG_COND_LE: case TCG_COND_GT: if (const_arg2) { - if ((int16_t) arg2 == arg2) { - op = CMPI; - imm = 1; - break; - } + tcg_debug_assert((int16_t)arg2 == arg2); + op = CMPI; + break; } op = CMP; - imm = 0; break; case TCG_COND_LTU: @@ -1836,30 +1728,20 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, case TCG_COND_LEU: case TCG_COND_GTU: if (const_arg2) { - if ((uint16_t) arg2 == arg2) { - op = CMPLI; - imm = 1; - break; - } + tcg_debug_assert((uint16_t)arg2 == arg2); + op = CMPLI; + break; } op = CMPL; - imm = 0; break; default: g_assert_not_reached(); } op |= BF(cr) | ((type == TCG_TYPE_I64) << 21); - - if (imm) { - tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff)); - } else { - if (const_arg2) { - tcg_out_movi(s, type, TCG_REG_R0, arg2); - arg2 = TCG_REG_R0; - } - tcg_out32(s, op | RA(arg1) | RB(arg2)); - } + op |= RA(arg1); + op |= const_arg2 ? 
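With comparison operands now constrained to 'rC'/'rU' (see the constraint-set changes above and the outop_setcond/outop_brcond definitions below), a constant that reaches tcg_out_cmp is already known to fit a 16-bit immediate field, so the old fallback that loaded an out-of-range constant into R0 is gone and the function only asserts the range. The two ranges being asserted, as a sketch (helper names are illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdbool.h>

    /* Signed compares use CMPI with a sign-extended 16-bit immediate. */
    static bool fits_cmpi(int64_t v)
    {
        return v == (int16_t)v;
    }

    /* Unsigned compares, and equality compares whose constant does not
       fit CMPI, use CMPLI with a zero-extended 16-bit immediate. */
    static bool fits_cmpli(uint64_t v)
    {
        return v == (uint16_t)v;
    }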
arg2 & 0xffff : RB(arg2); + tcg_out32(s, op); } static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, @@ -1926,8 +1808,8 @@ static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, } static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2, bool neg) + TCGReg arg0, TCGReg arg1, TCGArg arg2, + bool const_arg2, bool neg) { int sh; bool inv; @@ -2072,6 +1954,54 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, } } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + +void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + uint32_t insn = B; + + if (l->has_value) { + insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); + } + tcg_out32(s, insn); +} + static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd) { tcg_out32(s, tcg_to_bc[cond] | bd); @@ -2088,17 +2018,29 @@ static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l) tcg_out_bc(s, cond, bd); } -static void tcg_out_brcond(TCGContext *s, TCGCond cond, - TCGArg arg1, TCGArg arg2, int const_arg2, - TCGLabel *l, TCGType type) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) +{ + tcg_out_cmp(s, cond, arg1, arg2, false, 0, type); + tcg_out_bc_lab(s, cond, l); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, tcg_target_long arg2, TCGLabel *l) { - tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type); + tcg_out_cmp(s, cond, arg1, arg2, true, 0, type); tcg_out_bc_lab(s, cond, l); } -static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, - TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1, - TCGArg v2, bool const_c2) +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rC), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg v1, bool const_v1, TCGArg v2, bool const_v2) { /* If for some reason both inputs are zero, don't produce bad code. 
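Both value operands are allowed to be constant zero by the rZ constraints in outop_movcond, so this case can occur after constant folding.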
*/ if (v1 == 0 && v2 == 0) { @@ -2144,6 +2086,11 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ), + .out = tgen_movcond, +}; + static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2) { @@ -2170,8 +2117,8 @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, } } -static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool blconst, TCGArg bh, bool bhconst) { static const struct { uint8_t bit1, bit2; } bits[] = { [TCG_COND_LT ] = { CR_LT, CR_LT }, @@ -2184,18 +2131,9 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, [TCG_COND_GEU] = { CR_GT, CR_LT }, }; - TCGCond cond = args[4], cond2; - TCGArg al, ah, bl, bh; - int blconst, bhconst; + TCGCond cond2; int op, bit1, bit2; - al = args[0]; - ah = args[1]; - bl = args[2]; - bh = args[3]; - blconst = const_args[2]; - bhconst = const_args[3]; - switch (cond) { case TCG_COND_EQ: op = CRAND; @@ -2247,22 +2185,42 @@ static void tcg_out_cmp2(TCGContext *s, const TCGArg *args, } } -static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, - const int *const_args) +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) { - tcg_out_cmp2(s, args + 1, const_args + 1); + tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0)); - tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); + tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31); } -static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, - const int *const_args) +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, rU, rC), + .out = tgen_setcond2, +}; + +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l) { - tcg_out_cmp2(s, args, const_args); - tcg_out_bc_lab(s, TCG_COND_EQ, arg_label(args[5])); + assert(TCG_TARGET_REG_BITS == 32); + tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh); + tcg_out_bc_lab(s, TCG_COND_EQ, l); } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, rU, rC), + .out = tgen_brcond2, +}; + +static void tcg_out_mb(TCGContext *s, unsigned a0) { uint32_t insn; @@ -2737,6 +2695,60 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_qemu_ld(s, data, -1, addr, oi, type); +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_qemu_ld(s, datalo, datahi, addr, oi, type); + } else { + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr, oi, true); + } +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 
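/* 64-bit hosts take the i128 load path and need the constrained (o, m)
   pair; 32-bit hosts return a 64-bit value in an ordinary register pair. */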
64 ? C_N1O1_I1(o, m, r) : C_O2_I1(r, r, r), + .out = tgen_qemu_ld2, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_qemu_st(s, data, -1, addr, oi, type); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_qemu_st(s, datalo, datahi, addr, oi, type); + } else { + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr, oi, false); + } +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_O0_I3(o, m, r) : C_O0_I3(r, r, r), + .out = tgen_qemu_st2, +}; + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; @@ -2885,6 +2897,13 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out32(s, MTSPR | RS(a0) | CTR); + tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); + tcg_out32(s, BCCTR | BO_ALWAYS); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2902,595 +2921,913 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - TCGArg a0, a1, a2; + tcg_out32(s, ADD | TAB(a0, a1, a2)); +} - switch (opc) { - case INDEX_op_goto_ptr: - tcg_out32(s, MTSPR | RS(args[0]) | CTR); - tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0)); - tcg_out32(s, BCCTR | BO_ALWAYS); - break; - case INDEX_op_br: - { - TCGLabel *l = arg_label(args[0]); - uint32_t insn = B; - - if (l->has_value) { - insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr), - l->u.value_ptr); - } else { - tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); - } - tcg_out32(s, insn); - } - break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); - tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); - break; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, 
tcg_target_long a2) +{ + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); +} - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_32: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rT), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, a2); - } else { - tcg_out32(s, AND | SAB(a1, a0, a2)); - } - break; - case INDEX_op_or_i64: - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_ori32(s, a0, a1, a2); - } else { - tcg_out32(s, OR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_xor_i64: - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_xori32(s, a0, a1, a2); - } else { - tcg_out32(s, XOR | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi32(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_andi64(s, a0, a1, ~a2); - } else { - tcg_out32(s, ANDC | SAB(a1, a0, a2)); - } - break; - case INDEX_op_orc_i32: - if (const_args[2]) { - tcg_out_ori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_orc_i64: - tcg_out32(s, ORC | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_eqv_i32: - if (const_args[2]) { - tcg_out_xori32(s, args[0], args[1], ~args[2]); - break; - } - /* FALLTHRU */ - case INDEX_op_eqv_i64: - tcg_out32(s, EQV | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - tcg_out32(s, NAND | SAB(args[1], args[0], args[2])); - break; - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[2])); - break; +static void tgen_addco_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ADDC | TAB(a0, a1, a2)); +} - case INDEX_op_clz_i32: - tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_ctz_i32: - tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_ctpop_i32: - tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0)); - break; +static void tgen_addco_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out32(s, ADDIC | TAI(a0, a1, a2)); +} - case INDEX_op_clz_i64: - tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_ctz_i64: - tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1], - args[2], const_args[2]); - break; - case INDEX_op_ctpop_i64: - tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0)); - 
break; +static TCGConstraintSetIndex cset_addco(TCGType type, unsigned flags) +{ + /* + * Note that the CA bit is defined based on the word size of the + * environment. So in 64-bit mode it's always carry-out of bit 63. + * The fallback code using deposit works just as well for TCG_TYPE_I32. + */ + return type == TCG_TYPE_REG ? C_O1_I2(r, r, rI) : C_NotImplemented; +} - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLW | TAB(a0, a1, a2)); - } - break; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addco, + .out_rrr = tgen_addco_rrr, + .out_rri = tgen_addco_rri, +}; - case INDEX_op_div_i32: - tcg_out32(s, DIVW | TAB(args[0], args[1], args[2])); - break; +static void tgen_addcio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ADDE | TAB(a0, a1, a2)); +} - case INDEX_op_divu_i32: - tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); - break; +static void tgen_addcio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out32(s, (a2 ? ADDME : ADDZE) | RT(a0) | RA(a1)); +} - case INDEX_op_rem_i32: - tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); - break; +static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I2(r, r, rZM) : C_NotImplemented; +} - case INDEX_op_remu_i32: - tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); - break; +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addcio, + .out_rrr = tgen_addcio_rrr, + .out_rri = tgen_addcio_rri, +}; - case INDEX_op_shl_i32: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_shli32(s, args[0], args[1], args[2] & 31); - } else { - tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i32: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_shri32(s, args[0], args[1], args[2] & 31); - } else { - tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i32: - if (const_args[2]) { - tcg_out_sari32(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31); - } else { - tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) - | MB(0) | ME(31)); - } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32)); - tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0) - | MB(0) | ME(31)); - } - break; +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addcio, + .out_rrr = tgen_addcio_rrr, + .out_rri = tgen_addcio_rri, +}; - case INDEX_op_brcond_i32: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I32); - break; - case INDEX_op_brcond_i64: - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], - arg_label(args[3]), TCG_TYPE_I64); - break; - case INDEX_op_brcond2_i32: - tcg_out_brcond2(s, args, const_args); - break; +static void tcg_out_set_carry(TCGContext *s) +{ + tcg_out32(s, SUBFC | TAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_R0)); +} - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - tcg_out32(s, NEG | RT(args[0]) | RA(args[1])); - break; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, AND | SAB(a1, a0, a2)); +} - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out32(s, NOR | SAB(args[1], args[0], args[1])); - break; +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_andi32(s, a0, a1, a2); + } else { + tcg_out_andi64(s, a0, a1, a2); + } +} - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); - } else { - tcg_out32(s, ADD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - if (const_args[2]) { - tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2); - } else { - tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); - } - } else if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out32(s, SUBF | TAB(a0, a2, a1)); - } - break; +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; - case INDEX_op_shl_i64: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_shli64(s, args[0], args[1], args[2] & 63); - } else { - tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_shr_i64: - if (const_args[2]) { - /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_shri64(s, args[0], args[1], args[2] & 63); - } else { - tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_sar_i64: - if (const_args[2]) { - tcg_out_sari64(s, args[0], args[1], args[2]); - } else { - tcg_out32(s, SRAD | SAB(args[1], args[0], args[2])); - } - break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0); - } else { - tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0)); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0); - } else { - tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64)); - tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0)); - } - break; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ANDC | SAB(a1, a0, a2)); +} - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out32(s, MULLI | TAI(a0, a1, a2)); - } else { - tcg_out32(s, MULLD | TAB(a0, a1, a2)); - } - break; - case INDEX_op_div_i64: - tcg_out32(s, DIVD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_divu_i64: - tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_rem_i64: - tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_remu_i64: - tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); - break; +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64); - } else { - tcg_out_qemu_ld(s, args[0], args[1], args[2], - args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_ld_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); - break; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, false); +} - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64); - } else { - tcg_out_qemu_st(s, args[0], args[1], args[2], - args[3], TCG_TYPE_I64); - } - break; - case INDEX_op_qemu_st_i128: - tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); - break; +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? 
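/*
 * a2 supplies the result for a zero input; the rZW constraint below
 * limits a constant a2 to zero or the operation width.
 */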
CNTLZW : CNTLZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, true); +} - case INDEX_op_setcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2], false); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2], false); - break; - case INDEX_op_negsetcond_i32: - tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], - const_args[2], true); - break; - case INDEX_op_negsetcond_i64: - tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2], - const_args[2], true); - break; - case INDEX_op_setcond2_i32: - tcg_out_setcond2(s, args, const_args); - break; +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, rZW), + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - tcg_out_bswap16(s, args[0], args[1], args[2]); - break; - case INDEX_op_bswap32_i32: - tcg_out_bswap32(s, args[0], args[1], 0); - break; - case INDEX_op_bswap32_i64: - tcg_out_bswap32(s, args[0], args[1], args[2]); - break; - case INDEX_op_bswap64_i64: - tcg_out_bswap64(s, args[0], args[1]); - break; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD; + tcg_out32(s, insn | SAB(a1, a0, 0)); +} - case INDEX_op_deposit_i32: - if (const_args[2]) { - uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi32(s, args[0], args[0], ~mask); - } else { - tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3], - 32 - args[3] - args[4], 31 - args[3]); - } - break; - case INDEX_op_deposit_i64: - if (const_args[2]) { - uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3]; - tcg_out_andi64(s, args[0], args[0], ~mask); - } else { - tcg_out_rld(s, RLDIMI, args[0], args[2], args[3], - 64 - args[3] - args[4]); - } - break; +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented; +} - case INDEX_op_extract_i32: - if (args[2] == 0 && args[3] <= 16) { - tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1)); - break; - } - tcg_out_rlw(s, RLWINM, args[0], args[1], - 32 - args[2], 32 - args[3], 31); - break; - case INDEX_op_extract_i64: - if (args[2] == 0 && args[3] <= 16) { - tcg_out32(s, ANDI | SAI(args[1], args[0], (1 << args[3]) - 1)); - break; - } - tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]); - break; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; - case INDEX_op_sextract_i64: - if (args[2] + args[3] == 32) { - if (args[2] == 0) { - tcg_out_ext32s(s, args[0], args[1]); - } else { - tcg_out_sari32(s, args[0], args[1], args[2]); - } - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (args[2] == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_I32, args[0], args[1]); - } else if (args[2] == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_I32, args[0], args[1]); - } else { - g_assert_not_reached(); - } - break; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? 
CNTTZW : CNTTZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, false); +} - case INDEX_op_movcond_i32: - tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2], - args[3], args[4], const_args[2]); - break; +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD; + tcg_out_cntxz(s, type, insn, a0, a1, a2, true); +} + +static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags) +{ + return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; + +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, EQV | SAB(a1, a0, a2)); +} #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_add2_i64: -#else - case INDEX_op_add2_i32: +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_shri64(s, a0, a1, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; #endif - /* Note that the CA bit is defined based on the word size of the - environment. So in 64-bit mode it's always carry-out of bit 63. - The fallback code using deposit works just as well for 32-bit. */ - a0 = args[0], a1 = args[1]; - if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { - a0 = TCG_REG_R0; - } - if (const_args[4]) { - tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); - } else { - tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_eqv, +}; + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out32(s, MULLI | TAI(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? 
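/* mulhw/mulhd yield the high half of the signed product; the low half
   comes from the separate mul emitter above. */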
MULHW : MULHD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mulsh, +}; + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_muluh, +}; + +static void tgen_nand(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, NAND | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nand, +}; + +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, NOR | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, OR | SAB(a1, a0, a2)); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_ori32(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, ORC | SAB(a1, a0, a2)); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + +static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags) +{ + return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mod, + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD; + tcg_out32(s, insn | TAB(a0, a1, a2)); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mod, + .out_rrr = tgen_remu, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31)); + } else { + tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0)); + } +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31); + } else { + tcg_out_rld(s, RLDICL, a0, a1, a2, 0); + } +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? 
SRAW : SRAD; + tcg_out32(s, insn | SAB(a1, a0, a2)); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + if (type == TCG_TYPE_I32) { + tcg_out_sari32(s, a0, a1, a2 & 31); + } else { + tcg_out_sari64(s, a0, a1, a2 & 63); + } +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD; + tcg_out32(s, insn | SAB(a1, a0, a2)); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + if (type == TCG_TYPE_I32) { + tcg_out_shli32(s, a0, a1, a2 & 31); + } else { + tcg_out_shli64(s, a0, a1, a2 & 63); + } +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD; + tcg_out32(s, insn | SAB(a1, a0, a2)); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + /* Limit immediate shift count lest we create an illegal insn. */ + if (type == TCG_TYPE_I32) { + tcg_out_shri32(s, a0, a1, a2 & 31); + } else { + tcg_out_shri64(s, a0, a1, a2 & 63); + } +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, SUBF | TAB(a0, a2, a1)); +} + +static void tgen_subfi(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_out32(s, SUBFIC | TAI(a0, a2, a1)); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, rI, r), + .out_rrr = tgen_sub, + .out_rir = tgen_subfi, +}; + +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, SUBFC | TAB(a0, a2, a1)); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == 0) { + tcg_out_movi(s, type, TCG_REG_R0, 0); + tgen_subbo_rrr(s, type, a0, a1, TCG_REG_R0); + } else { + tgen_addco_rri(s, type, a0, a1, -a2); + } +} + +/* The underlying insn for subfi is subfic. */ +#define tgen_subbo_rir tgen_subfi + +static void tgen_subbo_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, type, TCG_REG_R0, a2); + tgen_subbo_rir(s, type, a0, a1, TCG_REG_R0); +} + +static TCGConstraintSetIndex cset_subbo(TCGType type, unsigned flags) +{ + /* Recall that the CA bit is defined based on the host word size. */ + return type == TCG_TYPE_REG ? 
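/*
 * rN matches constants whose negation fits in 16 bits (the new
 * TCG_CT_CONST_N16), so tgen_subbo_rri above can fold the subtraction
 * into an addic of -a2.
 */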
C_O1_I2(r, rI, rN) : C_NotImplemented; +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_subbo, + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, + .out_rir = tgen_subbo_rir, + .out_rii = tgen_subbo_rii, +}; + +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, SUBFE | TAB(a0, a2, a1)); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_addcio_rri(s, type, a0, a1, ~a2); +} + +static void tgen_subbio_rir(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2) +{ + tcg_debug_assert(a1 == 0 || a1 == -1); + tcg_out32(s, (a1 ? SUBFME : SUBFZE) | RT(a0) | RA(a2)); +} + +static void tgen_subbio_rii(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2) +{ + tcg_out_movi(s, type, TCG_REG_R0, a2); + tgen_subbio_rir(s, type, a0, a1, TCG_REG_R0); +} + +static TCGConstraintSetIndex cset_subbio(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I2(r, rZM, rZM) : C_NotImplemented; +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_subbio, + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, + .out_rir = tgen_subbio_rir, + .out_rii = tgen_subbio_rii, +}; + +#define outop_subbi outop_subbio + +static void tcg_out_set_borrow(TCGContext *s) +{ + /* borrow = !carry */ + tcg_out32(s, ADDIC | TAI(TCG_REG_R0, TCG_REG_R0, 0)); +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out32(s, XOR | SAB(a1, a0, a2)); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_xori32(s, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rU), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src, unsigned flags) +{ + TCGReg tmp = dst == src ? TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRH | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext16u(s, dst, dst); } - if (const_args[5]) { - tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); - } else { - tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */ + tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31); + /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */ + tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src, unsigned flags) +{ + TCGReg tmp = dst == src ? 
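/*
 * When dst aliases src, accumulate in R0 so the source bytes stay
 * readable.  ISA v3.1 provides brh/brw/brd and swaps in one insn;
 * otherwise fall back to the rotate-and-insert sequence.
 */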
TCG_REG_R0 : dst; + + if (have_isa_3_10) { + tcg_out32(s, BRW | RA(dst) | RS(src)); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, dst); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, dst, dst); } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); + return; + } + + /* + * Stolen from gcc's builtin_bswap32. + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = xxxxabcd + */ + /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */ + tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31); + /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7); + /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */ + tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23); + + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, dst, tmp); + } else { + tcg_out_mov(s, TCG_TYPE_REG, dst, tmp); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src) +{ + TCGReg t0 = dst == src ? TCG_REG_R0 : dst; + TCGReg t1 = dst == src ? dst : TCG_REG_R0; + + if (have_isa_3_10) { + tcg_out32(s, BRD | RA(dst) | RS(src)); + return; + } + + /* + * In the following, + * dep(a, b, m) -> (a & ~m) | (b & m) + * + * Begin with: src = abcdefgh + */ + /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */ + tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31); + /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7); + /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */ + tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23); + + /* t0 = rol64(t0, 32) = hgfe0000 */ + tcg_out_rld(s, RLDICL, t0, t0, 32, 0); + /* t1 = rol64(src, 32) = efghabcd */ + tcg_out_rld(s, RLDICL, t1, src, 32, 0); + + /* t0 = dep(t0, rol32(t1, 24), 0xffffffff) = hgfebcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31); + /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7); + /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */ + tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23); + + tcg_out_mov(s, TCG_TYPE_REG, dst, t0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; +#endif /* TCG_TARGET_REG_BITS == 64 */ + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out32(s, NEG | RT(a0) | RA(a1)); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, a1, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + if (type == TCG_TYPE_I32) { + tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs); + } else { + tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len); + } +} + +static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len) +{ + tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len)); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, 0, rZ), + .out_rrr = tgen_deposit, + .out_rri = tgen_depositi, +}; + +static void 
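/* A field at offset 0 of no more than 16 bits is a simple immediate AND;
   anything else becomes a rotate-and-mask (rlwinm / rldicl). */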
tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0 && len <= 16) { + tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1); + } else if (type == TCG_TYPE_I32) { + tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31); + } else { + tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len); + } +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; } - break; + } else if (ofs + len == 32) { + tcg_out_sari32(s, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LBZ, LBZX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tgen_ld8u(s, type, dest, base, offset); + tcg_out_ext8s(s, type, dest, dest); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LHZ, LHZX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LHA, LHAX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; #if TCG_TARGET_REG_BITS == 64 - case INDEX_op_sub2_i64: -#else - case INDEX_op_sub2_i32: +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LWZ, LWZX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, LWA, LWAX, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; #endif - a0 = args[0], a1 = args[1]; - if (a0 == args[5] || (!const_args[3] && a0 == args[3])) { - a0 = TCG_REG_R0; - } - if (const_args[2]) { - tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2])); - } else { - tcg_out32(s, SUBFC | TAB(a0, args[4], args[2])); - } - if (const_args[3]) { - tcg_out32(s, (args[3] ? 
SUBFME : SUBFZE) | RT(a1) | RA(args[5])); - } else { - tcg_out32(s, SUBFE | TAB(a1, args[5], args[3])); - } - if (a0 != args[0]) { - tcg_out_mov(s, TCG_TYPE_REG, args[0], a0); - } - break; - case INDEX_op_muluh_i32: - tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i32: - tcg_out32(s, MULHW | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_muluh_i64: - tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); - break; - case INDEX_op_mulsh_i64: - tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); - break; +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, STB, STBX, data, base, offset); +} - case INDEX_op_mb: - tcg_out_mb(s, args[0]); - break; +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); - } +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem_long(s, STH, STHX, data, base, offset); } +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + + int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) { switch (opc) { @@ -4098,154 +4435,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ctpop_i32: - case INDEX_op_neg_i32: - case INDEX_op_not_i32: - case INDEX_op_ext8s_i32: - case INDEX_op_ext16s_i32: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap32_i32: - case INDEX_op_extract_i32: - case INDEX_op_sextract_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_ctpop_i64: - case INDEX_op_neg_i64: - case INDEX_op_not_i64: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: + case INDEX_op_qemu_st: return C_O0_I2(r, r); - - case INDEX_op_add_i32: - case INDEX_op_and_i32: - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - case 
INDEX_op_andc_i32: - case INDEX_op_orc_i32: - case INDEX_op_eqv_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - return C_O1_I2(r, r, rI); - - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - case INDEX_op_rem_i32: - case INDEX_op_remu_i32: - case INDEX_op_nand_i32: - case INDEX_op_nor_i32: - case INDEX_op_muluh_i32: - case INDEX_op_mulsh_i32: - case INDEX_op_orc_i64: - case INDEX_op_eqv_i64: - case INDEX_op_nand_i64: - case INDEX_op_nor_i64: - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - case INDEX_op_rem_i64: - case INDEX_op_remu_i64: - case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i64: - return C_O1_I2(r, r, r); - - case INDEX_op_sub_i32: - return C_O1_I2(r, rI, ri); - case INDEX_op_add_i64: - return C_O1_I2(r, r, rT); - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - return C_O1_I2(r, r, rU); - case INDEX_op_sub_i64: - return C_O1_I2(r, rI, rT); - case INDEX_op_clz_i32: - case INDEX_op_ctz_i32: - case INDEX_op_clz_i64: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, rZW); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(r, rC); - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rC); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rC, rZ, rZ); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, rZ); - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, ri, ri); - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, ri, ri); - case INDEX_op_add2_i64: - case INDEX_op_add2_i32: - return C_O2_I4(r, r, r, r, rI, rZM); - case INDEX_op_sub2_i64: - case INDEX_op_sub2_i32: - return C_O2_I4(r, r, rI, rZM, r, r); - - case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); - - case INDEX_op_qemu_st_i32: - return C_O0_I2(r, r); - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); - - case INDEX_op_qemu_ld_i128: - return C_N1O1_I1(o, m, r); - case INDEX_op_qemu_st_i128: - return C_O0_I3(o, m, r); + case INDEX_op_qemu_st2: + return TCG_TARGET_REG_BITS == 64 + ? 
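/* Mirrors qemu_ld2 above: the i128 store pair on 64-bit hosts, or the
   two halves of a 64-bit store on 32-bit hosts. */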
C_O0_I3(o, m, r) : C_O0_I3(r, r, r); case INDEX_op_add_vec: case INDEX_op_sub_vec: diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index e92e815..0fc26d3 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -11,16 +11,13 @@ */ C_O0_I1(r) C_O0_I2(rz, r) -C_O0_I2(rz, rz) +C_O0_I2(r, rz) C_O1_I1(r, r) +C_O1_I2(r, r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) -C_O1_I2(r, r, rJ) -C_O1_I2(r, rz, rN) -C_O1_I2(r, rz, rz) C_N1_I2(r, r, rM) C_O1_I4(r, r, rI, rM, rM) -C_O2_I4(r, r, rz, rz, rM, rM) C_O0_I2(v, r) C_O1_I1(v, r) C_O1_I1(v, v) diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h index 2f97006..c04e15d 100644 --- a/tcg/riscv/tcg-target-con-str.h +++ b/tcg/riscv/tcg-target-con-str.h @@ -16,8 +16,6 @@ REGS('v', ALL_VECTOR_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S12) -CONST('J', TCG_CT_CONST_J12) CONST('K', TCG_CT_CONST_S5) CONST('L', TCG_CT_CONST_CMP_VI) -CONST('N', TCG_CT_CONST_N12) CONST('M', TCG_CT_CONST_M12) diff --git a/tcg/riscv/tcg-target-has.h b/tcg/riscv/tcg-target-has.h index 9808108..aef10c2 100644 --- a/tcg/riscv/tcg-target-has.h +++ b/tcg/riscv/tcg-target-has.h @@ -10,69 +10,8 @@ #include "host/cpuinfo.h" /* optional instructions */ -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_div2_i32 0 -#define TCG_TARGET_HAS_rot_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_orc_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_eqv_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctz_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_qemu_st8_i32 0 - -#define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_div2_i64 0 -#define TCG_TARGET_HAS_rot_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_extract2_i64 0 #define TCG_TARGET_HAS_extr_i64_i32 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_orc_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_eqv_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_ctz_i64 (cpuinfo & CPUINFO_ZBB) -#define 
TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB) -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 1 -#define TCG_TARGET_HAS_mulsh_i64 1 - #define TCG_TARGET_HAS_qemu_ldst_i128 0 - #define TCG_TARGET_HAS_tst 0 /* vector instructions */ diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f7e1ca5..f9417d1 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -113,11 +113,9 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) } #define TCG_CT_CONST_S12 0x100 -#define TCG_CT_CONST_N12 0x200 -#define TCG_CT_CONST_M12 0x400 -#define TCG_CT_CONST_J12 0x800 -#define TCG_CT_CONST_S5 0x1000 -#define TCG_CT_CONST_CMP_VI 0x2000 +#define TCG_CT_CONST_M12 0x200 +#define TCG_CT_CONST_S5 0x400 +#define TCG_CT_CONST_CMP_VI 0x800 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -402,28 +400,14 @@ static bool tcg_target_const_match(int64_t val, int ct, return 1; } /* - * Sign extended from 12 bits, negated: [-0x7ff, 0x800]. - * Used for subtraction, where a constant must be handled by ADDI. - */ - if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) { - return 1; - } - /* * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff]. - * Used by addsub2 and movcond, which may need the negative value, + * Used by movcond, which may need the negative value, * and requires the modified constant to be representable. */ if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { return 1; } /* - * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff]. - * Used to map ANDN back to ANDI, etc. - */ - if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) { - return 1; - } - /* * Sign extended from 5 bits: [-0x10, 0x0f]. * Used for vector-immediate. */ @@ -1089,67 +1073,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_addsub2(TCGContext *s, - TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, - TCGArg bl, TCGArg bh, - bool cbl, bool cbh, bool is_sub, bool is32bit) -{ - const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD; - const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI; - const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB; - TCGReg th = TCG_REG_TMP1; - - /* If we have a negative constant such that negating it would - make the high part zero, we can (usually) eliminate one insn. */ - if (cbl && cbh && bh == -1 && bl != 0) { - bl = -bl; - bh = 0; - is_sub = !is_sub; - } - - /* By operating on the high part first, we get to use the final - carry operation to move back from the temporary. */ - if (!cbh) { - tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh); - } else if (bh != 0 || ah == rl) { - tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh)); - } else { - th = ah; - } - - /* Note that tcg optimization should eliminate the bl == 0 case. */ - if (is_sub) { - if (cbl) { - tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl); - tcg_out_opc_imm(s, opc_addi, rl, al, -bl); - } else { - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl); - tcg_out_opc_reg(s, opc_sub, rl, al, bl); - } - tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0); - } else { - if (cbl) { - tcg_out_opc_imm(s, opc_addi, rl, al, bl); - tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl); - } else if (al == bl) { - /* - * If the input regs overlap, this is a simple doubling - * and carry-out is the input msb. 
This special case is - * required when the output reg overlaps the input, - * but we might as well use it always. - */ - tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0); - tcg_out_opc_reg(s, opc_add, rl, al, al); - } else { - tcg_out_opc_reg(s, opc_add, rl, al, bl); - tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, - rl, (rl == bl ? al : bl)); - } - tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0); - } -} - static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src) { @@ -1184,6 +1107,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0); } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, l, 0); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); +} + static const struct { RISCVInsn op; bool swap; @@ -1200,8 +1129,8 @@ static const struct { [TCG_COND_GTU] = { OPC_BLTU, true } }; -static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, - TCGReg arg2, TCGLabel *l) +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) { RISCVInsn op = tcg_brcond_to_riscv[cond].op; @@ -1217,6 +1146,11 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_opc_branch(s, op, arg1, arg2, 0); } +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rz), + .out_rr = tgen_brcond, +}; + #define SETCOND_INV TCG_TARGET_NB_REGS #define SETCOND_NEZ (SETCOND_INV << 1) #define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) @@ -1341,6 +1275,24 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, cond, dest, arg1, arg2, true); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg arg1, tcg_target_long arg2, bool c2) { @@ -1379,6 +1331,24 @@ static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_negsetcond(s, cond, dest, arg1, arg2, false); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_negsetcond(s, cond, dest, arg1, arg2, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne, int val1, bool c_val1, int val2, bool c_val2) @@ -1476,10 +1446,10 @@ static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_mov(s, TCG_TYPE_REG, ret, tmp); } -static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg cmp1, int cmp2, bool c_cmp2, - TCGReg val1, bool c_val1, - TCGReg val2, bool c_val2) +static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg cmp1, TCGArg cmp2, bool c_cmp2, + TCGArg val1, bool c_val1, + TCGArg val2, bool c_val2) { int 
tmpflags; TCGReg t; @@ -1506,6 +1476,11 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rI, rM, rM), + .out = tcg_out_movcond, +}; + static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn, TCGReg ret, TCGReg src1, int src2, bool c_src2) { @@ -1517,7 +1492,7 @@ static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn, * Note that constraints put 'ret' in a new register, so the * computation above did not clobber either 'src1' or 'src2'. */ - tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true, + tcg_out_movcond(s, type, TCG_COND_EQ, ret, src1, 0, true, src2, c_src2, ret, false); } } @@ -1613,7 +1588,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg, tcg_out_call_int(s, arg, false); } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { tcg_insn_unit insn = OPC_FENCE; @@ -1858,22 +1833,31 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val, } } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; TCGReg base; ldst = prepare_host_addr(s, &base, addr_reg, oi, true); - tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type); + tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), type); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, TCGReg base, MemOp opc) { @@ -1898,8 +1882,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val, } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; TCGReg base; @@ -1908,12 +1892,21 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi)); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_NotImplemented, +}; + static const tcg_insn_unit *tb_ret_addr; static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) @@ -1940,6 +1933,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -1957,452 +1955,693 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, flush_idcache_range(jmp_rx, jmp_rw, 4); } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + 
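A minimal illustrative sketch — not part of the patch, with the *_demo names invented for illustration — of the per-opcode descriptor pattern the hunks below switch to. Instead of dispatching through one large switch in tcg_out_op(), each opcode gets a static TCGOutOp* record that carries its operand constraint (a fixed set such as C_O1_I2(r, r, rI), or a dynamic hook when the op depends on an ISA extension such as Zbb) together with the emitter callbacks. The shape mirrors the outop_andc descriptor added further down:

    /* Emitter for the register/register form; ANDN comes from Zbb and
       uses the same encoding for TCG_TYPE_I32 and TCG_TYPE_I64. */
    static void tgen_demo_andc(TCGContext *s, TCGType type,
                               TCGReg a0, TCGReg a1, TCGReg a2)
    {
        tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
    }

    /* Resolve the constraint at runtime: advertise the op only when the
       host CPU implements Zbb, otherwise leave it to generic expansion. */
    static TCGConstraintSetIndex cset_demo(TCGType type, unsigned flags)
    {
        return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, r) : C_NotImplemented;
    }

    static const TCGOutOpBinary outop_demo = {
        .base.static_constraint = C_Dynamic,       /* constraint chosen at runtime */
        .base.dynamic_constraint = cset_demo,
        .out_rrr = tgen_demo_andc,                 /* register/register emitter */
    };

Ops that are always available use a fixed static_constraint instead, and ops the backend does not implement at all are declared with C_NotImplemented so the middle-end expands them generically.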
+static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - TCGArg a0 = args[0]; - TCGArg a1 = args[1]; - TCGArg a2 = args[2]; - int c2 = const_args[2]; + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ADDW : OPC_ADD; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} - switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); - break; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI; + tcg_out_opc_imm(s, insn, a0, a1, a2); +} - case INDEX_op_br: - tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0); - tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - tcg_out_ldst(s, OPC_LBU, a0, a1, a2); - break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - tcg_out_ldst(s, OPC_LB, a0, a1, a2); - break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - tcg_out_ldst(s, OPC_LHU, a0, a1, a2); - break; - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - tcg_out_ldst(s, OPC_LH, a0, a1, a2); - break; - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, OPC_LWU, a0, a1, a2); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, OPC_LW, a0, a1, a2); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, OPC_LD, a0, a1, a2); - break; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - tcg_out_ldst(s, OPC_SB, a0, a1, a2); - break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - tcg_out_ldst(s, OPC_SH, a0, a1, a2); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, OPC_SW, a0, a1, a2); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, OPC_SD, a0, a1, a2); - break; +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_add_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2); - } - break; - case INDEX_op_add_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_sub_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2); - } else { - tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2); - } - break; - case INDEX_op_sub_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2); - } else { - tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2); - } - break; +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} - case INDEX_op_and_i32: - case INDEX_op_and_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); - } - break; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_AND, a0, a1, a2); +} - case INDEX_op_or_i32: - case INDEX_op_or_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); - } - break; +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2); +} - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - if (c2) { 
- tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); - } else { - tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2); - } else { - tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2); - } - break; - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2); - } else { - tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2); - } - break; - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2); - } else { - tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2); - } - break; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2); +} - case INDEX_op_not_i32: - case INDEX_op_not_i64: - tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); - break; +static TCGConstraintSetIndex cset_zbb_rrr(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, r) : C_NotImplemented; +} - case INDEX_op_neg_i32: - tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1); - break; - case INDEX_op_neg_i64: - tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1); - break; +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_zbb_rrr, + .out_rrr = tgen_andc, +}; - case INDEX_op_mul_i32: - tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2); - break; - case INDEX_op_mul_i64: - tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2); - break; +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CLZW : OPC_CLZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, false); +} - case INDEX_op_div_i32: - tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2); - break; - case INDEX_op_div_i64: - tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2); - break; +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CLZW : OPC_CLZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, true); +} - case INDEX_op_divu_i32: - tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2); - break; - case INDEX_op_divu_i64: - tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2); - break; +static TCGConstraintSetIndex cset_clzctz(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_N1_I2(r, r, rM) : C_NotImplemented; +} - case INDEX_op_rem_i32: - tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2); - break; - case INDEX_op_rem_i64: - tcg_out_opc_reg(s, OPC_REM, a0, a1, a2); - break; +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clzctz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; - case INDEX_op_remu_i32: - tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2); - break; - case INDEX_op_remu_i64: - tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2); - break; +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? 
OPC_CPOPW : OPC_CPOP; + tcg_out_opc_imm(s, insn, a0, a1, 0); +} - case INDEX_op_shl_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2); - } - break; - case INDEX_op_shl_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2); - } - break; +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented; +} - case INDEX_op_shr_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2); - } - break; - case INDEX_op_shr_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2); - } - break; +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; - case INDEX_op_sar_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2); - } - break; - case INDEX_op_sar_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2); - } - break; +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CTZW : OPC_CTZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, false); +} - case INDEX_op_rotl_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2); - } - break; - case INDEX_op_rotl_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2); - } - break; +static void tgen_ctzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CTZW : OPC_CTZ; + tcg_out_cltz(s, type, insn, a0, a1, a2, true); +} - case INDEX_op_rotr_i32: - if (c2) { - tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f); - } else { - tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2); - } - break; - case INDEX_op_rotr_i64: - if (c2) { - tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f); - } else { - tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2); - } - break; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clzctz, + .out_rrr = tgen_ctz, + .out_rri = tgen_ctzi, +}; - case INDEX_op_bswap64_i64: - tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); - break; - case INDEX_op_bswap32_i32: - a2 = 0; - /* fall through */ - case INDEX_op_bswap32_i64: - tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); - if (a2 & TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32); - } else { - tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32); - } - break; - case INDEX_op_bswap16_i64: - case INDEX_op_bswap16_i32: - tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); - if (a2 & TCG_BSWAP_OZ) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48); - } else { - tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48); - } - break; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? 
OPC_DIVW : OPC_DIV; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} - case INDEX_op_ctpop_i32: - tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0); - break; - case INDEX_op_ctpop_i64: - tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0); - break; +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; - case INDEX_op_clz_i32: - tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2); - break; - case INDEX_op_clz_i64: - tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2); - break; - case INDEX_op_ctz_i32: - tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2); - break; - case INDEX_op_ctz_i64: - tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2); - break; +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_add2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false, true); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], false, false); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true, true); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], - const_args[4], const_args[5], true, false); - break; +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_DIVUW : OPC_DIVU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - tcg_out_brcond(s, a2, a0, a1, arg_label(args[3])); - break; +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2, c2); - break; +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - tcg_out_negsetcond(s, args[3], a0, a1, a2, c2); - break; +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2); +} - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - tcg_out_movcond(s, args[5], a0, a1, a2, c2, - args[3], const_args[3], args[4], const_args[4]); - break; +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_zbb_rrr, + .out_rrr = tgen_eqv, +}; - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); - break; +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32); +} - case INDEX_op_extrh_i64_i32: - tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32); - break; +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; - case INDEX_op_mulsh_i32: - case INDEX_op_mulsh_i64: - tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2); - break; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? 
OPC_MULW : OPC_MUL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} - case INDEX_op_muluh_i32: - case INDEX_op_muluh_i64: - tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2); - break; +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; - case INDEX_op_extract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32u(s, a0, a1); - } else { - tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2); - } - break; - } - /* FALLTHRU */ - case INDEX_op_extract_i32: - switch (args[3]) { - case 1: - tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2); - break; +static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I32 ? C_NotImplemented : C_O1_I2(r, r, r); +} + +static void tgen_mulsh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2); +} + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_mulsh, +}; + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulh, + .out_rrr = tgen_muluh, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_OR, a0, a1, a2); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_zbb_rrr, + .out_rrr = tgen_orc, +}; + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_REMW : OPC_REM; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_REMUW : OPC_REMU; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + +static TCGConstraintSetIndex cset_rot(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, ri) : C_NotImplemented; +} + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? 
OPC_RORW : OPC_ROR; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_rotri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_RORIW : OPC_RORI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_rot, + .out_rrr = tgen_rotr, + .out_rri = tgen_rotri, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ROLW : OPC_ROL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_rotri(s, type, a0, a1, -a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_rot, + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRAW : OPC_SRA; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRAIW : OPC_SRAI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SLLW : OPC_SLL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SLLIW : OPC_SLLI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRLW : OPC_SRL; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRLIW : OPC_SRLI; + unsigned mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_opc_imm(s, insn, a0, a1, a2 & mask); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + RISCVInsn insn = type == TCG_TYPE_I32 ? 
OPC_SUBW : OPC_SUB; + tcg_out_opc_reg(s, insn, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rI), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static TCGConstraintSetIndex cset_bswap(TCGType type, unsigned flags) +{ + return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented; +} + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48); + } else { + tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_bswap, + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + if (flags & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32); + } else { + tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_bswap, + .out_rr = tgen_bswap32, +}; + +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_bswap, + .out_rr = tgen_bswap64, +}; + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_ZERO, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_xori(s, type, a0, a1, -1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { case 16: - tcg_debug_assert(a2 == 0); tcg_out_ext16u(s, a0, a1); - break; - default: - g_assert_not_reached(); + return; + case 32: + tcg_out_ext32u(s, a0, a1); + return; } - break; + } + if (ofs + len == 32) { + tgen_shli(s, TCG_TYPE_I32, a0, a1, ofs); + return; + } + if (len == 1) { + tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, ofs); + return; + } + g_assert_not_reached(); +} - case INDEX_op_sextract_i64: - if (a2 + args[3] == 32) { - if (a2 == 0) { - tcg_out_ext32s(s, a0, a1); - } else { - tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2); - 
} - break; - } - /* FALLTHRU */ - case INDEX_op_sextract_i32: - if (a2 == 0 && args[3] == 8) { - tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1); - } else if (a2 == 0 && args[3] == 16) { - tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1); - } else { - g_assert_not_reached(); - } - break; +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + if (ofs == 0) { + switch (len) { + case 8: + tcg_out_ext8s(s, type, a0, a1); + return; + case 16: + tcg_out_ext16s(s, type, a0, a1); + return; + case 32: + tcg_out_ext32s(s, a0, a1); + return; + } + } else if (ofs + len == 32) { + tgen_sari(s, TCG_TYPE_I32, a0, a1, ofs); + return; } + g_assert_not_reached(); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LBU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LB, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LHU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LH, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LWU, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_LW, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SB, data, base, offset); } +static const TCGOutOpStore 
outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, OPC_SH, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + + static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, const TCGArg args[TCG_MAX_OP_ARGS], @@ -2624,142 +2863,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_not_i32: - case INDEX_op_neg_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld_i64: - case INDEX_op_not_i64: - case INDEX_op_neg_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_extrl_i64_i32: - case INDEX_op_extrh_i64_i32: - case INDEX_op_ext_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(rz, r); - - case INDEX_op_add_i32: - case INDEX_op_and_i32: - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - case INDEX_op_add_i64: - case INDEX_op_and_i64: - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rI); - - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - return C_O1_I2(r, r, rJ); - - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - return C_O1_I2(r, rz, rN); - - case INDEX_op_mul_i32: - case INDEX_op_mulsh_i32: - case INDEX_op_muluh_i32: - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - case INDEX_op_rem_i32: - case INDEX_op_remu_i32: - case INDEX_op_mul_i64: - case INDEX_op_mulsh_i64: - case INDEX_op_muluh_i64: - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - case INDEX_op_rem_i64: - case INDEX_op_remu_i64: - return C_O1_I2(r, rz, rz); - - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_N1_I2(r, r, rM); - 
- case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rz); - - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rI, rM, rM); - - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, rz, rz, rM, rM); - - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - return C_O0_I2(rz, r); - case INDEX_op_st_vec: return C_O0_I2(v, r); case INDEX_op_dup_vec: diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 370e4b1..f67fd78 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -22,6 +22,7 @@ C_O1_I1(r, r) C_O1_I1(v, r) C_O1_I1(v, v) C_O1_I1(v, vr) +C_O1_I2(r, 0, r) C_O1_I2(r, 0, ri) C_O1_I2(r, 0, rI) C_O1_I2(r, 0, rJ) @@ -31,20 +32,16 @@ C_O1_I2(r, r, rC) C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, r, rK) -C_O1_I2(r, r, rKR) -C_O1_I2(r, r, rNK) C_O1_I2(r, r, rNKR) +C_O1_I2(r, r, rUV) C_O1_I2(r, rZ, r) C_O1_I2(v, v, r) C_O1_I2(v, v, v) C_O1_I3(v, v, v, v) C_O1_I4(v, v, v, vZ, v) C_O1_I4(v, v, v, vZM, v) -C_O1_I4(r, r, ri, rI, r) C_O1_I4(r, r, rC, rI, r) C_O2_I1(o, m, r) C_O2_I2(o, m, 0, r) C_O2_I2(o, m, r, r) C_O2_I3(o, m, 0, 1, r) -C_N1_O1_I4(r, r, 0, 1, ri, r) -C_N1_O1_I4(r, r, 0, 1, rJU, r) diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h index 3e574e0..636a38a 100644 --- a/tcg/s390x/tcg-target-con-str.h +++ b/tcg/s390x/tcg-target-con-str.h @@ -24,4 +24,5 @@ CONST('M', TCG_CT_CONST_M1) CONST('N', TCG_CT_CONST_INV) CONST('R', TCG_CT_CONST_INVRISBG) CONST('U', TCG_CT_CONST_U32) +CONST('V', TCG_CT_CONST_N32) CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/s390x/tcg-target-has.h b/tcg/s390x/tcg-target-has.h index e99e671..0aeb5ba 100644 --- a/tcg/s390x/tcg-target-has.h +++ b/tcg/s390x/tcg-target-has.h @@ -29,65 +29,8 @@ extern uint64_t s390_facilities[3]; ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1) /* optional instructions */ -#define TCG_TARGET_HAS_div2_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_clz_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 1 -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - -#define TCG_TARGET_HAS_div2_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define 
TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 1 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2) -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 1 - #define TCG_TARGET_HAS_tst 1 #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index b2e1cd6..7ca0071 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -43,6 +43,7 @@ #define TCG_CT_CONST_INVRISBG (1 << 14) #define TCG_CT_CONST_CMP (1 << 15) #define TCG_CT_CONST_M1 (1 << 16) +#define TCG_CT_CONST_N32 (1 << 17) #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16) #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32) @@ -134,6 +135,9 @@ typedef enum S390Opcode { RIEc_CLGIJ = 0xec7d, RIEc_CLIJ = 0xec7f, + RIEd_ALHSIK = 0xecda, + RIEd_ALGHSIK = 0xecdb, + RIEf_RISBG = 0xec55, RIEg_LOCGHI = 0xec46, @@ -172,6 +176,8 @@ typedef enum S390Opcode { RRE_SLBGR = 0xb989, RRE_XGR = 0xb982, + RRFa_ALRK = 0xb9fa, + RRFa_ALGRK = 0xb9ea, RRFa_MGRK = 0xb9ec, RRFa_MSRKC = 0xb9fd, RRFa_MSGRKC = 0xb9ed, @@ -613,7 +619,10 @@ static bool tcg_target_const_match(int64_t val, int ct, if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { return true; } - if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + if ((ct & TCG_CT_CONST_U32) && uval <= UINT32_MAX) { + return true; + } + if ((ct & TCG_CT_CONST_N32) && -uval <= UINT32_MAX) { return true; } if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) { @@ -676,8 +685,16 @@ static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); } +static void tcg_out_insn_RIEd(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r3, int i2) +{ + tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); + tcg_out16(s, i2); + tcg_out16(s, op & 0xff); +} + static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1, - int i2, int m3) + int i2, int m3) { tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3); tcg_out32(s, (i2 << 16) | (op & 0xff)); @@ -951,25 +968,32 @@ static void tcg_out_movi(TCGContext *s, TCGType type, if (pc_off == (int32_t)pc_off) { tcg_out_insn(s, RIL, LARL, ret, pc_off); if (sval & 1) { - tcg_out_insn(s, RI, AGHI, ret, 1); + tcg_out_insn(s, RX, LA, ret, ret, TCG_REG_NONE, 1); } return; } - /* Otherwise, load it by parts. */ - i = is_const_p16((uint32_t)uval); - if (i >= 0) { - tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16)); - } else { - tcg_out_insn(s, RIL, LLILF, ret, uval); - } - uval >>= 32; - i = is_const_p16(uval); - if (i >= 0) { - tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16)); - } else { - tcg_out_insn(s, RIL, OIHF, ret, uval); + if (!s->carry_live) { + /* Load by parts, at most 2 instructions. 
*/ + i = is_const_p16((uint32_t)uval); + if (i >= 0) { + tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16)); + } else { + tcg_out_insn(s, RIL, LLILF, ret, uval); + } + uval >>= 32; + i = is_const_p16(uval); + if (i >= 0) { + tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16)); + } else { + tcg_out_insn(s, RIL, OIHF, ret, uval); + } + return; } + + /* Otherwise, stuff it in the constant pool. */ + tcg_out_insn(s, RIL, LGRL, ret, 0); + new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2); } /* Emit a load/store type instruction. Inputs are: @@ -1370,9 +1394,9 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1, return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc); } -static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, - TCGReg dest, TCGReg c1, TCGArg c2, - bool c2const, bool neg) +static void tgen_setcond_int(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg c1, TCGArg c2, + bool c2const, bool neg) { int cc; @@ -1464,6 +1488,42 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc); } +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, false, false); +} + +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tgen_setcond_int(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rC), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest, TCGArg v3, int v3const, TCGReg v4, int cc, int inv_cc) @@ -1504,9 +1564,9 @@ static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest, tcg_out_insn(s, RRFc, LOCGR, dest, src, cc); } -static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, - TCGReg c1, TCGArg c2, int c2const, - TCGArg v3, int v3const, TCGReg v4) +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg dest, TCGReg c1, TCGArg c2, bool c2const, + TCGArg v3, bool v3const, TCGArg v4, bool v4const) { int cc, inv_cc; @@ -1514,63 +1574,47 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest, tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc); } -static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1, - TCGArg a2, int a2const) -{ - /* Since this sets both R and R+1, we have no choice but to store the - result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. 
*/ - QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1); - tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1); +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rC, rI, r), + .out = tgen_movcond, +}; - if (a2const && a2 == 64) { - tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0); - return; - } +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + unsigned lsb = (63 - ofs); + unsigned msb = lsb - (len - 1); /* - * Conditions from FLOGR are: - * 2 -> one bit found - * 8 -> no one bit found + * Since we can't support "0Z" as a constraint, we allow a1 in + * any register. Fix things up as if a matching constraint. */ - tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2); -} - -static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) -{ - /* With MIE3, and bit 0 of m4 set, we get the complete result. */ - if (HAVE_FACILITY(MISC_INSN_EXT3)) { - if (type == TCG_TYPE_I32) { - tcg_out_ext32u(s, dest, src); - src = dest; + if (a0 != a1) { + if (a0 == a2) { + tcg_out_mov(s, type, TCG_TMP0, a2); + a2 = TCG_TMP0; } - tcg_out_insn(s, RRFc, POPCNT, dest, src, 8); - return; - } - - /* Without MIE3, each byte gets the count of bits for the byte. */ - tcg_out_insn(s, RRFc, POPCNT, dest, src, 0); - - /* Multiply to sum each byte at the top of the word. */ - if (type == TCG_TYPE_I32) { - tcg_out_insn(s, RIL, MSFI, dest, 0x01010101); - tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull); - tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0); - tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56); + tcg_out_mov(s, type, a0, a1); } + tcg_out_risbg(s, a0, a2, msb, lsb, ofs, false); } -static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len, int z) +static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2, + unsigned ofs, unsigned len) { - int lsb = (63 - ofs); - int msb = lsb - (len - 1); - tcg_out_risbg(s, dest, src, msb, lsb, ofs, z); + unsigned lsb = (63 - ofs); + unsigned msb = lsb - (len - 1); + tcg_out_risbg(s, a0, a2, msb, lsb, ofs, true); } -static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len) +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, rZ, r), + .out_rrr = tgen_deposit, + .out_rzr = tgen_depositz, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg dest, + TCGReg src, unsigned ofs, unsigned len) { if (ofs == 0) { switch (len) { @@ -1588,8 +1632,13 @@ static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src, tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1); } -static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src, - int ofs, int len) +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg dest, + TCGReg src, unsigned ofs, unsigned len) { if (ofs == 0) { switch (len) { @@ -1607,6 +1656,15 @@ static void tgen_sextract(TCGContext *s, TCGReg dest, TCGReg src, g_assert_not_reached(); } +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest) { ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; @@ 
-1631,6 +1689,11 @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) } } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tgen_branch(s, S390_CC_ALWAYS, l); +} + static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, TCGReg r1, TCGReg r2, TCGLabel *l) { @@ -1704,6 +1767,24 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c, tgen_branch(s, cc, l); } +static void tgen_brcondr(TCGContext *s, TCGType type, TCGCond c, + TCGReg a0, TCGReg a1, TCGLabel *l) +{ + tgen_brcond(s, type, c, a0, a1, false, l); +} + +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond c, + TCGReg a0, tcg_target_long a1, TCGLabel *l) +{ + tgen_brcond(s, type, c, a0, a1, true, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rC), + .out_rr = tgen_brcondr, + .out_ri = tgen_brcondi, +}; + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest) { ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1; @@ -2000,8 +2081,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, return ldst; } -static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -2010,14 +2091,19 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } -static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - MemOpIdx oi, TCGType data_type) +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) { TCGLabelQemuLdst *ldst; HostAddress h; @@ -2026,12 +2112,17 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data_reg; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, TCGReg addr_reg, MemOpIdx oi, bool is_ld) { @@ -2106,6 +2197,28 @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, } } +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, true); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_O2_I1(o, m, r), + .out = tgen_qemu_ld2, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr_reg, MemOpIdx oi) +{ + tcg_out_qemu_ldst_i128(s, datalo, datahi, addr_reg, oi, false); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_O0_I3(o, m, r), + .out = tgen_qemu_st2, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { /* Reuse the zeroing that exists for goto_ptr. 
*/ @@ -2132,6 +2245,11 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { @@ -2145,660 +2263,903 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* no need to flush icache explicitly */ } -# define OP_32_64(x) \ - case glue(glue(INDEX_op_,x),_i32): \ - case glue(glue(INDEX_op_,x),_i64) -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - S390Opcode op, op2; - TCGArg a0, a1, a2; + if (a0 != a1) { + tcg_out_insn(s, RX, LA, a0, a1, a2, 0); + } else if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, AR, a0, a2); + } else { + tcg_out_insn(s, RRE, AGR, a0, a2); + } +} - switch (opc) { - case INDEX_op_goto_ptr: - a0 = args[0]; - tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); - break; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a0 == a1) { + if (type == TCG_TYPE_I32) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AHI, a0, a2); + } else { + tcg_out_insn(s, RIL, AFI, a0, a2); + } + return; + } + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, AGHI, a0, a2); + return; + } + if (a2 == (int32_t)a2) { + tcg_out_insn(s, RIL, AGFI, a0, a2); + return; + } + if (a2 == (uint32_t)a2) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + return; + } + if (-a2 == (uint32_t)-a2) { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + return; + } + } + tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); +} - OP_32_64(ld8u): - /* ??? LLC (RXY format) is only present with the extended-immediate - facility, whereas LLGC is always present. */ - tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; - OP_32_64(ld8s): - /* ??? LB is no smaller than LGB, so no point to using it. */ - tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); - break; +static void tgen_addco_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, ALGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, ALR, a0, a2); + } else { + tcg_out_insn(s, RRFa, ALRK, a0, a1, a2); + } +} - OP_32_64(ld16u): - /* ??? LLH (RXY format) is only present with the extended-immediate - facility, whereas LLGH is always present. 
*/ - tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; +static void tgen_addco_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (a2 == (int16_t)a2) { + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIEd, ALHSIK, a0, a1, a2); + } else { + tcg_out_insn(s, RIEd, ALGHSIK, a0, a1, a2); + } + return; + } - case INDEX_op_ld16s_i32: - tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); - break; + tcg_out_mov(s, type, a0, a1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, ALFI, a0, a2); + } else if (a2 >= 0) { + tcg_out_insn(s, RIL, ALGFI, a0, a2); + } else { + tcg_out_insn(s, RIL, SLGFI, a0, -a2); + } +} - case INDEX_op_ld_i32: - tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_O1_I2(r, r, rUV), + .out_rrr = tgen_addco_rrr, + .out_rri = tgen_addco_rri, +}; - OP_32_64(st8): - tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, ALCR, a0, a2); + } else { + tcg_out_insn(s, RRE, ALCGR, a0, a2); + } +} - OP_32_64(st16): - tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], - TCG_REG_NONE, args[2]); - break; +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_addcio, +}; - case INDEX_op_st_i32: - tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); - break; +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_addcio, +}; - case INDEX_op_add_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - do_addi_32: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AHI, a0, a2); - break; - } - tcg_out_insn(s, RIL, AFI, a0, a2); - break; - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, AR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; - case INDEX_op_sub_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - a2 = -a2; - goto do_addi_32; - } else if (a0 == a1) { - tcg_out_insn(s, RR, SR, a0, a2); - } else { - tcg_out_insn(s, RRFa, SRK, a0, a1, a2); - } - break; +static void tcg_out_set_carry(TCGContext *s) +{ + tcg_out_insn(s, RR, SLR, TCG_REG_R0, TCG_REG_R0); /* cc = 2 */ +} - case INDEX_op_and_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_andi(s, TCG_TYPE_I32, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, NR, a0, a2); - } else { - tcg_out_insn(s, RRFa, NRK, a0, a1, a2); - } - break; - case INDEX_op_or_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_ori(s, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, OR, a0, a2); - } else { - tcg_out_insn(s, RRFa, ORK, a0, a1, a2); - } - break; - case INDEX_op_xor_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tcg_out_insn(s, RIL, XILF, a0, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RR, XR, args[0], args[2]); - } else { - tcg_out_insn(s, RRFa, XRK, a0, a1, a2); - } - break; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != 
TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, NR, a0, a2); + } else { + tcg_out_insn(s, RRFa, NRK, a0, a1, a2); + } +} - case INDEX_op_andc_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2); - } else { - tcg_out_insn(s, RRFa, NCRK, a0, a1, a2); - } - break; - case INDEX_op_orc_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tgen_ori(s, a0, (uint32_t)~a2); - } else { - tcg_out_insn(s, RRFa, OCRK, a0, a1, a2); - } - break; - case INDEX_op_eqv_i32: - a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - tcg_out_insn(s, RIL, XILF, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, NXRK, a0, a1, a2); +static void tgen_andi_3(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_andi(s, type, a0, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rNKR), + .out_rrr = tgen_and, + .out_rri = tgen_andi_3, +}; + +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NCRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2); + } +} + +static TCGConstraintSetIndex cset_misc3_rrr(TCGType type, unsigned flags) +{ + return HAVE_FACILITY(MISC_INSN_EXT3) ? C_O1_I2(r, r, r) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_andc, +}; + +static void tgen_clz_int(TCGContext *s, TCGReg dest, TCGReg a1, + TCGArg a2, int a2const) +{ + /* + * Since this sets both R and R+1, we have no choice but to store the + * result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. + */ + QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1); + tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1); + + if (a2const && a2 == 64) { + tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0); + return; + } + + /* + * Conditions from FLOGR are: + * 2 -> one bit found + * 8 -> no one bit found + */ + tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2); +} + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_clz_int(s, a0, a1, a2, false); +} + +static void tgen_clzi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_clz_int(s, a0, a1, a2, true); +} + +static TCGConstraintSetIndex cset_clz(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I64 ? C_O1_I2(r, r, rI) : C_NotImplemented; +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_clz, + .out_rrr = tgen_clz, + .out_rri = tgen_clzi, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + /* With MIE3, and bit 0 of m4 set, we get the complete result. 
*/ + if (HAVE_FACILITY(MISC_INSN_EXT3)) { + if (type == TCG_TYPE_I32) { + tcg_out_ext32u(s, dest, src); + src = dest; } - break; - case INDEX_op_nand_i32: - tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]); - break; - case INDEX_op_nor_i32: - tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]); - break; + tcg_out_insn(s, RRFc, POPCNT, dest, src, 8); + return; + } - case INDEX_op_neg_i32: - tcg_out_insn(s, RR, LCR, args[0], args[1]); - break; - case INDEX_op_not_i32: - tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]); - break; + /* Without MIE3, each byte gets the count of bits for the byte. */ + tcg_out_insn(s, RRFc, POPCNT, dest, src, 0); - case INDEX_op_mul_i32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, MHI, a0, a2); - } else { - tcg_out_insn(s, RIL, MSFI, a0, a2); - } - } else if (a0 == a1) { + /* Multiply to sum each byte at the top of the word. */ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, MSFI, dest, 0x01010101); + tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull); + tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0); + tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56); + } +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_ctpop, +}; + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divs2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, DR, a1, a4); + } else { + /* + * TODO: Move the sign-extend of the numerator from a2 into a3 + * into the tcg backend, instead of in early expansion. It is + * required for 32-bit DR, but not 64-bit DSGR. 
+ */ + tcg_out_insn(s, RRE, DSGR, a1, a4); + } +} + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_O2_I3(o, m, 0, 1, r), + .out_rr01r = tgen_divs2, +}; + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4) +{ + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, DLR, a1, a4); + } else { + tcg_out_insn(s, RRE, DLGR, a1, a4); + } +} + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_O2_I3(o, m, 0, 1, r), + .out_rr01r = tgen_divu2, +}; + +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NXRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_eqv, +}; + +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_sh64(s, RSY_SRLG, a0, a1, TCG_REG_NONE, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + if (a0 == a1) { tcg_out_insn(s, RRE, MSR, a0, a2); } else { tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2); } - break; - - case INDEX_op_div2_i32: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RR, DR, args[1], args[4]); - break; - case INDEX_op_divu2_i32: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, DLR, args[1], args[4]); - break; - - case INDEX_op_shl_i32: - op = RS_SLL; - op2 = RSY_SLLK; - do_shift32: - a0 = args[0], a1 = args[1], a2 = (int32_t)args[2]; + } else { if (a0 == a1) { - if (const_args[2]) { - tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2); - } else { - tcg_out_sh32(s, op, a0, a2, 0); - } + tcg_out_insn(s, RRE, MSGR, a0, a2); } else { - /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */ - if (const_args[2]) { - tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2); - } else { - tcg_out_sh64(s, op2, a0, a1, a2, 0); - } + tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); } - break; - case INDEX_op_shr_i32: - op = RS_SRL; - op2 = RSY_SRLK; - goto do_shift32; - case INDEX_op_sar_i32: - op = RS_SRA; - op2 = RSY_SRAK; - goto do_shift32; + } +} - case INDEX_op_rotl_i32: - /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. 
*/ - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]); +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + if (type == TCG_TYPE_I32) { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MHI, a0, a2); } else { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0); + tcg_out_insn(s, RIL, MSFI, a0, a2); } - break; - case INDEX_op_rotr_i32: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLL, args[0], args[1], - TCG_REG_NONE, (32 - args[2]) & 31); + } else { + if (a2 == (int16_t)a2) { + tcg_out_insn(s, RI, MGHI, a0, a2); } else { - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0); + tcg_out_insn(s, RIL, MSGFI, a0, a2); } - break; + } +} + +static TCGConstraintSetIndex cset_mul(TCGType type, unsigned flags) +{ + return (HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O1_I2(r, r, rJ) + : C_O1_I2(r, 0, rJ)); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul, + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + tcg_out_insn(s, RRFa, MGRK, a1, a2, a3); +} + +static TCGConstraintSetIndex cset_muls2(TCGType type, unsigned flags) +{ + return (type == TCG_TYPE_I64 && HAVE_FACILITY(MISC_INSN_EXT2) + ? C_O2_I2(o, m, r, r) : C_NotImplemented); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_muls2, + .out_rrrr = tgen_muls2, +}; + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_debug_assert(a0 == a2); + tcg_debug_assert((a1 & 1) == 0); + tcg_debug_assert(a0 == a1 + 1); + tcg_out_insn(s, RRE, MLGR, a1, a3); +} + +static TCGConstraintSetIndex cset_mulu2(TCGType type, unsigned flags) +{ + return (type == TCG_TYPE_I64 && HAVE_FACILITY(MISC_INSN_EXT2) + ? 
C_O2_I2(o, m, 0, r) : C_NotImplemented); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mulu2, + .out_rrrr = tgen_mulu2, +}; + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_nand(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NNRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NNGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_nand, +}; + +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, NORK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, NOGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_nor, +}; - case INDEX_op_bswap16_i32: - a0 = args[0], a1 = args[1], a2 = args[2]; +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, OGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, OR, a0, a2); + } else { + tcg_out_insn(s, RRFa, ORK, a0, a1, a2); + } +} + +static void tgen_ori_3(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_ori(s, a0, type == TCG_TYPE_I32 ? (uint32_t)a2 : a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rK), + .out_rrr = tgen_or, + .out_rri = tgen_ori_3, +}; + +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, OCRK, a0, a1, a2); + } else { + tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2); + } +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_misc3_rrr, + .out_rrr = tgen_orc, +}; + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_rotl_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + S390Opcode insn = type == TCG_TYPE_I32 ? 
RSY_RLL : RSY_RLLG; + tcg_out_sh64(s, insn, dst, src, v, i); +} + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_rotl_int(s, type, a0, a1, a2, 0); +} + +static void tgen_rotli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_rotl_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_rotl, + .out_rri = tgen_rotli, +}; + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_sar_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + if (type != TCG_TYPE_I32) { + tcg_out_sh64(s, RSY_SRAG, dst, src, v, i); + } else if (dst == src) { + tcg_out_sh32(s, RS_SRA, dst, v, i); + } else { + tcg_out_sh64(s, RSY_SRAK, dst, src, v, i); + } +} + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_sar_int(s, type, a0, a1, a2, 0); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_sar_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + if (type != TCG_TYPE_I32) { + tcg_out_sh64(s, RSY_SLLG, dst, src, v, i); + } else if (dst == src) { + tcg_out_sh32(s, RS_SLL, dst, v, i); + } else { + tcg_out_sh64(s, RSY_SLLK, dst, src, v, i); + } +} + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_shl_int(s, type, a0, a1, a2, 0); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_shl_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr_int(TCGContext *s, TCGType type, TCGReg dst, + TCGReg src, TCGReg v, tcg_target_long i) +{ + if (type != TCG_TYPE_I32) { + tcg_out_sh64(s, RSY_SRLG, dst, src, v, i); + } else if (dst == src) { + tcg_out_sh32(s, RS_SRL, dst, v, i); + } else { + tcg_out_sh64(s, RSY_SRLK, dst, src, v, i); + } +} + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_shr_int(s, type, a0, a1, a2, 0); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_shr_int(s, type, a0, a1, TCG_REG_NONE, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, ri), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, SGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, SR, a0, a2); + } else { + tcg_out_insn(s, RRFa, SRK, a0, a1, a2); + } +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, SLGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, SLR, a0, a2); + } else { + tcg_out_insn(s, RRFa, SLRK, a0, a1, a2); + } +} + 
+static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, SLFI, a0, a2); + } else if (a2 >= 0) { + tcg_out_insn(s, RIL, SLGFI, a0, a2); + } else { + tcg_out_insn(s, RIL, ALGFI, a0, -a2); + } +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_O1_I2(r, r, rUV), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, +}; + +static void tgen_subbio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RRE, SLBR, a0, a2); + } else { + tcg_out_insn(s, RRE, SLBGR, a0, a2); + } +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_O1_I2(r, 0, r), + .out_rrr = tgen_subbio, +}; + +#define outop_subbi outop_subbio + +static void tcg_out_set_borrow(TCGContext *s) +{ + tcg_out_insn(s, RR, CLR, TCG_REG_R0, TCG_REG_R0); /* cc = 0 */ +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_insn(s, RRFa, XGRK, a0, a1, a2); + } else if (a0 == a1) { + tcg_out_insn(s, RR, XR, a0, a2); + } else { + tcg_out_insn(s, RRFa, XRK, a0, a1, a2); + } +} + +static void tgen_xori_3(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_mov(s, type, a0, a1); + tgen_xori(s, a0, type == TCG_TYPE_I32 ? (uint32_t)a2 : a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rK), + .out_rrr = tgen_xor, + .out_rri = tgen_xori_3, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + if (type == TCG_TYPE_I32) { tcg_out_insn(s, RRE, LRVR, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16); - } else { - tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16); - } - break; - case INDEX_op_bswap16_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; + tcg_out_sh32(s, (flags & TCG_BSWAP_OS ? RS_SRA : RS_SRL), + a0, TCG_REG_NONE, 16); + } else { tcg_out_insn(s, RRE, LRVGR, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48); - } else { - tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48); - } - break; + tcg_out_sh64(s, (flags & TCG_BSWAP_OS ? 
RSY_SRAG : RSY_SRLG), + a0, a0, TCG_REG_NONE, 48); + } +} - case INDEX_op_bswap32_i32: - tcg_out_insn(s, RRE, LRVR, args[0], args[1]); - break; - case INDEX_op_bswap32_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - tcg_out_insn(s, RRE, LRVR, a0, a1); - if (a2 & TCG_BSWAP_OS) { - tcg_out_ext32s(s, a0, a0); - } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { - tcg_out_ext32u(s, a0, a0); - } - break; +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; - case INDEX_op_add2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, ALFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, ALR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCR, args[1], args[5]); - break; - case INDEX_op_sub2_i32: - if (const_args[4]) { - tcg_out_insn(s, RIL, SLFI, args[0], args[4]); - } else { - tcg_out_insn(s, RR, SLR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBR, args[1], args[5]); - break; +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_insn(s, RRE, LRVR, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_ext32s(s, a0, a0); + } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) { + tcg_out_ext32u(s, a0, a0); + } +} - case INDEX_op_br: - tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0])); - break; +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; - case INDEX_op_brcond_i32: - tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2], false); - break; - case INDEX_op_negsetcond_i32: - tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], - args[2], const_args[2], true); - break; - case INDEX_op_movcond_i32: - tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3], args[4]); - break; +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_insn(s, RRE, LRVGR, a0, a1); +} - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64); - break; - case INDEX_op_qemu_ld_i128: - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); - break; - case INDEX_op_qemu_st_i128: - tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); - break; +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; - case INDEX_op_ld16s_i64: - tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32u_i64: - tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RR, LCR, a0, a1); + } else { + tcg_out_insn(s, RRE, LCGR, a0, a1); + } +} - case INDEX_op_st32_i64: - tcg_out_st(s, 
TCG_TYPE_I32, args[0], args[1], args[2]); - break; - case INDEX_op_st_i64: - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; - case INDEX_op_add_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - do_addi_64: - if (a0 == a1) { - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, AGHI, a0, a2); - break; - } - if (a2 == (int32_t)a2) { - tcg_out_insn(s, RIL, AGFI, a0, a2); - break; - } - if (a2 == (uint32_t)a2) { - tcg_out_insn(s, RIL, ALGFI, a0, a2); - break; - } - if (-a2 == (uint32_t)-a2) { - tcg_out_insn(s, RIL, SLGFI, a0, -a2); - break; - } - } - tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2); - } else if (a0 == a1) { - tcg_out_insn(s, RRE, AGR, a0, a2); - } else { - tcg_out_insn(s, RX, LA, a0, a1, a2, 0); - } - break; - case INDEX_op_sub_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - a2 = -a2; - goto do_addi_64; - } else { - tcg_out_insn(s, RRFa, SGRK, a0, a1, a2); - } - break; +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_nor(s, type, a0, a1, a1); +} - case INDEX_op_and_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_andi(s, TCG_TYPE_I64, args[0], args[2]); - } else { - tcg_out_insn(s, RRFa, NGRK, a0, a1, a2); - } - break; - case INDEX_op_or_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_ori(s, a0, a2); - } else { - tcg_out_insn(s, RRFa, OGRK, a0, a1, a2); - } - break; - case INDEX_op_xor_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_xori(s, a0, a2); - } else { - tcg_out_insn(s, RRFa, XGRK, a0, a1, a2); - } - break; +static TCGConstraintSetIndex cset_not(TCGType type, unsigned flags) +{ + return HAVE_FACILITY(MISC_INSN_EXT3) ? C_O1_I1(r, r) : C_NotImplemented; +} - case INDEX_op_andc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_andi(s, TCG_TYPE_I64, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2); - } - break; - case INDEX_op_orc_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_ori(s, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2); - } - break; - case INDEX_op_eqv_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - tgen_xori(s, a0, ~a2); - } else { - tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2); - } - break; - case INDEX_op_nand_i64: - tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]); - break; - case INDEX_op_nor_i64: - tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]); - break; +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_not, + .out_rr = tgen_not, +}; - case INDEX_op_neg_i64: - tcg_out_insn(s, RRE, LCGR, args[0], args[1]); - break; - case INDEX_op_not_i64: - tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]); - break; - case INDEX_op_bswap64_i64: - tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); - break; +static void tcg_out_mb(TCGContext *s, unsigned a0) +{ + /* + * The host memory model is quite strong, we simply need to + * serialize the instruction stream. 
+ */ + if (a0 & TCG_MO_ST_LD) { + /* fast-bcr-serialization facility (45) is present */ + tcg_out_insn(s, RR, BCR, 14, 0); + } +} - case INDEX_op_mul_i64: - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[2]) { - tcg_out_mov(s, TCG_TYPE_I64, a0, a1); - if (a2 == (int16_t)a2) { - tcg_out_insn(s, RI, MGHI, a0, a2); - } else { - tcg_out_insn(s, RIL, MSGFI, a0, a2); - } - } else if (a0 == a1) { - tcg_out_insn(s, RRE, MSGR, a0, a2); - } else { - tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2); - } - break; +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LLGC, dest, base, TCG_REG_NONE, offset); +} - case INDEX_op_div2_i64: - /* - * ??? We get an unnecessary sign-extension of the dividend - * into op0 with this definition, but as we do in fact always - * produce both quotient and remainder using INDEX_op_div_i64 - * instead requires jumping through even more hoops. - */ - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, DSGR, args[1], args[4]); - break; - case INDEX_op_divu2_i64: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert(args[1] == args[3]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, DLGR, args[1], args[4]); - break; - case INDEX_op_mulu2_i64: - tcg_debug_assert(args[0] == args[2]); - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRE, MLGR, args[1], args[3]); - break; - case INDEX_op_muls2_i64: - tcg_debug_assert((args[1] & 1) == 0); - tcg_debug_assert(args[0] == args[1] + 1); - tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]); - break; - - case INDEX_op_shl_i64: - op = RSY_SLLG; - do_shift64: - if (const_args[2]) { - tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, op, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_shr_i64: - op = RSY_SRLG; - goto do_shift64; - case INDEX_op_sar_i64: - op = RSY_SRAG; - goto do_shift64; +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, (64 - args[2]) & 63); - } else { - /* We can use the smaller 32-bit negate because only the - low 6 bits are examined for the rotate. 
*/ - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); - } - break; +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LGB, dest, base, TCG_REG_NONE, offset); +} - case INDEX_op_add2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, ALGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, ALGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, ALCGR, args[1], args[5]); - break; - case INDEX_op_sub2_i64: - if (const_args[4]) { - if ((int64_t)args[4] >= 0) { - tcg_out_insn(s, RIL, SLGFI, args[0], args[4]); - } else { - tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]); - } - } else { - tcg_out_insn(s, RRE, SLGR, args[0], args[4]); - } - tcg_out_insn(s, RRE, SLBGR, args[1], args[5]); - break; +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; - case INDEX_op_brcond_i64: - tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], - args[1], const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2], false); - break; - case INDEX_op_negsetcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2], true); - break; - case INDEX_op_movcond_i64: - tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], - args[2], const_args[2], args[3], const_args[3], args[4]); - break; +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LLGH, dest, base, TCG_REG_NONE, offset); +} - OP_32_64(deposit): - a0 = args[0], a1 = args[1], a2 = args[2]; - if (const_args[1]) { - tgen_deposit(s, a0, a2, args[3], args[4], 1); - } else { - /* Since we can't support "0Z" as a constraint, we allow a1 in - any register. Fix things up as if a matching constraint. */ - if (a0 != a1) { - if (a0 == a2) { - tcg_out_mov(s, type, TCG_TMP0, a2); - a2 = TCG_TMP0; - } - tcg_out_mov(s, type, a0, a1); - } - tgen_deposit(s, a0, a2, args[3], args[4], 0); - } - break; +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; - OP_32_64(extract): - tgen_extract(s, args[0], args[1], args[2], args[3]); - break; - OP_32_64(sextract): - tgen_sextract(s, args[0], args[1], args[2], args[3]); - break; +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_LH, RXY_LHY, dest, base, TCG_REG_NONE, offset); + } else { + tcg_out_mem(s, 0, RXY_LGH, dest, base, TCG_REG_NONE, offset); + } +} - case INDEX_op_clz_i64: - tgen_clz(s, args[0], args[1], args[2], const_args[2]); - break; +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; - case INDEX_op_ctpop_i32: - tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]); - break; - case INDEX_op_ctpop_i64: - tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]); - break; +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LLGF, dest, base, TCG_REG_NONE, offset); +} - case INDEX_op_mb: - /* The host memory model is quite strong, we simply need to - serialize the instruction stream. 
*/ - if (args[0] & TCG_MO_ST_LD) { - /* fast-bcr-serialization facility (45) is present */ - tcg_out_insn(s, RR, BCR, 14, 0); - } - break; +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); - } +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, 0, RXY_LGF, dest, base, TCG_REG_NONE, offset); } +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, RX_STC, RXY_STCY, data, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_mem(s, RX_STH, RXY_STHY, data, base, TCG_REG_NONE, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + + static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src) { @@ -3239,166 +3600,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(r, r); - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - return C_O1_I2(r, r, ri); - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, r, rC); - - case INDEX_op_clz_i64: - return C_O1_I2(r, r, rI); - - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - case INDEX_op_and_i32: - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_and_i64: - return C_O1_I2(r, r, rNKR); - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - return C_O1_I2(r, r, rK); - - case INDEX_op_andc_i32: - 
case INDEX_op_orc_i32: - case INDEX_op_eqv_i32: - return C_O1_I2(r, r, ri); - case INDEX_op_andc_i64: - return C_O1_I2(r, r, rKR); - case INDEX_op_orc_i64: - case INDEX_op_eqv_i64: - return C_O1_I2(r, r, rNK); - - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - return C_O1_I2(r, r, r); - - case INDEX_op_mul_i32: - return (HAVE_FACILITY(MISC_INSN_EXT2) - ? C_O1_I2(r, r, ri) - : C_O1_I2(r, 0, ri)); - case INDEX_op_mul_i64: - return (HAVE_FACILITY(MISC_INSN_EXT2) - ? C_O1_I2(r, r, rJ) - : C_O1_I2(r, 0, rJ)); - - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - return C_O1_I2(r, r, ri); - - case INDEX_op_brcond_i32: - return C_O0_I2(r, ri); - case INDEX_op_brcond_i64: - return C_O0_I2(r, rC); - - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: - return C_O1_I1(r, r); - - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: - return C_O1_I1(r, r); - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_st_i32: - return C_O0_I2(r, r); - case INDEX_op_qemu_ld_i128: - return C_O2_I1(o, m, r); - case INDEX_op_qemu_st_i128: - return C_O0_I3(o, m, r); - - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - return C_O1_I2(r, rZ, r); - - case INDEX_op_movcond_i32: - return C_O1_I4(r, r, ri, rI, r); - case INDEX_op_movcond_i64: - return C_O1_I4(r, r, rC, rI, r); - - case INDEX_op_div2_i32: - case INDEX_op_div2_i64: - case INDEX_op_divu2_i32: - case INDEX_op_divu2_i64: - return C_O2_I3(o, m, 0, 1, r); - - case INDEX_op_mulu2_i64: - return C_O2_I2(o, m, 0, r); - case INDEX_op_muls2_i64: - return C_O2_I2(o, m, r, r); - - case INDEX_op_add2_i32: - case INDEX_op_sub2_i32: - return C_N1_O1_I4(r, r, 0, 1, ri, r); - - case INDEX_op_add2_i64: - case INDEX_op_sub2_i64: - return C_N1_O1_I4(r, r, 0, 1, rJU, r); - case INDEX_op_st_vec: return C_O0_I2(v, r); case INDEX_op_ld_vec: diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 61f9fa3..1a57adc 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -11,10 +11,11 @@ */ C_O0_I1(r) C_O0_I2(rz, r) -C_O0_I2(rz, rJ) +C_O0_I2(r, rJ) C_O1_I1(r, r) C_O1_I2(r, r, r) +C_O1_I2(r, r, rJ) C_O1_I2(r, rz, rJ) -C_O1_I4(r, rz, rJ, rI, 0) -C_O2_I2(r, r, rz, rJ) -C_O2_I4(r, r, rz, rz, rJ, rJ) +C_O1_I2(r, rz, rz) +C_O1_I4(r, r, rJ, rI, 0) +C_O2_I2(r, r, r, r) diff --git a/tcg/sparc64/tcg-target-has.h b/tcg/sparc64/tcg-target-has.h index 2f46df8..b29fd17 100644 --- a/tcg/sparc64/tcg-target-has.h +++ b/tcg/sparc64/tcg-target-has.h @@ -7,74 +7,9 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#if defined(__VIS__) && __VIS__ >= 0x300 -#define use_vis3_instructions 1 -#else -extern bool use_vis3_instructions; -#endif - /* optional instructions */ -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_rot_i32 0 
-#define TCG_TARGET_HAS_ext8s_i32 0 -#define TCG_TARGET_HAS_ext16s_i32 0 -#define TCG_TARGET_HAS_ext8u_i32 0 -#define TCG_TARGET_HAS_ext16u_i32 0 -#define TCG_TARGET_HAS_bswap16_i32 0 -#define TCG_TARGET_HAS_bswap32_i32 0 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_eqv_i32 0 -#define TCG_TARGET_HAS_nand_i32 0 -#define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_negsetcond_i32 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 0 -#define TCG_TARGET_HAS_rot_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 0 -#define TCG_TARGET_HAS_ext16s_i64 0 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 0 -#define TCG_TARGET_HAS_bswap32_i64 0 -#define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions -#define TCG_TARGET_HAS_mulsh_i64 0 - #define TCG_TARGET_HAS_qemu_ldst_i128 0 - #define TCG_TARGET_HAS_tst 1 #define TCG_TARGET_extract_valid(type, ofs, len) \ diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 7c722f5..9e004fb 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -199,7 +199,9 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04)) #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14)) #define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08)) +#define ARITH_ADDCCC (INSN_OP(2) | INSN_OP3(0x18)) #define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c)) +#define ARITH_SUBCCC (INSN_OP(2) | INSN_OP3(0x1c)) #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a)) #define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b)) #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e)) @@ -208,9 +210,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d)) #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d)) #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c)) +#define ARITH_POPC (INSN_OP(2) | INSN_OP3(0x2e)) #define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f)) #define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11)) +#define ARITH_ADDXCCC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x13)) #define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16)) #define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25)) @@ -223,6 +227,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0)) 
#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0)) +#define WRCCR (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(2)) #define JMPL (INSN_OP(2) | INSN_OP3(0x38)) #define RETURN (INSN_OP(2) | INSN_OP3(0x39)) #define SAVE (INSN_OP(2) | INSN_OP3(0x3c)) @@ -270,8 +275,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE)) #define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE)) -#ifndef use_vis3_instructions -bool use_vis3_instructions; +static bool use_popc_instructions; +#if defined(__VIS__) && __VIS__ >= 0x300 +#define use_vis3_instructions 1 +#else +static bool use_vis3_instructions; #endif static bool check_fit_i64(int64_t val, unsigned int bits) @@ -366,7 +374,7 @@ static void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1, } static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int op) + int32_t val2, int val2const, int op) { tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2))); @@ -596,21 +604,6 @@ static void tcg_out_sety(TCGContext *s, TCGReg rs) tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); } -static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, - int32_t val2, int val2const, int uns) -{ - /* Load Y with the sign/zero extension of RS1 to 64-bits. */ - if (uns) { - tcg_out_sety(s, TCG_REG_G0); - } else { - tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA); - tcg_out_sety(s, TCG_REG_T1); - } - - tcg_out_arithc(s, rd, rs1, val2, val2const, - uns ? ARITH_UDIV : ARITH_SDIV); -} - static const uint8_t tcg_cond_to_bcond[16] = { [TCG_COND_EQ] = COND_E, [TCG_COND_NE] = COND_NE, @@ -652,6 +645,12 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) tcg_out_bpcc0(s, scond, flags, off19); } +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_bpcc(s, COND_A, BPCC_PT, l); + tcg_out_nop(s); +} + static void tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg c1, int32_t c2, int c2const) { @@ -667,11 +666,10 @@ static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_nop(s); } -static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret, +static void tcg_out_movcc(TCGContext *s, int scond, int cc, TCGReg ret, int32_t v1, int v1const) { - tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) - | INSN_RS1(tcg_cond_to_bcond[cond]) + tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret) | INSN_RS1(scond) | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1))); } @@ -680,7 +678,7 @@ static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, int32_t v1, int v1const) { tcg_out_cmp(s, cond, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], MOVCC_ICC, ret, v1, v1const); } static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, @@ -724,12 +722,12 @@ static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, tcg_out_movr(s, rcond, ret, c1, v1, v1const); } else { tcg_out_cmp(s, cond, c1, c2, c2const); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], MOVCC_XCC, ret, v1, v1const); } } static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, bool neg) + TCGReg c1, int32_t c2, bool c2const, bool neg) { /* For 32-bit comparisons, we can play games with ADDC/SUBC. 
*/ switch (cond) { @@ -749,7 +747,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, } c1 = TCG_REG_G0, c2const = 0; cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU); - break; + break; case TCG_COND_TSTEQ: case TCG_COND_TSTNE: @@ -758,7 +756,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, c1 = TCG_REG_G0; c2 = TCG_REG_T1, c2const = 0; cond = (cond == TCG_COND_TSTEQ ? TCG_COND_GEU : TCG_COND_LTU); - break; + break; case TCG_COND_GTU: case TCG_COND_LEU: @@ -778,7 +776,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, default: tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_ICC, ret, neg ? -1 : 1, 1); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], + MOVCC_ICC, ret, neg ? -1 : 1, 1); return; } @@ -803,7 +802,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret, } static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg c1, int32_t c2, int c2const, bool neg) + TCGReg c1, int32_t c2, bool c2const, bool neg) { int rcond; @@ -833,78 +832,103 @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret, } else { tcg_out_cmp(s, cond, c1, c2, c2const); tcg_out_movi_s13(s, ret, 0); - tcg_out_movcc(s, cond, MOVCC_XCC, ret, neg ? -1 : 1, 1); + tcg_out_movcc(s, tcg_cond_to_bcond[cond], + MOVCC_XCC, ret, neg ? -1 : 1, 1); } } -static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, int opl, int oph) +static void tcg_out_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGArg arg2, bool const_arg2, + TCGLabel *l) { - TCGReg tmp = TCG_REG_T1; - - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; + if (type == TCG_TYPE_I32) { + tcg_out_brcond_i32(s, cond, arg1, arg2, const_arg2, l); + } else { + tcg_out_brcond_i64(s, cond, arg1, arg2, const_arg2, l); } +} - tcg_out_arithc(s, tmp, al, bl, blconst, opl); - tcg_out_arithc(s, rh, ah, bh, bhconst, oph); - tcg_out_mov(s, TCG_TYPE_I32, rl, tmp); +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, TCGReg arg2, TCGLabel *l) +{ + tcg_out_brcond(s, type, cond, arg1, arg2, false, l); } -static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh, - TCGReg al, TCGReg ah, int32_t bl, int blconst, - int32_t bh, int bhconst, bool is_sub) +static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg1, tcg_target_long arg2, TCGLabel *l) { - TCGReg tmp = TCG_REG_T1; + tcg_out_brcond(s, type, cond, arg1, arg2, true, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, rJ), + .out_rr = tgen_brcond, + .out_ri = tgen_brcondi, +}; - /* Note that the low parts are fully consumed before tmp is set. */ - if (rl != ah && (bhconst || rl != bh)) { - tmp = rl; +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, + TCGArg c2, bool c2const, bool neg) +{ + if (type == TCG_TYPE_I32) { + tcg_out_setcond_i32(s, cond, ret, c1, c2, c2const, neg); + } else { + tcg_out_setcond_i64(s, cond, ret, c1, c2, c2const, neg); } +} - tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? 
ARITH_SUBCC : ARITH_ADDCC); +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false); +} - if (use_vis3_instructions && !is_sub) { - /* Note that ADDXC doesn't accept immediates. */ - if (bhconst && bh != 0) { - tcg_out_movi_s13(s, TCG_REG_T2, bh); - bh = TCG_REG_T2; - } - tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC); - } else if (bh == TCG_REG_G0) { - /* If we have a zero, we can perform the operation in two insns, - with the arithmetic first, and a conditional move into place. */ - if (rh == ah) { - tcg_out_arithi(s, TCG_REG_T2, ah, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0); - } else { - tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD); - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0); - } +static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_setcond, + .out_rri = tgen_setcondi, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true); +} + +static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, tcg_target_long arg2) +{ + tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_negsetcond, + .out_rri = tgen_negsetcondi, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool c2const, + TCGArg v1, bool v1const, TCGArg v2, bool v2consf) +{ + if (type == TCG_TYPE_I32) { + tcg_out_movcond_i32(s, cond, ret, c1, c2, c2const, v1, v1const); } else { - /* - * Otherwise adjust BH as if there is carry into T2. - * Note that constant BH is constrained to 11 bits for the MOVCC, - * so the adjustment fits 12 bits. - */ - if (bhconst) { - tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1)); - } else { - tcg_out_arithi(s, TCG_REG_T2, bh, 1, - is_sub ? ARITH_SUB : ARITH_ADD); - } - /* ... smoosh T2 back to original BH if carry is clear ... */ - tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst); - /* ... and finally perform the arithmetic with the new operand. */ - tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD); + tcg_out_movcond_i64(s, cond, ret, c1, c2, c2const, v1, v1const); } - - tcg_out_mov(s, TCG_TYPE_I64, rl, tmp); } +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, rJ, rI, 0), + .out = tgen_movcond, +}; + static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest, bool in_prologue, bool tail_call) { @@ -935,7 +959,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, tcg_out_nop(s); } -static void tcg_out_mb(TCGContext *s, TCGArg a0) +static void tcg_out_mb(TCGContext *s, unsigned a0) { /* Note that the TCG memory order constants mirror the Sparc MEMBAR. 
*/ tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL)); @@ -1166,8 +1190,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, return ldst; } -static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, - MemOpIdx oi, TCGType data_type) +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = { [MO_UB] = LDUB, @@ -1199,14 +1223,23 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } -static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, - MemOpIdx oi, TCGType data_type) +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) { static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = { [MO_UB] = STB, @@ -1229,12 +1262,21 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]); if (ldst) { - ldst->type = data_type; + ldst->type = type; ldst->datalo_reg = data; ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); } } +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out = tgen_qemu_st, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { if (check_fit_ptr(a0, 13)) { @@ -1280,369 +1322,794 @@ static void tcg_out_goto_tb(TCGContext *s, int which) } } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); + tcg_out_mov_delay(s, TCG_REG_TB, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - TCGArg a0, a1, a2; - int c, c2; + tcg_out_arith(s, a0, a1, a2, ARITH_ADD); +} - /* Hoist the loads of the most common arguments. 
*/ - a0 = args[0]; - a1 = args[1]; - a2 = args[2]; - c2 = const_args[2]; +static void tgen_addi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_ADD); +} - switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); - tcg_out_mov_delay(s, TCG_REG_TB, a0); - break; - case INDEX_op_br: - tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); - tcg_out_nop(s); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_add, + .out_rri = tgen_addi, +}; -#define OP_32_64(x) \ - glue(glue(case INDEX_op_, x), _i32): \ - glue(glue(case INDEX_op_, x), _i64) +static void tgen_addco_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_ADDCC); +} - OP_32_64(ld8u): - tcg_out_ldst(s, a0, a1, a2, LDUB); - break; - OP_32_64(ld8s): - tcg_out_ldst(s, a0, a1, a2, LDSB); - break; - OP_32_64(ld16u): - tcg_out_ldst(s, a0, a1, a2, LDUH); - break; - OP_32_64(ld16s): - tcg_out_ldst(s, a0, a1, a2, LDSH); - break; - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - tcg_out_ldst(s, a0, a1, a2, LDUW); - break; - OP_32_64(st8): - tcg_out_ldst(s, a0, a1, a2, STB); - break; - OP_32_64(st16): - tcg_out_ldst(s, a0, a1, a2, STH); - break; - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - tcg_out_ldst(s, a0, a1, a2, STW); - break; - OP_32_64(add): - c = ARITH_ADD; - goto gen_arith; - OP_32_64(sub): - c = ARITH_SUB; - goto gen_arith; - OP_32_64(and): - c = ARITH_AND; - goto gen_arith; - OP_32_64(andc): - c = ARITH_ANDN; - goto gen_arith; - OP_32_64(or): - c = ARITH_OR; - goto gen_arith; - OP_32_64(orc): - c = ARITH_ORN; - goto gen_arith; - OP_32_64(xor): - c = ARITH_XOR; - goto gen_arith; - case INDEX_op_shl_i32: - c = SHIFT_SLL; - do_shift32: - /* Limit immediate shift count lest we create an illegal insn. 
*/ - tcg_out_arithc(s, a0, a1, a2 & 31, c2, c); - break; - case INDEX_op_shr_i32: - c = SHIFT_SRL; - goto do_shift32; - case INDEX_op_sar_i32: - c = SHIFT_SRA; - goto do_shift32; - case INDEX_op_mul_i32: - c = ARITH_UMUL; - goto gen_arith; - - OP_32_64(neg): - c = ARITH_SUB; - goto gen_arith1; - OP_32_64(not): - c = ARITH_ORN; - goto gen_arith1; - - case INDEX_op_div_i32: - tcg_out_div32(s, a0, a1, a2, c2, 0); - break; - case INDEX_op_divu_i32: - tcg_out_div32(s, a0, a1, a2, c2, 1); - break; +static void tgen_addco_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_ADDCC); +} - case INDEX_op_brcond_i32: - tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2, false); - break; - case INDEX_op_negsetcond_i32: - tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2, true); - break; - case INDEX_op_movcond_i32: - tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_addco_rrr, + .out_rri = tgen_addco_rri, +}; - case INDEX_op_add2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_ADDCC, ARITH_ADDC); - break; - case INDEX_op_sub2_i32: - tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3], - args[4], const_args[4], args[5], const_args[5], - ARITH_SUBCC, ARITH_SUBC); - break; - case INDEX_op_mulu2_i32: - c = ARITH_UMUL; - goto do_mul2; - case INDEX_op_muls2_i32: - c = ARITH_SMUL; - do_mul2: - /* The 32-bit multiply insns produce a full 64-bit result. */ - tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - break; +static void tgen_addci_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_arith(s, a0, a1, a2, ARITH_ADDC); + } else if (use_vis3_instructions) { + tcg_out_arith(s, a0, a1, a2, ARITH_ADDXC); + } else { + tcg_out_arith(s, TCG_REG_T1, a1, a2, ARITH_ADD); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_ADD); /* for CS */ + /* Select the correct result based on actual carry value. 
*/ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } +} - case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); - break; - case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); - break; - case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); - break; +static void tgen_addci_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_arithi(s, a0, a1, a2, ARITH_ADDC); + return; + } + /* !use_vis3_instructions */ + if (a2 != 0) { + tcg_out_arithi(s, TCG_REG_T1, a1, a2, ARITH_ADD); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_ADD); /* for CS */ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } else if (a0 == a1) { + tcg_out_arithi(s, TCG_REG_T1, a1, 1, ARITH_ADD); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, a0, TCG_REG_T1, false); + } else { + tcg_out_arithi(s, a0, a1, 1, ARITH_ADD); + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, a1, false); + } +} - case INDEX_op_ld32s_i64: - tcg_out_ldst(s, a0, a1, a2, LDSW); - break; - case INDEX_op_ld_i64: - tcg_out_ldst(s, a0, a1, a2, LDX); - break; - case INDEX_op_st_i64: - tcg_out_ldst(s, a0, a1, a2, STX); - break; - case INDEX_op_shl_i64: - c = SHIFT_SLLX; - do_shift64: - /* Limit immediate shift count lest we create an illegal insn. */ - tcg_out_arithc(s, a0, a1, a2 & 63, c2, c); - break; - case INDEX_op_shr_i64: - c = SHIFT_SRLX; - goto do_shift64; - case INDEX_op_sar_i64: - c = SHIFT_SRAX; - goto do_shift64; - case INDEX_op_mul_i64: - c = ARITH_MULX; - goto gen_arith; - case INDEX_op_div_i64: - c = ARITH_SDIVX; - goto gen_arith; - case INDEX_op_divu_i64: - c = ARITH_UDIVX; - goto gen_arith; - - case INDEX_op_brcond_i64: - tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3])); - break; - case INDEX_op_setcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2, false); - break; - case INDEX_op_negsetcond_i64: - tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2, true); - break; - case INDEX_op_movcond_i64: - tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]); - break; - case INDEX_op_add2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], false); - break; - case INDEX_op_sub2_i64: - tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4], - const_args[4], args[5], const_args[5], true); - break; - case INDEX_op_muluh_i64: - tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI); - break; +static TCGConstraintSetIndex cset_addci(TCGType type, unsigned flags) +{ + if (use_vis3_instructions && type == TCG_TYPE_I64) { + /* Note that ADDXC doesn't accept immediates. */ + return C_O1_I2(r, rz, rz); + } + return C_O1_I2(r, rz, rJ); +} - gen_arith: - tcg_out_arithc(s, a0, a1, a2, c2, c); - break; +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addci, + .out_rrr = tgen_addci_rrr, + .out_rri = tgen_addci_rri, +}; - gen_arith1: - tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); - break; +/* Copy %xcc.c to %icc.c */ +static void tcg_out_dup_xcc_c(TCGContext *s) +{ + if (use_vis3_instructions) { + tcg_out_arith(s, TCG_REG_T1, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC); + } else { + tcg_out_movi_s13(s, TCG_REG_T1, 0); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, TCG_REG_T1, 1, true); + } + /* Write carry-in into %icc via {0,1} + -1. 
*/ + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, -1, ARITH_ADDCC); +} - case INDEX_op_mb: - tcg_out_mb(s, a0); - break; +static void tgen_addcio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + if (use_vis3_instructions) { + tcg_out_arith(s, a0, a1, a2, ARITH_ADDXCCC); + return; + } + tcg_out_dup_xcc_c(s); + } + tcg_out_arith(s, a0, a1, a2, ARITH_ADDCCC); +} - case INDEX_op_extract_i64: - tcg_debug_assert(a2 + args[3] == 32); - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRL); - break; - case INDEX_op_sextract_i64: - tcg_debug_assert(a2 + args[3] == 32); - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRA); - break; +static void tgen_addcio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type != TCG_TYPE_I32) { + /* !use_vis3_instructions */ + tcg_out_dup_xcc_c(s); + } + tcg_out_arithi(s, a0, a1, a2, ARITH_ADDCCC); +} - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - default: - g_assert_not_reached(); +static TCGConstraintSetIndex cset_addcio(TCGType type, unsigned flags) +{ + if (use_vis3_instructions && type == TCG_TYPE_I64) { + /* Note that ADDXCCC doesn't accept immediates. 
*/ + return C_O1_I2(r, rz, rz); } + return C_O1_I2(r, rz, rJ); } -static TCGConstraintSetIndex -tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addcio, + .out_rrr = tgen_addcio_rrr, + .out_rri = tgen_addcio_rri, +}; + +static void tcg_out_set_carry(TCGContext *s) +{ + /* 0x11 -> xcc = nzvC, icc = nzvC */ + tcg_out_arithi(s, 0, TCG_REG_G0, 0x11, WRCCR); +} + +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_AND); +} + +static void tgen_andi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_AND); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_and, + .out_rri = tgen_andi, +}; + +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i64: - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_ld_i64: + tcg_out_arith(s, a0, a1, a2, ARITH_ANDN); +} + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_arith(s, a0, TCG_REG_G0, a1, ARITH_POPC); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + if (use_popc_instructions && type == TCG_TYPE_I64) { return C_O1_I1(r, r); + } + return C_NotImplemented; +} - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st_i32: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st_i64: - return C_O0_I2(rz, r); - - case INDEX_op_add_i32: - case INDEX_op_add_i64: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_div_i32: - case INDEX_op_div_i64: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - case INDEX_op_and_i32: - case INDEX_op_and_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: - return C_O1_I2(r, rz, rJ); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: - return C_O0_I2(rz, rJ); - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - return C_O1_I4(r, rz, rJ, rI, 0); - case 
INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, rz, rz, rJ, rJ); - case INDEX_op_mulu2_i32: - case INDEX_op_muls2_i32: - return C_O2_I2(r, r, rz, rJ); - case INDEX_op_muluh_i64: - return C_O1_I2(r, r, r); +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; - default: - return C_NotImplemented; +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divs_rJ(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGArg a2, bool c2) +{ + uint32_t insn; + + if (type == TCG_TYPE_I32) { + /* Load Y with the sign extension of a1 to 64-bits. */ + tcg_out_arithi(s, TCG_REG_T1, a1, 31, SHIFT_SRA); + tcg_out_sety(s, TCG_REG_T1); + insn = ARITH_SDIV; + } else { + insn = ARITH_SDIVX; + } + tcg_out_arithc(s, a0, a1, a2, c2, insn); +} + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_divs_rJ(s, type, a0, a1, a2, false); +} + +static void tgen_divsi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_divs_rJ(s, type, a0, a1, a2, true); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_divs, + .out_rri = tgen_divsi, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu_rJ(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGArg a2, bool c2) +{ + uint32_t insn; + + if (type == TCG_TYPE_I32) { + /* Load Y with the zero extension to 64-bits. */ + tcg_out_sety(s, TCG_REG_G0); + insn = ARITH_UDIV; + } else { + insn = ARITH_UDIVX; + } + tcg_out_arithc(s, a0, a1, a2, c2, insn); +} + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tgen_divu_rJ(s, type, a0, a1, a2, false); +} + +static void tgen_divui(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tgen_divu_rJ(s, type, a0, a1, a2, true); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_divu, + .out_rri = tgen_divui, +}; + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? ARITH_UMUL : ARITH_MULX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_muli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? ARITH_UMUL : ARITH_MULX; + tcg_out_arithi(s, a0, a1, a2, insn); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_mul, + .out_rri = tgen_muli, +}; + +/* + * The 32-bit multiply insns produce a full 64-bit result. + * Supporting 32-bit mul[us]2 opcodes avoids sign/zero-extensions + * before the actual multiply; we only need extract the high part + * into the separate operand. 
+ */ +static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_I32 ? C_O2_I2(r, r, r, r) : C_NotImplemented; +} + +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_arith(s, a0, a2, a3, ARITH_SMUL); + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_muls2, +}; + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_arith(s, a0, a2, a3, ARITH_UMUL); + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_mulu2, +}; + +static void tgen_muluh(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_UMULXHI); +} + +static TCGConstraintSetIndex cset_muluh(TCGType type, unsigned flags) +{ + return (type == TCG_TYPE_I64 && use_vis3_instructions + ? C_O1_I2(r, r, r) : C_NotImplemented); +} + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_muluh, + .out_rrr = tgen_muluh, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_OR); +} + +static void tgen_ori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_OR); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_or, + .out_rri = tgen_ori, +}; + +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_ORN); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRA : SHIFT_SRAX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_sari(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRA : SHIFT_SRAX; + uint32_t mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_arithi(s, a0, a1, a2 & mask, insn); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_sar, + .out_rri = tgen_sari, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? 
SHIFT_SLL : SHIFT_SLLX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_shli(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SLL : SHIFT_SLLX; + uint32_t mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_arithi(s, a0, a1, a2 & mask, insn); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_shl, + .out_rri = tgen_shli, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRL : SHIFT_SRLX; + tcg_out_arith(s, a0, a1, a2, insn); +} + +static void tgen_shri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + uint32_t insn = type == TCG_TYPE_I32 ? SHIFT_SRL : SHIFT_SRLX; + uint32_t mask = type == TCG_TYPE_I32 ? 31 : 63; + tcg_out_arithi(s, a0, a1, a2 & mask, insn); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_shr, + .out_rri = tgen_shri, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_SUB); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static void tgen_subbo_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_SUBCC); +} + +static void tgen_subbo_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_SUBCC); +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_O1_I2(r, rz, rJ), + .out_rrr = tgen_subbo_rrr, + .out_rri = tgen_subbo_rri, +}; + +static void tgen_subbi_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + /* TODO: OSA 2015 added SUBXC */ + if (type == TCG_TYPE_I32) { + tcg_out_arith(s, a0, a1, a2, ARITH_SUBC); + } else { + tcg_out_arith(s, TCG_REG_T1, a1, a2, ARITH_SUB); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_SUB); /* for CS */ + /* Select the correct result based on actual borrow value. 
*/ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); } } +static void tgen_subbi_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type == TCG_TYPE_I32) { + tcg_out_arithi(s, a0, a1, a2, ARITH_SUBC); + } else if (a2 != 0) { + tcg_out_arithi(s, TCG_REG_T1, a1, a2, ARITH_SUB); /* for CC */ + tcg_out_arithi(s, a0, TCG_REG_T1, 1, ARITH_SUB); /* for CS */ + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, TCG_REG_T1, false); + } else if (a0 == a1) { + tcg_out_arithi(s, TCG_REG_T1, a1, 1, ARITH_SUB); + tcg_out_movcc(s, COND_CS, MOVCC_XCC, a0, TCG_REG_T1, false); + } else { + tcg_out_arithi(s, a0, a1, 1, ARITH_SUB); + tcg_out_movcc(s, COND_CC, MOVCC_XCC, a0, a1, false); + } +} + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_O1_I2(r, rz, rJ), + .out_rrr = tgen_subbi_rrr, + .out_rri = tgen_subbi_rri, +}; + +static void tgen_subbio_rrr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type != TCG_TYPE_I32) { + /* TODO: OSA 2015 added SUBXCCC */ + tcg_out_dup_xcc_c(s); + } + tcg_out_arith(s, a0, a1, a2, ARITH_SUBCCC); +} + +static void tgen_subbio_rri(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + if (type != TCG_TYPE_I32) { + tcg_out_dup_xcc_c(s); + } + tcg_out_arithi(s, a0, a1, a2, ARITH_SUBCCC); +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_O1_I2(r, rz, rJ), + .out_rrr = tgen_subbio_rrr, + .out_rri = tgen_subbio_rri, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + tcg_out_set_carry(s); /* borrow == carry */ +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_arith(s, a0, a1, a2, ARITH_XOR); +} + +static void tgen_xori(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2) +{ + tcg_out_arithi(s, a0, a1, a2, ARITH_XOR); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, rJ), + .out_rrr = tgen_xor, + .out_rri = tgen_xori, +}; + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_sub(s, type, a0, TCG_REG_G0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tgen_orc(s, type, a0, TCG_REG_G0, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + tcg_debug_assert(ofs + len == 32); + tcg_out_arithi(s, a0, a1, ofs, SHIFT_SRL); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extract, +}; + +static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len) +{ + tcg_debug_assert(ofs + len == 32); + tcg_out_arithi(s, a0, a1, ofs, SHIFT_SRA); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_sextract, +}; + +static 
const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDUB); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDSB); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDUH); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDSH); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDUW); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, dest, base, offset, LDSW); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, data, base, offset, STB); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st8_r, +}; + +static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, data, base, offset, STH); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tgen_st16_r, +}; + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(rz, r), + .out_r = tcg_out_st, +}; + + +static TCGConstraintSetIndex +tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) +{ + return C_NotImplemented; +} + static void tcg_target_init(TCGContext *s) { + unsigned long hwcap = qemu_getauxval(AT_HWCAP); + /* * Only probe for the platform and capabilities if we haven't already * determined maximum values at compile time. */ + use_popc_instructions = (hwcap & HWCAP_SPARC_POPC) != 0; #ifndef use_vis3_instructions - { - unsigned long hwcap = qemu_getauxval(AT_HWCAP); - use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; - } + use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0; #endif tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h index 418e467..2fc0e50 100644 --- a/tcg/tcg-has.h +++ b/tcg/tcg-has.h @@ -12,53 +12,6 @@ #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. 
*/ #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_div_i64 0 -#define TCG_TARGET_HAS_rem_i64 0 -#define TCG_TARGET_HAS_div2_i64 0 -#define TCG_TARGET_HAS_rot_i64 0 -#define TCG_TARGET_HAS_ext8s_i64 0 -#define TCG_TARGET_HAS_ext16s_i64 0 -#define TCG_TARGET_HAS_ext32s_i64 0 -#define TCG_TARGET_HAS_ext8u_i64 0 -#define TCG_TARGET_HAS_ext16u_i64 0 -#define TCG_TARGET_HAS_ext32u_i64 0 -#define TCG_TARGET_HAS_bswap16_i64 0 -#define TCG_TARGET_HAS_bswap32_i64 0 -#define TCG_TARGET_HAS_bswap64_i64 0 -#define TCG_TARGET_HAS_not_i64 0 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 -#define TCG_TARGET_HAS_nand_i64 0 -#define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_add2_i64 0 -#define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i64 0 -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 -/* Turn some undef macros into true macros. */ -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#endif - -/* Only one of DIV or DIV2 should be defined. */ -#if defined(TCG_TARGET_HAS_div_i32) -#define TCG_TARGET_HAS_div2_i32 0 -#elif defined(TCG_TARGET_HAS_div2_i32) -#define TCG_TARGET_HAS_div_i32 0 -#define TCG_TARGET_HAS_rem_i32 0 -#endif -#if defined(TCG_TARGET_HAS_div_i64) -#define TCG_TARGET_HAS_div2_i64 0 -#elif defined(TCG_TARGET_HAS_div2_i64) -#define TCG_TARGET_HAS_div_i64 0 -#define TCG_TARGET_HAS_rem_i64 0 #endif #if !defined(TCG_TARGET_HAS_v64) \ diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index ff85fb2..d6a12af 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -107,8 +107,8 @@ void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e); TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, - TCGOpcode opc, unsigned nargs); + TCGOpcode, TCGType, unsigned nargs); TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, - TCGOpcode opc, unsigned nargs); + TCGOpcode, TCGType, unsigned nargs); #endif /* TCG_INTERNAL_H */ diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index 3b073b4..fa9e522 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -88,24 +88,40 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st) return op; } -static void gen_ldst(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh, - TCGTemp *addr, MemOpIdx oi) +static void gen_ldst1(TCGOpcode opc, TCGType type, TCGTemp *v, + TCGTemp *addr, MemOpIdx oi) { - if (vh) { - tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi); + TCGOp *op = tcg_gen_op3(opc, type, temp_arg(v), temp_arg(addr), oi); + TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; +} + +static void gen_ldst2(TCGOpcode opc, TCGType type, TCGTemp *vl, TCGTemp *vh, + TCGTemp *addr, MemOpIdx oi) +{ + TCGOp *op = tcg_gen_op4(opc, type, temp_arg(vl), temp_arg(vh), + temp_arg(addr), oi); + TCGOP_FLAGS(op) = get_memop(oi) & MO_SIZE; +} + +static void gen_ld_i64(TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) +{ + if (TCG_TARGET_REG_BITS == 32) { + gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I64, + tcgv_i32_temp(TCGV_LOW(v)), tcgv_i32_temp(TCGV_HIGH(v)), + addr, oi); } else { - tcg_gen_op3(opc, type, temp_arg(vl), temp_arg(addr), oi); + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I64, tcgv_i64_temp(v), addr, oi); } } -static void gen_ldst_i64(TCGOpcode opc, 
TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) +static void gen_st_i64(TCGv_i64 v, TCGTemp *addr, MemOpIdx oi) { if (TCG_TARGET_REG_BITS == 32) { - TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v)); - TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v)); - gen_ldst(opc, TCG_TYPE_I64, vl, vh, addr, oi); + gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I64, + tcgv_i32_temp(TCGV_LOW(v)), tcgv_i32_temp(TCGV_HIGH(v)), + addr, oi); } else { - gen_ldst(opc, TCG_TYPE_I64, tcgv_i64_temp(v), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I64, tcgv_i64_temp(v), addr, oi); } } @@ -232,8 +248,7 @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr, } copy_addr = plugin_maybe_preserve_addr(addr); - gen_ldst(INDEX_op_qemu_ld_i32, TCG_TYPE_I32, - tcgv_i32_temp(val), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_ld, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi); plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); @@ -266,7 +281,6 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, { TCGv_i32 swap = NULL; MemOpIdx orig_oi, oi; - TCGOpcode opc; tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); memop = tcg_canonicalize_memop(memop, 0, 1); @@ -289,12 +303,7 @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) { - opc = INDEX_op_qemu_st8_i32; - } else { - opc = INDEX_op_qemu_st_i32; - } - gen_ldst(opc, TCG_TYPE_I32, tcgv_i32_temp(val), NULL, addr, oi); + gen_ldst1(INDEX_op_qemu_st, TCG_TYPE_I32, tcgv_i32_temp(val), addr, oi); plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { @@ -341,7 +350,7 @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr, } copy_addr = plugin_maybe_preserve_addr(addr); - gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi); + gen_ld_i64(val, addr, oi); plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R); @@ -408,7 +417,7 @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr, oi = make_memop_idx(memop, idx); } - gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi); + gen_st_i64(val, addr, oi); plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W); if (swap) { @@ -547,8 +556,8 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, hi = TCGV128_HIGH(val); } - gen_ldst(INDEX_op_qemu_ld_i128, TCG_TYPE_I128, tcgv_i64_temp(lo), - tcgv_i64_temp(hi), addr, oi); + gen_ldst2(INDEX_op_qemu_ld2, TCG_TYPE_I128, tcgv_i64_temp(lo), + tcgv_i64_temp(hi), addr, oi); if (need_bswap) { tcg_gen_bswap64_i64(lo, lo); @@ -576,8 +585,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, y = TCGV128_LOW(val); } - gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, - make_memop_idx(mop[0], idx)); + gen_ld_i64(x, addr, make_memop_idx(mop[0], idx)); if (need_bswap) { tcg_gen_bswap64_i64(x, x); @@ -593,8 +601,7 @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr, addr_p8 = tcgv_i64_temp(t); } - gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, - make_memop_idx(mop[1], idx)); + gen_ld_i64(y, addr_p8, make_memop_idx(mop[1], idx)); tcg_temp_free_internal(addr_p8); if (need_bswap) { @@ -658,8 +665,8 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, hi = TCGV128_HIGH(val); } - gen_ldst(INDEX_op_qemu_st_i128, TCG_TYPE_I128, - tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); + gen_ldst2(INDEX_op_qemu_st2, TCG_TYPE_I128, + tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi); if (need_bswap) { tcg_temp_free_i64(lo); @@ -686,8 +693,7 @@ 
static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, x = b; } - gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, - make_memop_idx(mop[0], idx)); + gen_st_i64(x, addr, make_memop_idx(mop[0], idx)); if (tcg_ctx->addr_type == TCG_TYPE_I32) { TCGv_i32 t = tcg_temp_ebb_new_i32(); @@ -701,12 +707,10 @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr, if (b) { tcg_gen_bswap64_i64(b, y); - gen_ldst_i64(INDEX_op_qemu_st_i64, b, addr_p8, - make_memop_idx(mop[1], idx)); + gen_st_i64(b, addr_p8, make_memop_idx(mop[1], idx)); tcg_temp_free_i64(b); } else { - gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, - make_memop_idx(mop[1], idx)); + gen_st_i64(y, addr_p8, make_memop_idx(mop[1], idx)); } tcg_temp_free_internal(addr_p8); } else { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index fec6d67..dfa5c38 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -249,24 +249,6 @@ static void DNI tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, tcgv_i64_arg(a3), a4, a5); } -static void DNI tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, - TCGv_i32 a3, TCGv_i32 a4, - TCGv_i32 a5, TCGv_i32 a6) -{ - tcg_gen_op6(opc, TCG_TYPE_I32, tcgv_i32_arg(a1), tcgv_i32_arg(a2), - tcgv_i32_arg(a3), tcgv_i32_arg(a4), tcgv_i32_arg(a5), - tcgv_i32_arg(a6)); -} - -static void DNI tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 a1, TCGv_i64 a2, - TCGv_i64 a3, TCGv_i64 a4, - TCGv_i64 a5, TCGv_i64 a6) -{ - tcg_gen_op6(opc, TCG_TYPE_I64, tcgv_i64_arg(a1), tcgv_i64_arg(a2), - tcgv_i64_arg(a3), tcgv_i64_arg(a4), tcgv_i64_arg(a5), - tcgv_i64_arg(a6)); -} - static void DNI tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 a1, TCGv_i32 a2, TCGv_i32 a3, TCGv_i32 a4, TCGv_i32 a5, TCGArg a6) @@ -351,7 +333,7 @@ void tcg_gen_discard_i32(TCGv_i32 arg) void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) { if (ret != arg) { - tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); + tcg_gen_op2_i32(INDEX_op_mov, ret, arg); } } @@ -362,7 +344,7 @@ void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_add, ret, arg1, arg2); } void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -377,7 +359,7 @@ void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_sub, ret, arg1, arg2); } void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) @@ -396,12 +378,12 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) { - tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); + tcg_gen_op2_i32(INDEX_op_neg, ret, arg); } void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_and, ret, arg1, arg2); } void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -414,17 +396,19 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) case -1: tcg_gen_mov_i32(ret, arg1); return; - case 0xff: - /* Don't recurse with tcg_gen_ext8u_i32. */ - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1); - return; - } - break; - case 0xffff: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1); - return; + default: + /* + * Canonicalize on extract, if valid. 
This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + if (!(arg2 & (arg2 + 1))) { + unsigned len = ctz32(~arg2); + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_extract_i32(ret, arg1, 0, len); + return; + } } break; } @@ -434,7 +418,7 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_or, ret, arg1, arg2); } void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -451,7 +435,7 @@ void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_xor, ret, arg1, arg2); } void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -459,9 +443,10 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) /* Some cases can be optimized here. */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) { + } else if (arg2 == -1 && + tcg_op_supported(INDEX_op_not, TCG_TYPE_I32, 0)) { /* Don't recurse with tcg_gen_not_i32. */ - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1); + tcg_gen_op2_i32(INDEX_op_not, ret, arg1); } else { tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2)); } @@ -469,8 +454,8 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_not_i32) { - tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); + if (tcg_op_supported(INDEX_op_not, TCG_TYPE_I32, 0)) { + tcg_gen_op2_i32(INDEX_op_not, ret, arg); } else { tcg_gen_xori_i32(ret, arg, -1); } @@ -478,7 +463,7 @@ void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_shl, ret, arg1, arg2); } void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -493,7 +478,7 @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_shr, ret, arg1, arg2); } void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -508,7 +493,7 @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_sar, ret, arg1, arg2); } void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -526,7 +511,7 @@ void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l) if (cond == TCG_COND_ALWAYS) { tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { - TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond_i32, + TCGOp *op = tcg_gen_op4ii_i32(INDEX_op_brcond, arg1, arg2, cond, label_arg(l)); add_as_label_use(l, op); } @@ -549,7 +534,7 @@ void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i32(ret, 0); } else { - tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); + tcg_gen_op4i_i32(INDEX_op_setcond, ret, arg1, arg2, cond); } } @@ -566,11 +551,8 @@ void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 
ret, tcg_gen_movi_i32(ret, -1); } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i32(ret, 0); - } else if (TCG_TARGET_HAS_negsetcond_i32) { - tcg_gen_op4i_i32(INDEX_op_negsetcond_i32, ret, arg1, arg2, cond); } else { - tcg_gen_setcond_i32(cond, ret, arg1, arg2); - tcg_gen_neg_i32(ret, ret); + tcg_gen_op4i_i32(INDEX_op_negsetcond, ret, arg1, arg2, cond); } } @@ -582,7 +564,7 @@ void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret, void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, ret, arg1, arg2); } void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -598,12 +580,12 @@ void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { - tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i32) { + if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_divs, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); - tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); + tcg_gen_op5_i32(INDEX_op_divs2, ret, t0, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { gen_helper_div_i32(ret, arg1, arg2); @@ -612,18 +594,18 @@ void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rem_i32) { - tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i32) { + if (tcg_op_supported(INDEX_op_rems, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rems, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_divs, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t0, arg1, 31); - tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2); + tcg_gen_op5_i32(INDEX_op_divs2, t0, ret, arg1, t0, arg2); tcg_temp_free_i32(t0); } else { gen_helper_rem_i32(ret, arg1, arg2); @@ -632,12 +614,12 @@ void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_div_i32) { - tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i32) { + if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_divu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, zero, arg2); + tcg_gen_op5_i32(INDEX_op_divu2, ret, t0, arg1, zero, arg2); tcg_temp_free_i32(t0); } else { gen_helper_divu_i32(ret, arg1, arg2); @@ -646,18 +628,18 @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rem_i32) { - tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i32) { + if (tcg_op_supported(INDEX_op_remu, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_remu, ret, arg1, 
arg2); + } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_divu, t0, arg1, arg2); tcg_gen_mul_i32(t0, t0, arg2); tcg_gen_sub_i32(ret, arg1, t0); tcg_temp_free_i32(t0); - } else if (TCG_TARGET_HAS_div2_i32) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I32, 0)) { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, zero, arg2); + tcg_gen_op5_i32(INDEX_op_divu2, t0, ret, arg1, zero, arg2); tcg_temp_free_i32(t0); } else { gen_helper_remu_i32(ret, arg1, arg2); @@ -666,8 +648,8 @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_andc_i32) { - tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_andc, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_not_i32(t0, arg2); @@ -678,8 +660,8 @@ void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_eqv_i32) { - tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_eqv, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_eqv, ret, arg1, arg2); } else { tcg_gen_xor_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -688,8 +670,8 @@ void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_nand_i32) { - tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_nand, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_nand, ret, arg1, arg2); } else { tcg_gen_and_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -698,8 +680,8 @@ void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_nor_i32) { - tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_nor, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_nor, ret, arg1, arg2); } else { tcg_gen_or_i32(ret, arg1, arg2); tcg_gen_not_i32(ret, ret); @@ -708,8 +690,8 @@ void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_orc_i32) { - tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_orc, ret, arg1, arg2); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_not_i32(t0, arg2); @@ -720,9 +702,9 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_clz_i32) { - tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_clz_i64) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_clz, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -745,9 +727,13 @@ void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_ctz_i32) { - tcg_gen_op3_i32(INDEX_op_ctz_i32, 
ret, arg1, arg2); - } else if (TCG_TARGET_HAS_ctz_i64) { + TCGv_i32 z, t; + + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_ctz, ret, arg1, arg2); + return; + } + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)) { TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t1, arg1); @@ -756,34 +742,34 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_extrl_i64_i32(ret, t1); tcg_temp_free_i64(t1); tcg_temp_free_i64(t2); - } else if (TCG_TARGET_HAS_ctpop_i32 - || TCG_TARGET_HAS_ctpop_i64 - || TCG_TARGET_HAS_clz_i32 - || TCG_TARGET_HAS_clz_i64) { - TCGv_i32 z, t = tcg_temp_ebb_new_i32(); - - if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_subi_i32(t, arg1, 1); - tcg_gen_andc_i32(t, t, arg1); - tcg_gen_ctpop_i32(t, t); - } else { - /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */ - tcg_gen_neg_i32(t, arg1); - tcg_gen_and_i32(t, t, arg1); - tcg_gen_clzi_i32(t, t, 32); - tcg_gen_xori_i32(t, t, 31); - } - z = tcg_constant_i32(0); - tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); - tcg_temp_free_i32(t); + return; + } + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_REG, 0)) { + t = tcg_temp_ebb_new_i32(); + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(t, t); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_REG, 0)) { + t = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t, arg1); + tcg_gen_and_i32(t, t, arg1); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_xori_i32(t, t, 31); } else { gen_helper_ctz_i32(ret, arg1, arg2); + return; } + + z = tcg_constant_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i32(t); } void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { - if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { + if (arg2 == 32 + && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0) + && tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_REG, 0)) { /* This equivalence has the advantage of not requiring a fixup. 
*/ TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_subi_i32(t, arg1, 1); @@ -797,7 +783,7 @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_REG, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_sari_i32(t, arg, 31); tcg_gen_xor_i32(t, t, arg); @@ -811,9 +797,9 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) { - if (TCG_TARGET_HAS_ctpop_i32) { - tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); - } else if (TCG_TARGET_HAS_ctpop_i64) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I32, 0)) { + tcg_gen_op2_i32(INDEX_op_ctpop, ret, arg1); + } else if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_extu_i32_i64(t, arg1); tcg_gen_ctpop_i64(t, t); @@ -826,15 +812,18 @@ void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t0, arg2); + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shl_i32(t0, arg1, arg2); - tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_neg_i32(t1, arg2); tcg_gen_shr_i32(t1, arg1, t1); tcg_gen_or_i32(ret, t0, t1); tcg_temp_free_i32(t0); @@ -848,12 +837,15 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i32(ret, arg1); - } else if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_constant_i32(arg2); + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_constant_i32(32 - arg2); + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0); } else { - TCGv_i32 t0, t1; - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shli_i32(t0, arg1, arg2); tcg_gen_shri_i32(t1, arg1, 32 - arg2); tcg_gen_or_i32(ret, t0, t1); @@ -864,15 +856,18 @@ void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_rot_i32) { - tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) { + tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_neg_i32(t0, arg2); + tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i32 t0, t1; - - t0 = tcg_temp_ebb_new_i32(); - t1 = tcg_temp_ebb_new_i32(); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); tcg_gen_shr_i32(t0, arg1, arg2); - tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_neg_i32(t1, arg2); tcg_gen_shl_i32(t1, arg1, t1); tcg_gen_or_i32(ret, t0, t1); 
tcg_temp_free_i32(t0); @@ -883,12 +878,7 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 32); - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i32(ret, arg1); - } else { - tcg_gen_rotli_i32(ret, arg1, 32 - arg2); - } + tcg_gen_rotli_i32(ret, arg1, -arg2 & 31); } void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, @@ -907,13 +897,13 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, return; } if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) { - tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); + tcg_gen_op5ii_i32(INDEX_op_deposit, ret, arg1, arg2, ofs, len); return; } t1 = tcg_temp_ebb_new_i32(); - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { if (ofs + len == 32) { tcg_gen_shli_i32(t1, arg1, len); tcg_gen_extract2_i32(ret, t1, arg2, len); @@ -953,42 +943,22 @@ void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_andi_i32(ret, arg, (1u << len) - 1); } else if (TCG_TARGET_deposit_valid(TCG_TYPE_I32, ofs, len)) { TCGv_i32 zero = tcg_constant_i32(0); - tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len); - } else { - /* To help two-operand hosts we prefer to zero-extend first, - which allows ARG to stay live. */ - switch (len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_ext16u_i32(ret, arg); - tcg_gen_shli_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_ext8u_i32(ret, arg); - tcg_gen_shli_i32(ret, ret, ofs); - return; - } - break; + tcg_gen_op5ii_i32(INDEX_op_deposit, ret, zero, arg, ofs, len); + } else { + /* + * To help two-operand hosts we prefer to zero-extend first, + * which allows ARG to stay live. + */ + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_extract_i32(ret, arg, 0, len); + tcg_gen_shli_i32(ret, ret, ofs); + return; } /* Otherwise prefer zero-extension over AND for code size. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_shli_i32(ret, arg, ofs); - tcg_gen_ext16u_i32(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_shli_i32(ret, arg, ofs); - tcg_gen_ext8u_i32(ret, ret); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_shli_i32(ret, arg, ofs); + tcg_gen_extract_i32(ret, ret, 0, ofs + len); + return; } tcg_gen_andi_i32(ret, arg, (1u << len) - 1); tcg_gen_shli_i32(ret, ret, ofs); @@ -1008,32 +978,21 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_shri_i32(ret, arg, 32 - len); return; } - if (ofs == 0) { - tcg_gen_andi_i32(ret, arg, (1u << len) - 1); - return; - } if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) { - tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len); + tcg_gen_op4ii_i32(INDEX_op_extract, ret, arg, ofs, len); + return; + } + if (ofs == 0) { + tcg_gen_andi_i32(ret, arg, (1u << len) - 1); return; } /* Assume that zero-extension, if available, is cheaper than a shift. 
*/ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_ext16u_i32(ret, arg); - tcg_gen_shri_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_ext8u_i32(ret, arg); - tcg_gen_shri_i32(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_op4ii_i32(INDEX_op_extract, ret, arg, 0, ofs + len); + tcg_gen_shri_i32(ret, ret, ofs); + return; } /* ??? Ideally we'd know what values are available for immediate AND. @@ -1064,54 +1023,22 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, tcg_gen_sari_i32(ret, arg, 32 - len); return; } - if (ofs == 0) { - switch (len) { - case 16: - tcg_gen_ext16s_i32(ret, arg); - return; - case 8: - tcg_gen_ext8s_i32(ret, arg); - return; - } - } if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) { - tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len); + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, arg, ofs, len); return; } /* Assume that sign-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 16: - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_ext16s_i32(ret, arg); - tcg_gen_sari_i32(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_ext8s_i32(ret, arg); - tcg_gen_sari_i32(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, ofs + len)) { + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, arg, 0, ofs + len); + tcg_gen_sari_i32(ret, ret, ofs); + return; } - switch (len) { - case 16: - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_ext16s_i32(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_shri_i32(ret, arg, ofs); - tcg_gen_ext8s_i32(ret, ret); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, 0, len)) { + tcg_gen_shri_i32(ret, arg, ofs); + tcg_gen_op4ii_i32(INDEX_op_sextract, ret, ret, 0, len); + return; } tcg_gen_shli_i32(ret, arg, 32 - len - ofs); @@ -1132,8 +1059,8 @@ void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah, tcg_gen_mov_i32(ret, ah); } else if (al == ah) { tcg_gen_rotri_i32(ret, al, ofs); - } else if (TCG_TARGET_HAS_extract2_i32) { - tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs); + } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { + tcg_gen_op4i_i32(INDEX_op_extract2, ret, al, ah, ofs); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); tcg_gen_shri_i32(t0, al, ofs); @@ -1150,52 +1077,89 @@ void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, } else if (cond == TCG_COND_NEVER) { tcg_gen_mov_i32(ret, v2); } else { - tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); + tcg_gen_op6i_i32(INDEX_op_movcond, ret, c1, c2, v1, v2, cond); } } void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { - if (TCG_TARGET_HAS_add2_i32) { - tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_op3_i32(INDEX_op_addco, t0, al, bl); + tcg_gen_op3_i32(INDEX_op_addci, rh, ah, bh); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_concat_i32_i64(t0, al, ah); - tcg_gen_concat_i32_i64(t1, bl, bh); - tcg_gen_add_i64(t0, t0, t1); - tcg_gen_extr_i64_i32(rl, rh, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + TCGv_i32 t0 = 
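The last-resort sextract fallback above (shift left, then arithmetic shift right) is worth spelling out. A C equivalent, assuming arithmetic right shifts of signed values, as QEMU does elsewhere:

    #include <stdint.h>

    /* Move the field to the top of the word, then let an arithmetic
     * right shift replicate its sign bit back down. */
    static inline int32_t sextract32_ref(uint32_t x, unsigned ofs, unsigned len)
    {
        return (int32_t)(x << (32 - len - ofs)) >> (32 - len);
    }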
tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + tcg_gen_add_i32(t0, al, bl); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t0, al); + tcg_gen_add_i32(rh, ah, bh); + tcg_gen_add_i32(rh, rh, t1); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } +} + +void tcg_gen_addcio_i32(TCGv_i32 r, TCGv_i32 co, + TCGv_i32 a, TCGv_i32 b, TCGv_i32 ci) +{ + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 mone = tcg_constant_i32(-1); + + tcg_gen_op3_i32(INDEX_op_addco, t0, ci, mone); + tcg_gen_op3_i32(INDEX_op_addcio, r, a, b); + tcg_gen_op3_i32(INDEX_op_addci, co, zero, zero); + tcg_temp_free_i32(t0); + } else { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + + tcg_gen_add_i32(t0, a, b); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, t0, a); + tcg_gen_add_i32(r, t0, ci); + tcg_gen_setcond_i32(TCG_COND_LTU, t0, r, t0); + tcg_gen_or_i32(co, t0, t1); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh) { - if (TCG_TARGET_HAS_sub2_i32) { - tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_subbi, TCG_TYPE_I32, 0)) { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + tcg_gen_op3_i32(INDEX_op_subbo, t0, al, bl); + tcg_gen_op3_i32(INDEX_op_subbi, rh, ah, bh); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); } else { - TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - TCGv_i64 t1 = tcg_temp_ebb_new_i64(); - tcg_gen_concat_i32_i64(t0, al, ah); - tcg_gen_concat_i32_i64(t1, bl, bh); - tcg_gen_sub_i64(t0, t0, t1); - tcg_gen_extr_i64_i32(rl, rh, t0); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 t1 = tcg_temp_ebb_new_i32(); + tcg_gen_sub_i32(t0, al, bl); + tcg_gen_setcond_i32(TCG_COND_LTU, t1, al, bl); + tcg_gen_sub_i32(rh, ah, bh); + tcg_gen_sub_i32(rh, rh, t1); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); } } void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_mulu2_i32) { - tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_muluh_i32) { + if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I32, 0)) { + tcg_gen_op4_i32(INDEX_op_mulu2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); - tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_muluh, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 64) { @@ -1208,18 +1172,18 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); } else { - qemu_build_not_reached(); + g_assert_not_reached(); } } void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { - if (TCG_TARGET_HAS_muls2_i32) { - tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_mulsh_i32) { + if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I32, 0)) { + tcg_gen_op4_i32(INDEX_op_muls2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); - tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); - 
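The generic add2 fallback above replaces the old concatenate-into-i64 approach with an explicit carry computation: after an unsigned add, "sum < addend" is exactly the carry out. The same computation as plain C (a sketch, not QEMU code):

    #include <stdint.h>

    static void add2_32_ref(uint32_t *rl, uint32_t *rh,
                            uint32_t al, uint32_t ah,
                            uint32_t bl, uint32_t bh)
    {
        uint32_t lo = al + bl;
        uint32_t carry = lo < al;       /* the TCG_COND_LTU setcond */
        *rl = lo;
        *rh = ah + bh + carry;
    }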
tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i32(rl, t); tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 32) { @@ -1281,40 +1245,22 @@ void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext8s_i32) { - tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg); - } else { - tcg_gen_shli_i32(ret, arg, 24); - tcg_gen_sari_i32(ret, ret, 24); - } + tcg_gen_sextract_i32(ret, arg, 0, 8); } void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext16s_i32) { - tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg); - } else { - tcg_gen_shli_i32(ret, arg, 16); - tcg_gen_sari_i32(ret, ret, 16); - } + tcg_gen_sextract_i32(ret, arg, 0, 16); } void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext8u_i32) { - tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg); - } else { - tcg_gen_andi_i32(ret, arg, 0xffu); - } + tcg_gen_extract_i32(ret, arg, 0, 8); } void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_ext16u_i32) { - tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg); - } else { - tcg_gen_andi_i32(ret, arg, 0xffffu); - } + tcg_gen_extract_i32(ret, arg, 0, 16); } /* @@ -1330,8 +1276,8 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) /* Only one extension flag may be present. */ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ)); - if (TCG_TARGET_HAS_bswap16_i32) { - tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags); + if (tcg_op_supported(INDEX_op_bswap16, TCG_TYPE_I32, 0)) { + tcg_gen_op3i_i32(INDEX_op_bswap16, ret, arg, flags); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -1367,8 +1313,8 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags) */ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) { - if (TCG_TARGET_HAS_bswap32_i32) { - tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); + if (tcg_op_supported(INDEX_op_bswap32, TCG_TYPE_I32, 0)) { + tcg_gen_op3i_i32(INDEX_op_bswap32, ret, arg, 0); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 t1 = tcg_temp_ebb_new_i32(); @@ -1433,42 +1379,42 @@ void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a) void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld8u, ret, arg2, offset); } void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld8s, ret, arg2, offset); } void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld16u, ret, arg2, offset); } void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld16s, ret, arg2, offset); } void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_ld, ret, arg2, offset); } void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_st8, arg1, arg2, offset); } void tcg_gen_st16_i32(TCGv_i32 arg1, 
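When neither mulu2 nor muluh is available but the host is 64-bit, the fallback widens once and splits the product; a sketch of the same computation in C (not the QEMU code itself):

    #include <stdint.h>

    static void mulu2_32_ref(uint32_t *rl, uint32_t *rh,
                             uint32_t a, uint32_t b)
    {
        uint64_t p = (uint64_t)a * b;   /* one widening multiply */
        *rl = (uint32_t)p;
        *rh = (uint32_t)(p >> 32);
    }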
TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_st16, arg1, arg2, offset); } void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) { - tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); + tcg_gen_ldst_op_i32(INDEX_op_st, arg1, arg2, offset); } @@ -1490,7 +1436,7 @@ void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) return; } if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); + tcg_gen_op2_i64(INDEX_op_mov, ret, arg); } else { TCGTemp *ts = tcgv_i64_temp(arg); @@ -1517,7 +1463,7 @@ void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld8u, ret, arg2, offset); } else { tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); @@ -1527,7 +1473,7 @@ void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld8s, ret, arg2, offset); } else { tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); @@ -1537,7 +1483,7 @@ void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld16u, ret, arg2, offset); } else { tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); @@ -1547,7 +1493,7 @@ void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld16s, ret, arg2, offset); } else { tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); @@ -1557,7 +1503,7 @@ void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld32u, ret, arg2, offset); } else { tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); @@ -1567,7 +1513,7 @@ void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld32s, ret, arg2, offset); } else { tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); @@ -1581,7 +1527,7 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) * they cannot be the same temporary -- no chance of overlap. 
*/ if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_ld, ret, arg2, offset); } else if (HOST_BIG_ENDIAN) { tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); @@ -1594,7 +1540,7 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st8, arg1, arg2, offset); } else { tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); } @@ -1603,7 +1549,7 @@ void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st16, arg1, arg2, offset); } else { tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); } @@ -1612,7 +1558,7 @@ void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st32, arg1, arg2, offset); } else { tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); } @@ -1621,7 +1567,7 @@ void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); + tcg_gen_ldst_op_i64(INDEX_op_st, arg1, arg2, offset); } else if (HOST_BIG_ENDIAN) { tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); @@ -1634,7 +1580,7 @@ void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_add, ret, arg1, arg2); } else { tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); @@ -1644,7 +1590,7 @@ void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_sub, ret, arg1, arg2); } else { tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2)); @@ -1654,7 +1600,7 @@ void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_and, ret, arg1, arg2); } else { tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); @@ -1664,7 +1610,7 @@ void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_or, ret, arg1, arg2); } else { tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_or_i32(TCGV_HIGH(ret), 
TCGV_HIGH(arg1), TCGV_HIGH(arg2)); @@ -1674,7 +1620,7 @@ void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_xor, ret, arg1, arg2); } else { tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); @@ -1684,7 +1630,7 @@ void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_shl, ret, arg1, arg2); } else { gen_helper_shl_i64(ret, arg1, arg2); } @@ -1693,7 +1639,7 @@ void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_shr, ret, arg1, arg2); } else { gen_helper_shr_i64(ret, arg1, arg2); } @@ -1702,7 +1648,7 @@ void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_sar, ret, arg1, arg2); } else { gen_helper_sar_i64(ret, arg1, arg2); } @@ -1714,7 +1660,7 @@ void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) TCGv_i32 t1; if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, ret, arg1, arg2); return; } @@ -1770,7 +1716,7 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); + tcg_gen_op2_i64(INDEX_op_neg, ret, arg); } else { TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), @@ -1794,23 +1740,19 @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) case -1: tcg_gen_mov_i64(ret, arg1); return; - case 0xff: - /* Don't recurse with tcg_gen_ext8u_i64. */ - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1); - return; - } - break; - case 0xffff: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1); - return; - } - break; - case 0xffffffffu: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1); - return; + default: + /* + * Canonicalize on extract, if valid. This aids x86 with its + * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode + * which does not require matching operands. Other backends can + * trivially expand the extract to AND during code generation. + */ + if (!(arg2 & (arg2 + 1))) { + unsigned len = ctz64(~arg2); + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_extract_i64(ret, arg1, 0, len); + return; + } } break; } @@ -1845,9 +1787,10 @@ void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) /* Some cases can be optimized here. */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) { + } else if (arg2 == -1 && + tcg_op_supported(INDEX_op_not, TCG_TYPE_I64, 0)) { /* Don't recurse with tcg_gen_not_i64. 
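The new andi canonicalization keys off masks of the form (1 << n) - 1: such masks satisfy m & (m + 1) == 0, and the field width n is the number of trailing one bits, i.e. ctz of the complement. A standalone C check, using the GCC/Clang builtin as a stand-in for ctz64() from host-utils.h:

    #include <stdint.h>

    /* Return n if m == (1 << n) - 1 for some n in [0, 64], else -1. */
    static int low_mask_len(uint64_t m)
    {
        if (m & (m + 1)) {
            return -1;              /* ones are not contiguous from bit 0 */
        }
        if (m == UINT64_MAX) {
            return 64;              /* ~m == 0, so ctz would be undefined */
        }
        return __builtin_ctzll(~m); /* e.g. 0xff -> 8, 0xffffffff -> 32 */
    }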
*/ - tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1); + tcg_gen_op2_i64(INDEX_op_not, ret, arg1); } else { tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2)); } @@ -1875,7 +1818,7 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_movi_i32(TCGV_LOW(ret), 0); } } else if (right) { - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), c); } else { @@ -1889,7 +1832,7 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); } } else { - if (TCG_TARGET_HAS_extract2_i32) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I32, 0)) { tcg_gen_extract2_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c); } else { @@ -1950,7 +1893,7 @@ void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l) TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2), cond, label_arg(l)); } else { - op = tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, + op = tcg_gen_op4ii_i64(INDEX_op_brcond, arg1, arg2, cond, label_arg(l)); } add_as_label_use(l, op); @@ -1987,7 +1930,7 @@ void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); + tcg_gen_op4i_i64(INDEX_op_setcond, ret, arg1, arg2, cond); } } } @@ -2023,17 +1966,14 @@ void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret, tcg_gen_movi_i64(ret, -1); } else if (cond == TCG_COND_NEVER) { tcg_gen_movi_i64(ret, 0); - } else if (TCG_TARGET_HAS_negsetcond_i64) { - tcg_gen_op4i_i64(INDEX_op_negsetcond_i64, ret, arg1, arg2, cond); - } else if (TCG_TARGET_REG_BITS == 32) { + } else if (TCG_TARGET_REG_BITS == 64) { + tcg_gen_op4i_i64(INDEX_op_negsetcond, ret, arg1, arg2, cond); + } else { tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); tcg_gen_neg_i32(TCGV_LOW(ret), TCGV_LOW(ret)); tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_LOW(ret)); - } else { - tcg_gen_setcond_i64(cond, ret, arg1, arg2); - tcg_gen_neg_i64(ret, ret); } } @@ -2050,12 +1990,12 @@ void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { - tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i64) { + if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_divs, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); - tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); + tcg_gen_op5_i64(INDEX_op_divs2, ret, t0, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { gen_helper_div_i64(ret, arg1, arg2); @@ -2064,18 +2004,18 @@ void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rem_i64) { - tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i64) { + if (tcg_op_supported(INDEX_op_rems, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rems, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divs, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_divs, t0, arg1, arg2); 
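The rems/remu fallbacks via division use the usual identity r = a - (a / b) * b, valid for both signednesses whenever the division itself is defined:

    #include <stdint.h>

    /* Sketch of the remainder-from-quotient expansion; b != 0, and for
     * the signed case INT64_MIN / -1 is excluded, matching the
     * undefined-behaviour rules of the TCG division ops. */
    static int64_t rems_ref(int64_t a, int64_t b)
    {
        return a - (a / b) * b;
    }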
tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divs2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t0, arg1, 63); - tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); + tcg_gen_op5_i64(INDEX_op_divs2, t0, ret, arg1, t0, arg2); tcg_temp_free_i64(t0); } else { gen_helper_rem_i64(ret, arg1, arg2); @@ -2084,12 +2024,12 @@ void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_div_i64) { - tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div2_i64) { + if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_divu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, zero, arg2); + tcg_gen_op5_i64(INDEX_op_divu2, ret, t0, arg1, zero, arg2); tcg_temp_free_i64(t0); } else { gen_helper_divu_i64(ret, arg1, arg2); @@ -2098,18 +2038,18 @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rem_i64) { - tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_div_i64) { + if (tcg_op_supported(INDEX_op_remu, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_remu, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_divu, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_divu, t0, arg1, arg2); tcg_gen_mul_i64(t0, t0, arg2); tcg_gen_sub_i64(ret, arg1, t0); tcg_temp_free_i64(t0); - } else if (TCG_TARGET_HAS_div2_i64) { + } else if (tcg_op_supported(INDEX_op_divu2, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, zero, arg2); + tcg_gen_op5_i64(INDEX_op_divu2, t0, ret, arg1, zero, arg2); tcg_temp_free_i64(t0); } else { gen_helper_remu_i64(ret, arg1, arg2); @@ -2118,77 +2058,32 @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 56); - tcg_gen_sari_i64(ret, ret, 56); - } + tcg_gen_sextract_i64(ret, arg, 0, 8); } void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 48); - tcg_gen_sari_i64(ret, ret, 48); - } + tcg_gen_sextract_i64(ret, arg, 0, 16); } void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); - } else if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg); - } else { - tcg_gen_shli_i64(ret, arg, 32); - tcg_gen_sari_i64(ret, ret, 32); - } + 
tcg_gen_sextract_i64(ret, arg, 0, 32); } void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffu); - } + tcg_gen_extract_i64(ret, arg, 0, 8); } void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffffu); - } + tcg_gen_extract_i64(ret, arg, 0, 16); } void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_REG_BITS == 32) { - tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); - } else if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg); - } else { - tcg_gen_andi_i64(ret, arg, 0xffffffffu); - } + tcg_gen_extract_i64(ret, arg, 0, 32); } /* @@ -2211,8 +2106,8 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (TCG_TARGET_HAS_bswap16_i64) { - tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags); + } else if (tcg_op_supported(INDEX_op_bswap16, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap16, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2261,8 +2156,8 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags) } else { tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } - } else if (TCG_TARGET_HAS_bswap32_i64) { - tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags); + } else if (tcg_op_supported(INDEX_op_bswap32, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap32, ret, arg, flags); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2308,8 +2203,8 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) tcg_gen_mov_i32(TCGV_HIGH(ret), t0); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); - } else if (TCG_TARGET_HAS_bswap64_i64) { - tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); + } else if (tcg_op_supported(INDEX_op_bswap64, TCG_TYPE_I64, 0)) { + tcg_gen_op3i_i64(INDEX_op_bswap64, ret, arg, 0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -2380,8 +2275,8 @@ void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_not_i64) { - tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); + } else if (tcg_op_supported(INDEX_op_not, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_not, ret, arg); } else { tcg_gen_xori_i64(ret, arg, -1); } @@ -2392,8 +2287,8 @@ void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_andc_i64) { - tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_andc, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_andc, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_not_i64(t0, arg2); @@ -2407,8 +2302,8 @@ void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if 
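For the bswap16 paths above, a C model of what the op produces, to the best of my reading of the TCG_BSWAP_* flags: the two low bytes are swapped, and the result is then zero-extended (TCG_BSWAP_OZ) or sign-extended (TCG_BSWAP_OS) from bit 15. The helper name below is illustrative only.

    #include <stdint.h>

    static uint32_t bswap16_ref(uint32_t x, int sign_extend)
    {
        uint16_t swapped = (uint16_t)((x << 8) | ((x >> 8) & 0xff));
        return sign_extend ? (uint32_t)(int32_t)(int16_t)swapped
                           : (uint32_t)swapped;
    }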
(TCG_TARGET_REG_BITS == 32) { tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_eqv_i64) { - tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_eqv, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_eqv, ret, arg1, arg2); } else { tcg_gen_xor_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); @@ -2420,8 +2315,8 @@ void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_nand_i64) { - tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_nand, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_nand, ret, arg1, arg2); } else { tcg_gen_and_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); @@ -2433,8 +2328,8 @@ void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_nor_i64) { - tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_nor, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_nor, ret, arg1, arg2); } else { tcg_gen_or_i64(ret, arg1, arg2); tcg_gen_not_i64(ret, ret); @@ -2446,8 +2341,8 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) if (TCG_TARGET_REG_BITS == 32) { tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); - } else if (TCG_TARGET_HAS_orc_i64) { - tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_orc, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_orc, ret, arg1, arg2); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_not_i64(t0, arg2); @@ -2458,8 +2353,8 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_clz_i64) { - tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_clz, ret, arg1, arg2); } else { gen_helper_clz_i64(ret, arg1, arg2); } @@ -2468,8 +2363,8 @@ void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { if (TCG_TARGET_REG_BITS == 32 - && TCG_TARGET_HAS_clz_i32 - && arg2 <= 0xffffffffu) { + && arg2 <= 0xffffffffu + && tcg_op_supported(INDEX_op_clz, TCG_TYPE_I32, 0)) { TCGv_i32 t = tcg_temp_ebb_new_i32(); tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32); tcg_gen_addi_i32(t, t, 32); @@ -2483,45 +2378,47 @@ void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_ctz_i64) { - tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) { - TCGv_i64 z, t = tcg_temp_ebb_new_i64(); + TCGv_i64 z, t; - if (TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_subi_i64(t, arg1, 1); - tcg_gen_andc_i64(t, t, arg1); - tcg_gen_ctpop_i64(t, t); - } else { - /* Since all non-x86 hosts have clz(0) == 64, don't fight it. 
*/ - tcg_gen_neg_i64(t, arg1); - tcg_gen_and_i64(t, t, arg1); - tcg_gen_clzi_i64(t, t, 64); - tcg_gen_xori_i64(t, t, 63); - } - z = tcg_constant_i64(0); - tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); - tcg_temp_free_i64(t); - tcg_temp_free_i64(z); + if (tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_ctz, ret, arg1, arg2); + return; + } + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { + t = tcg_temp_ebb_new_i64(); + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(t, t); + } else if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { + t = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t, arg1); + tcg_gen_and_i64(t, t, arg1); + tcg_gen_clzi_i64(t, t, 64); + tcg_gen_xori_i64(t, t, 63); } else { gen_helper_ctz_i64(ret, arg1, arg2); + return; } + + z = tcg_constant_i64(0); + tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i64(t); } void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) { if (TCG_TARGET_REG_BITS == 32 - && TCG_TARGET_HAS_ctz_i32 - && arg2 <= 0xffffffffu) { + && arg2 <= 0xffffffffu + && tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)) { TCGv_i32 t32 = tcg_temp_ebb_new_i32(); tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32); tcg_gen_addi_i32(t32, t32, 32); tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); tcg_temp_free_i32(t32); - } else if (!TCG_TARGET_HAS_ctz_i64 - && TCG_TARGET_HAS_ctpop_i64 - && arg2 == 64) { + } else if (arg2 == 64 + && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0) + && tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { /* This equivalence has the advantage of not requiring a fixup. */ TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_subi_i64(t, arg1, 1); @@ -2535,7 +2432,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) { - if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) { + if (tcg_op_supported(INDEX_op_clz, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); tcg_gen_sari_i64(t, arg, 63); tcg_gen_xor_i64(t, t, arg); @@ -2549,28 +2446,37 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) { - if (TCG_TARGET_HAS_ctpop_i64) { - tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); - } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) { - tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); - tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); - tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); - tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + if (TCG_TARGET_REG_BITS == 64) { + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I64, 0)) { + tcg_gen_op2_i64(INDEX_op_ctpop, ret, arg1); + return; + } } else { - gen_helper_ctpop_i64(ret, arg1); + if (tcg_op_supported(INDEX_op_ctpop, TCG_TYPE_I32, 0)) { + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + return; + } } + gen_helper_ctpop_i64(ret, arg1); } void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t0, arg2); + 
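The restructured ctz fallback uses two classic identities, one per available op. In C, with the builtins standing in for ctpop64/clz64 from host-utils.h, and x non-zero (the zero case is patched up separately by the movcond on arg2):

    #include <stdint.h>

    /* via ctpop: (x - 1) & ~x sets exactly the bits below the lowest
     * set bit, so its population count equals the trailing-zero count */
    static unsigned ctz64_via_ctpop(uint64_t x)
    {
        return __builtin_popcountll((x - 1) & ~x);
    }

    /* via clz: x & -x isolates the lowest set bit, and for a single
     * set bit ctz == 63 - clz == clz ^ 63 */
    static unsigned ctz64_via_clz(uint64_t x)
    {
        return __builtin_clzll(x & -x) ^ 63;
    }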
tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); + tcg_temp_free_i64(t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shl_i64(t0, arg1, arg2); - tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_neg_i64(t1, arg2); tcg_gen_shr_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); @@ -2584,12 +2490,15 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) /* some cases can be optimized here */ if (arg2 == 0) { tcg_gen_mov_i64(ret, arg1); - } else if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2)); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_constant_i64(arg2); + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); + } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_constant_i64(64 - arg2); + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shli_i64(t0, arg1, arg2); tcg_gen_shri_i64(t1, arg1, 64 - arg2); tcg_gen_or_i64(ret, t0, t1); @@ -2600,14 +2509,18 @@ void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_rot_i64) { - tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); + if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) { + tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + tcg_gen_neg_i64(t0, arg2); + tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0); + tcg_temp_free_i64(t0); } else { - TCGv_i64 t0, t1; - t0 = tcg_temp_ebb_new_i64(); - t1 = tcg_temp_ebb_new_i64(); + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); tcg_gen_shr_i64(t0, arg1, arg2); - tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_neg_i64(t1, arg2); tcg_gen_shl_i64(t1, arg1, t1); tcg_gen_or_i64(ret, t0, t1); tcg_temp_free_i64(t0); @@ -2618,12 +2531,7 @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) { tcg_debug_assert(arg2 >= 0 && arg2 < 64); - /* some cases can be optimized here */ - if (arg2 == 0) { - tcg_gen_mov_i64(ret, arg1); - } else { - tcg_gen_rotli_i64(ret, arg1, 64 - arg2); - } + tcg_gen_rotli_i64(ret, arg1, -arg2 & 63); } void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, @@ -2644,7 +2552,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) { - tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); + tcg_gen_op5ii_i64(INDEX_op_deposit, ret, arg1, arg2, ofs, len); return; } } else { @@ -2664,7 +2572,7 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, t1 = tcg_temp_ebb_new_i64(); - if (TCG_TARGET_HAS_extract2_i64) { + if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) { if (ofs + len == 64) { tcg_gen_shli_i64(t1, arg1, len); tcg_gen_extract2_i64(ret, t1, arg2, len); @@ -2705,7 +2613,7 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, } else if (TCG_TARGET_REG_BITS == 64 && TCG_TARGET_deposit_valid(TCG_TYPE_I64, ofs, len)) { TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, 
zero, arg, ofs, len); + tcg_gen_op5ii_i64(INDEX_op_deposit, ret, zero, arg, ofs, len); } else { if (TCG_TARGET_REG_BITS == 32) { if (ofs >= 32) { @@ -2720,54 +2628,20 @@ void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, return; } } - /* To help two-operand hosts we prefer to zero-extend first, - which allows ARG to stay live. */ - switch (len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_ext32u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_ext16u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_ext8u_i64(ret, arg); - tcg_gen_shli_i64(ret, ret, ofs); - return; - } - break; + /* + * To help two-operand hosts we prefer to zero-extend first, + * which allows ARG to stay live. + */ + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_extract_i64(ret, arg, 0, len); + tcg_gen_shli_i64(ret, ret, ofs); + return; } /* Otherwise prefer zero-extension over AND for code size. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext32u_i64(ret, ret); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext16u_i64(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_shli_i64(ret, arg, ofs); - tcg_gen_ext8u_i64(ret, ret); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_shli_i64(ret, arg, ofs); + tcg_gen_extract_i64(ret, ret, 0, ofs + len); + return; } tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); tcg_gen_shli_i64(ret, ret, ofs); @@ -2787,10 +2661,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_shri_i64(ret, arg, 64 - len); return; } - if (ofs == 0) { - tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); - return; - } if (TCG_TARGET_REG_BITS == 32) { /* Look for a 32-bit extract within one of the two words. */ @@ -2804,39 +2674,34 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_movi_i32(TCGV_HIGH(ret), 0); return; } - /* The field is split across two words. One double-word - shift is better than two double-word shifts. */ - goto do_shift_and; + + /* The field is split across two words. */ + tcg_gen_extract2_i32(TCGV_LOW(ret), TCGV_LOW(arg), + TCGV_HIGH(arg), ofs); + if (len <= 32) { + tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(ret), 0, len); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } else { + tcg_gen_extract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), + ofs, len - 32); + } + return; } if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) { - tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len); + tcg_gen_op4ii_i64(INDEX_op_extract, ret, arg, ofs, len); + return; + } + if (ofs == 0) { + tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); return; } /* Assume that zero-extension, if available, is cheaper than a shift. 
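The new extract_i64 path for a field that straddles the two host words on a 32-bit host relies on extract2 being a right funnel shift: the low word of ah:al shifted right by ofs. A C rendering of that primitive (illustrative helper, for 0 <= ofs < 32):

    #include <stdint.h>

    static uint32_t extract2_32_ref(uint32_t al, uint32_t ah, unsigned ofs)
    {
        /* low 32 bits of the 64-bit value ah:al shifted right by ofs */
        return (uint32_t)((((uint64_t)ah << 32) | al) >> ofs);
    }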
*/ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32u_i64) { - tcg_gen_ext32u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16u_i64) { - tcg_gen_ext16u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8u_i64) { - tcg_gen_ext8u_i64(ret, arg); - tcg_gen_shri_i64(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_extract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_op4ii_i64(INDEX_op_extract, ret, arg, 0, ofs + len); + tcg_gen_shri_i64(ret, ret, ofs); + return; } /* ??? Ideally we'd know what values are available for immediate AND. @@ -2844,7 +2709,6 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, so that we get ext8u, ext16u, and ext32u. */ switch (len) { case 1 ... 8: case 16: case 32: - do_shift_and: tcg_gen_shri_i64(ret, arg, ofs); tcg_gen_andi_i64(ret, ret, (1ull << len) - 1); break; @@ -2868,19 +2732,6 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, tcg_gen_sari_i64(ret, arg, 64 - len); return; } - if (ofs == 0) { - switch (len) { - case 32: - tcg_gen_ext32s_i64(ret, arg); - return; - case 16: - tcg_gen_ext16s_i64(ret, arg); - return; - case 8: - tcg_gen_ext8s_i64(ret, arg); - return; - } - } if (TCG_TARGET_REG_BITS == 32) { /* Look for a 32-bit extract within one of the two words. */ @@ -2915,57 +2766,22 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, } if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) { - tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len); + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, arg, ofs, len); return; } /* Assume that sign-extension, if available, is cheaper than a shift. */ - switch (ofs + len) { - case 32: - if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_ext32s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_ext16s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_ext8s_i64(ret, arg); - tcg_gen_sari_i64(ret, ret, ofs); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, ofs + len)) { + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, arg, 0, ofs + len); + tcg_gen_sari_i64(ret, ret, ofs); + return; } - switch (len) { - case 32: - if (TCG_TARGET_HAS_ext32s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext32s_i64(ret, ret); - return; - } - break; - case 16: - if (TCG_TARGET_HAS_ext16s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext16s_i64(ret, ret); - return; - } - break; - case 8: - if (TCG_TARGET_HAS_ext8s_i64) { - tcg_gen_shri_i64(ret, arg, ofs); - tcg_gen_ext8s_i64(ret, ret); - return; - } - break; + if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, 0, len)) { + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_op4ii_i64(INDEX_op_sextract, ret, ret, 0, len); + return; } + tcg_gen_shli_i64(ret, arg, 64 - len - ofs); tcg_gen_sari_i64(ret, ret, 64 - len); } @@ -2984,8 +2800,8 @@ void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah, tcg_gen_mov_i64(ret, ah); } else if (al == ah) { tcg_gen_rotri_i64(ret, al, ofs); - } else if (TCG_TARGET_HAS_extract2_i64) { - tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs); + } else if (tcg_op_supported(INDEX_op_extract2, TCG_TYPE_I64, 0)) { + tcg_gen_op4i_i64(INDEX_op_extract2, ret, al, ah, ofs); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); tcg_gen_shri_i64(t0, al, ofs); @@ -3002,7 +2818,7 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, } else if 
(cond == TCG_COND_NEVER) { tcg_gen_mov_i64(ret, v2); } else if (TCG_TARGET_REG_BITS == 64) { - tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond); + tcg_gen_op6i_i64(INDEX_op_movcond, ret, c1, c2, v1, v2, cond); } else { TCGv_i32 t0 = tcg_temp_ebb_new_i32(); TCGv_i32 zero = tcg_constant_i32(0); @@ -3023,8 +2839,25 @@ void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { - if (TCG_TARGET_HAS_add2_i64) { - tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_REG, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op3_i32(INDEX_op_addco, TCGV_LOW(t0), + TCGV_LOW(al), TCGV_LOW(bl)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_HIGH(t0), + TCGV_HIGH(al), TCGV_HIGH(bl)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_LOW(rh), + TCGV_LOW(ah), TCGV_LOW(bh)); + tcg_gen_op3_i32(INDEX_op_addci, TCGV_HIGH(rh), + TCGV_HIGH(ah), TCGV_HIGH(bh)); + } else { + tcg_gen_op3_i64(INDEX_op_addco, t0, al, bl); + tcg_gen_op3_i64(INDEX_op_addci, rh, ah, bh); + } + + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -3038,11 +2871,96 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, } } +void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co, + TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci) +{ + if (TCG_TARGET_REG_BITS == 64) { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I64, 0)) { + TCGv_i64 discard = tcg_temp_ebb_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 mone = tcg_constant_i64(-1); + + tcg_gen_op3_i64(INDEX_op_addco, discard, ci, mone); + tcg_gen_op3_i64(INDEX_op_addcio, r, a, b); + tcg_gen_op3_i64(INDEX_op_addci, co, zero, zero); + tcg_temp_free_i64(discard); + } else { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + TCGv_i64 t1 = tcg_temp_ebb_new_i64(); + + tcg_gen_add_i64(t0, a, b); + tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, a); + tcg_gen_add_i64(r, t0, ci); + tcg_gen_setcond_i64(TCG_COND_LTU, t0, r, t0); + tcg_gen_or_i64(co, t0, t1); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } + } else { + if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I32, 0)) { + TCGv_i32 discard = tcg_temp_ebb_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 mone = tcg_constant_i32(-1); + + tcg_gen_op3_i32(INDEX_op_addco, discard, TCGV_LOW(ci), mone); + tcg_gen_op3_i32(INDEX_op_addcio, discard, TCGV_HIGH(ci), mone); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_LOW(r), + TCGV_LOW(a), TCGV_LOW(b)); + tcg_gen_op3_i32(INDEX_op_addcio, TCGV_HIGH(r), + TCGV_HIGH(a), TCGV_HIGH(b)); + tcg_gen_op3_i32(INDEX_op_addci, TCGV_LOW(co), zero, zero); + tcg_temp_free_i32(discard); + } else { + TCGv_i32 t0 = tcg_temp_ebb_new_i32(); + TCGv_i32 c0 = tcg_temp_ebb_new_i32(); + TCGv_i32 c1 = tcg_temp_ebb_new_i32(); + + tcg_gen_or_i32(c1, TCGV_LOW(ci), TCGV_HIGH(ci)); + tcg_gen_setcondi_i32(TCG_COND_NE, c1, c1, 0); + + tcg_gen_add_i32(t0, TCGV_LOW(a), TCGV_LOW(b)); + tcg_gen_setcond_i32(TCG_COND_LTU, c0, t0, TCGV_LOW(a)); + tcg_gen_add_i32(TCGV_LOW(r), t0, c1); + tcg_gen_setcond_i32(TCG_COND_LTU, c1, TCGV_LOW(r), c1); + tcg_gen_or_i32(c1, c1, c0); + + tcg_gen_add_i32(t0, TCGV_HIGH(a), TCGV_HIGH(b)); + tcg_gen_setcond_i32(TCG_COND_LTU, c0, t0, TCGV_HIGH(a)); + tcg_gen_add_i32(TCGV_HIGH(r), t0, c1); + tcg_gen_setcond_i32(TCG_COND_LTU, c1, TCGV_HIGH(r), c1); + tcg_gen_or_i32(TCGV_LOW(co), c0, c1); + + tcg_temp_free_i32(t0); + 
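The add2_i64 rewrite above strings the new carry opcodes together: addco starts the chain and only produces a carry, addcio both consumes and produces one, and addci only consumes one. That is the reading the generic fallback supports; in plain C the same chain looks like the loop below (a sketch with made-up names, not the backend interface):

    #include <stdint.h>

    /* One iteration per word: the first plays the role of addco, the
     * middle ones addcio, the last one addci. */
    static void add_chain_ref(uint32_t r[4],
                              const uint32_t a[4], const uint32_t b[4])
    {
        unsigned carry = 0;
        for (int i = 0; i < 4; i++) {
            uint64_t s = (uint64_t)a[i] + b[i] + carry;
            r[i] = (uint32_t)s;
            carry = (unsigned)(s >> 32);
        }
    }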
tcg_temp_free_i32(c0); + tcg_temp_free_i32(c1); + } + tcg_gen_movi_i32(TCGV_HIGH(co), 0); + } +} + void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) { - if (TCG_TARGET_HAS_sub2_i64) { - tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); + if (tcg_op_supported(INDEX_op_subbi, TCG_TYPE_REG, 0)) { + TCGv_i64 t0 = tcg_temp_ebb_new_i64(); + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op3_i32(INDEX_op_subbo, TCGV_LOW(t0), + TCGV_LOW(al), TCGV_LOW(bl)); + tcg_gen_op3_i32(INDEX_op_subbio, TCGV_HIGH(t0), + TCGV_HIGH(al), TCGV_HIGH(bl)); + tcg_gen_op3_i32(INDEX_op_subbio, TCGV_LOW(rh), + TCGV_LOW(ah), TCGV_LOW(bh)); + tcg_gen_op3_i32(INDEX_op_subbi, TCGV_HIGH(rh), + TCGV_HIGH(ah), TCGV_HIGH(bh)); + } else { + tcg_gen_op3_i64(INDEX_op_subbo, t0, al, bl); + tcg_gen_op3_i64(INDEX_op_subbi, rh, ah, bh); + } + + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); } else { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); @@ -3058,12 +2976,12 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_mulu2_i64) { - tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_muluh_i64) { + if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I64, 0)) { + tcg_gen_op4_i64(INDEX_op_mulu2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); - tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_muluh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); } else { @@ -3077,15 +2995,16 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { - if (TCG_TARGET_HAS_muls2_i64) { - tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); - } else if (TCG_TARGET_HAS_mulsh_i64) { + if (tcg_op_supported(INDEX_op_muls2, TCG_TYPE_I64, 0)) { + tcg_gen_op4_i64(INDEX_op_muls2, rl, rh, arg1, arg2); + } else if (tcg_op_supported(INDEX_op_mulsh, TCG_TYPE_I64, 0)) { TCGv_i64 t = tcg_temp_ebb_new_i64(); - tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); - tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mul, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mulsh, rh, arg1, arg2); tcg_gen_mov_i64(rl, t); tcg_temp_free_i64(t); - } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) { + } else if (tcg_op_supported(INDEX_op_mulu2, TCG_TYPE_I64, 0) || + tcg_op_supported(INDEX_op_muluh, TCG_TYPE_I64, 0)) { TCGv_i64 t0 = tcg_temp_ebb_new_i64(); TCGv_i64 t1 = tcg_temp_ebb_new_i64(); TCGv_i64 t2 = tcg_temp_ebb_new_i64(); @@ -3164,11 +3083,9 @@ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else if (TCG_TARGET_HAS_extr_i64_i32) { + } else { tcg_gen_op2(INDEX_op_extrl_i64_i32, TCG_TYPE_I32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); - } else { - tcg_gen_mov_i32(ret, (TCGv_i32)arg); } } @@ -3176,14 +3093,9 @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); - } else if (TCG_TARGET_HAS_extr_i64_i32) { + } else { tcg_gen_op2(INDEX_op_extrh_i64_i32, TCG_TYPE_I32, tcgv_i32_arg(ret), tcgv_i64_arg(arg)); - } else { - TCGv_i64 t = 
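Likewise for sub2 without the subbo/subbio/subbi chain: the borrow out of the low-part subtraction is just the unsigned comparison al < bl. In C:

    #include <stdint.h>

    static void sub2_32_ref(uint32_t *rl, uint32_t *rh,
                            uint32_t al, uint32_t ah,
                            uint32_t bl, uint32_t bh)
    {
        uint32_t borrow = al < bl;      /* the TCG_COND_LTU setcond */
        *rl = al - bl;
        *rh = ah - bh - borrow;
    }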
tcg_temp_ebb_new_i64(); - tcg_gen_shri_i64(t, arg, 32); - tcg_gen_mov_i32(ret, (TCGv_i32)t); - tcg_temp_free_i64(t); } } @@ -133,9 +133,11 @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]); +static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest); +static void tcg_out_mb(TCGContext *s, unsigned bar); +static void tcg_out_br(TCGContext *s, TCGLabel *l); +static void tcg_out_set_carry(TCGContext *s); +static void tcg_out_set_borrow(TCGContext *s); #if TCG_TARGET_MAYBE_vec static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg dst, TCGReg src); @@ -861,6 +863,7 @@ static int tcg_out_pool_finalize(TCGContext *s) #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), typedef enum { + C_Dynamic = -2, C_NotImplemented = -1, #include "tcg-target-con-set.h" } TCGConstraintSetIndex; @@ -954,6 +957,164 @@ static const TCGConstraintSet constraint_sets[] = { #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) +/* + * TCGOutOp is the base class for a set of structures that describe how + * to generate code for a given TCGOpcode. + * + * @static_constraint: + * C_NotImplemented: The TCGOpcode is not supported by the backend. + * C_Dynamic: Use @dynamic_constraint to select a constraint set + * based on any of @type, @flags, or host isa. + * Otherwise: The register allocation constrains for the TCGOpcode. + * + * Subclasses of TCGOutOp will define a set of output routines that may + * be used. Such routines will often be selected by the set of registers + * and constants that come out of register allocation. The set of + * routines that are provided will guide the set of constraints that are + * legal. In particular, assume that tcg_optimize() has done its job in + * swapping commutative operands and folding operations for which all + * operands are constant. 
+ */ +typedef struct TCGOutOp { + TCGConstraintSetIndex static_constraint; + TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags); +} TCGOutOp; + +typedef struct TCGOutOpAddSubCarry { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2); + void (*out_rir)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2); + void (*out_rii)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, tcg_target_long a2); +} TCGOutOpAddSubCarry; + +typedef struct TCGOutOpBinary { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, tcg_target_long a2); +} TCGOutOpBinary; + +typedef struct TCGOutOpBrcond { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a1, TCGReg a2, TCGLabel *label); + void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg a1, tcg_target_long a2, TCGLabel *label); +} TCGOutOpBrcond; + +typedef struct TCGOutOpBrcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l); +} TCGOutOpBrcond2; + +typedef struct TCGOutOpBswap { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags); +} TCGOutOpBswap; + +typedef struct TCGOutOpDeposit { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len); + void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + tcg_target_long a2, unsigned ofs, unsigned len); + void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0, + TCGReg a2, unsigned ofs, unsigned len); +} TCGOutOpDeposit; + +typedef struct TCGOutOpDivRem { + TCGOutOp base; + void (*out_rr01r)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a4); +} TCGOutOpDivRem; + +typedef struct TCGOutOpExtract { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + unsigned ofs, unsigned len); +} TCGOutOpExtract; + +typedef struct TCGOutOpExtract2 { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned shr); +} TCGOutOpExtract2; + +typedef struct TCGOutOpLoad { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, intptr_t offset); +} TCGOutOpLoad; + +typedef struct TCGOutOpMovcond { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf); +} TCGOutOpMovcond; + +typedef struct TCGOutOpMul2 { + TCGOutOp base; + void (*out_rrrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3); +} TCGOutOpMul2; + +typedef struct TCGOutOpQemuLdSt { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dest, + TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt; + +typedef struct TCGOutOpQemuLdSt2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGType type, TCGReg dlo, TCGReg dhi, + TCGReg addr, MemOpIdx oi); +} TCGOutOpQemuLdSt2; + +typedef struct TCGOutOpUnary { + TCGOutOp base; + void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1); +} TCGOutOpUnary; + +typedef struct TCGOutOpSetcond { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond, + 
TCGReg ret, TCGReg a1, TCGReg a2); + void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg a1, tcg_target_long a2); +} TCGOutOpSetcond; + +typedef struct TCGOutOpSetcond2 { + TCGOutOp base; + void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, TCGArg bh, bool const_bh); +} TCGOutOpSetcond2; + +typedef struct TCGOutOpStore { + TCGOutOp base; + void (*out_r)(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, intptr_t offset); + void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data, + TCGReg base, intptr_t offset); +} TCGOutOpStore; + +typedef struct TCGOutOpSubtract { + TCGOutOp base; + void (*out_rrr)(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2); + void (*out_rir)(TCGContext *s, TCGType type, + TCGReg a0, tcg_target_long a1, TCGReg a2); +} TCGOutOpSubtract; + #include "tcg-target.c.inc" #ifndef CONFIG_TCG_INTERPRETER @@ -963,6 +1124,144 @@ QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) - < MIN_TLB_MASK_TABLE_OFS); #endif +#if TCG_TARGET_REG_BITS == 64 +/* + * We require these functions for slow-path function calls. + * Adapt them generically for opcode output. + */ + +static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_exts_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_exts_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_exts_i32_i64, +}; + +static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extu_i32_i64(s, a0, a1); +} + +static const TCGOutOpUnary outop_extu_i32_i64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extu_i32_i64, +}; + +static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extrl_i64_i32(s, a0, a1); +} + +static const TCGOutOpUnary outop_extrl_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL, +}; +#endif + +static const TCGOutOp outop_goto_ptr = { + .static_constraint = C_O0_I1(r), +}; + +static const TCGOutOpLoad outop_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tcg_out_ld, +}; + +/* + * Register V as the TCGOutOp for O. + * This verifies that V is of type T, otherwise give a nice compiler error. + * This prevents trivial mistakes within each arch/tcg-target.c.inc. + */ +#define OUTOP(O, T, V) [O] = _Generic(V, T: &V.base) + +/* Register allocation descriptions for every TCGOpcode. 
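
The OUTOP() macro above leans on C11 _Generic so that registering a value of the wrong structure type fails at compile time rather than at run time. A tiny standalone illustration of the same idiom (all names here are invented; only the _Generic pattern matches the macro):

    #include <stdio.h>

    typedef struct Base { int tag; } Base;
    typedef struct Derived { Base base; int extra; } Derived;

    /* Yields &V.base only when V has type T; any other argument type has
     * no matching association, so compilation fails with a clear error. */
    #define CHECKED_BASE(T, V)  _Generic(V, T: &V.base)

    static Derived good = { { 1 }, 2 };

    int main(void)
    {
        Base *b = CHECKED_BASE(Derived, good);    /* compiles */
        /* Base *bad = CHECKED_BASE(Derived, *b);    would not compile */
        printf("tag = %d\n", b->tag);
        return 0;
    }
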
*/ +static const TCGOutOp * const all_outop[NB_OPS] = { + OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add), + OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci), + OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco), + /* addc1o is implemented with set_carry + addcio */ + OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio), + OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and), + OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc), + OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond), + OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16), + OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32), + OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz), + OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop), + OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz), + OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit), + OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs), + OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu), + OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2), + OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2), + OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv), + OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract), + OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2), + OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u), + OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s), + OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u), + OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s), + OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld), + OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond), + OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul), + OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2), + OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh), + OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2), + OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh), + OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand), + OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg), + OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond), + OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor), + OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not), + OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or), + OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc), + OUTOP(INDEX_op_qemu_ld, TCGOutOpQemuLdSt, outop_qemu_ld), + OUTOP(INDEX_op_qemu_ld2, TCGOutOpQemuLdSt2, outop_qemu_ld2), + OUTOP(INDEX_op_qemu_st, TCGOutOpQemuLdSt, outop_qemu_st), + OUTOP(INDEX_op_qemu_st2, TCGOutOpQemuLdSt2, outop_qemu_st2), + OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems), + OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu), + OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl), + OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr), + OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar), + OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond), + OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract), + OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl), + OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr), + OUTOP(INDEX_op_st, TCGOutOpStore, outop_st), + OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8), + OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16), + OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub), + OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi), + OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo), + /* subb1o is implemented with set_borrow + subbio */ + OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio), + OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor), + + [INDEX_op_goto_ptr] = &outop_goto_ptr, + +#if TCG_TARGET_REG_BITS == 32 + 
OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2), + OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2), +#else + OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64), + OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64), + OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64), + OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32), + OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32), + OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u), + OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s), + OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st), +#endif +}; + +#undef OUTOP + /* * All TCG threads except the parent (i.e. the one that called tcg_context_init * and registered the target's TCG globals) must register with this function @@ -2146,208 +2445,56 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) case INDEX_op_exit_tb: case INDEX_op_goto_tb: case INDEX_op_goto_ptr: - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: return true; - case INDEX_op_qemu_st8_i32: - return TCG_TARGET_HAS_qemu_st8_i32; - - case INDEX_op_qemu_ld_i128: - case INDEX_op_qemu_st_i128: - return TCG_TARGET_HAS_qemu_ldst_i128; - - case INDEX_op_mov_i32: - case INDEX_op_setcond_i32: - case INDEX_op_brcond_i32: - case INDEX_op_movcond_i32: - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_add_i32: - case INDEX_op_sub_i32: - case INDEX_op_neg_i32: - case INDEX_op_mul_i32: - case INDEX_op_and_i32: - case INDEX_op_or_i32: - case INDEX_op_xor_i32: - case INDEX_op_shl_i32: - case INDEX_op_shr_i32: - case INDEX_op_sar_i32: - case INDEX_op_extract_i32: - case INDEX_op_sextract_i32: - case INDEX_op_deposit_i32: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + tcg_debug_assert(type <= TCG_TYPE_REG); return true; - case INDEX_op_negsetcond_i32: - return TCG_TARGET_HAS_negsetcond_i32; - case INDEX_op_div_i32: - case INDEX_op_divu_i32: - return TCG_TARGET_HAS_div_i32; - case INDEX_op_rem_i32: - case INDEX_op_remu_i32: - return TCG_TARGET_HAS_rem_i32; - case INDEX_op_div2_i32: - case INDEX_op_divu2_i32: - return TCG_TARGET_HAS_div2_i32; - case INDEX_op_rotl_i32: - case INDEX_op_rotr_i32: - return TCG_TARGET_HAS_rot_i32; - case INDEX_op_extract2_i32: - return TCG_TARGET_HAS_extract2_i32; - case INDEX_op_add2_i32: - return TCG_TARGET_HAS_add2_i32; - case INDEX_op_sub2_i32: - return TCG_TARGET_HAS_sub2_i32; - case INDEX_op_mulu2_i32: - return TCG_TARGET_HAS_mulu2_i32; - case INDEX_op_muls2_i32: - return TCG_TARGET_HAS_muls2_i32; - case INDEX_op_muluh_i32: - return TCG_TARGET_HAS_muluh_i32; - case INDEX_op_mulsh_i32: - return TCG_TARGET_HAS_mulsh_i32; - case INDEX_op_ext8s_i32: - return TCG_TARGET_HAS_ext8s_i32; - case INDEX_op_ext16s_i32: - return TCG_TARGET_HAS_ext16s_i32; - case INDEX_op_ext8u_i32: - return TCG_TARGET_HAS_ext8u_i32; - case INDEX_op_ext16u_i32: - return TCG_TARGET_HAS_ext16u_i32; - case INDEX_op_bswap16_i32: - return TCG_TARGET_HAS_bswap16_i32; - case INDEX_op_bswap32_i32: - return TCG_TARGET_HAS_bswap32_i32; - case INDEX_op_not_i32: - return TCG_TARGET_HAS_not_i32; - case INDEX_op_andc_i32: - return TCG_TARGET_HAS_andc_i32; - case INDEX_op_orc_i32: - return TCG_TARGET_HAS_orc_i32; - case INDEX_op_eqv_i32: - return TCG_TARGET_HAS_eqv_i32; - case INDEX_op_nand_i32: - return TCG_TARGET_HAS_nand_i32; - 
case INDEX_op_nor_i32: - return TCG_TARGET_HAS_nor_i32; - case INDEX_op_clz_i32: - return TCG_TARGET_HAS_clz_i32; - case INDEX_op_ctz_i32: - return TCG_TARGET_HAS_ctz_i32; - case INDEX_op_ctpop_i32: - return TCG_TARGET_HAS_ctpop_i32; + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + if (TCG_TARGET_REG_BITS == 32) { + tcg_debug_assert(type == TCG_TYPE_I64); + return true; + } + tcg_debug_assert(type == TCG_TYPE_I128); + goto do_lookup; + + case INDEX_op_add: + case INDEX_op_and: + case INDEX_op_brcond: + case INDEX_op_deposit: + case INDEX_op_extract: + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld: + case INDEX_op_mov: + case INDEX_op_movcond: + case INDEX_op_negsetcond: + case INDEX_op_or: + case INDEX_op_setcond: + case INDEX_op_sextract: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st: + case INDEX_op_xor: + return has_type; case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: return TCG_TARGET_REG_BITS == 32; - case INDEX_op_mov_i64: - case INDEX_op_setcond_i64: - case INDEX_op_brcond_i64: - case INDEX_op_movcond_i64: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - case INDEX_op_add_i64: - case INDEX_op_sub_i64: - case INDEX_op_neg_i64: - case INDEX_op_mul_i64: - case INDEX_op_and_i64: - case INDEX_op_or_i64: - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_st32: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i64: - case INDEX_op_deposit_i64: - return TCG_TARGET_REG_BITS == 64; - - case INDEX_op_negsetcond_i64: - return TCG_TARGET_HAS_negsetcond_i64; - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - return TCG_TARGET_HAS_div_i64; - case INDEX_op_rem_i64: - case INDEX_op_remu_i64: - return TCG_TARGET_HAS_rem_i64; - case INDEX_op_div2_i64: - case INDEX_op_divu2_i64: - return TCG_TARGET_HAS_div2_i64; - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i64: - return TCG_TARGET_HAS_rot_i64; - case INDEX_op_extract2_i64: - return TCG_TARGET_HAS_extract2_i64; case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: - return TCG_TARGET_HAS_extr_i64_i32; - case INDEX_op_ext8s_i64: - return TCG_TARGET_HAS_ext8s_i64; - case INDEX_op_ext16s_i64: - return TCG_TARGET_HAS_ext16s_i64; - case INDEX_op_ext32s_i64: - return TCG_TARGET_HAS_ext32s_i64; - case INDEX_op_ext8u_i64: - return TCG_TARGET_HAS_ext8u_i64; - case INDEX_op_ext16u_i64: - return TCG_TARGET_HAS_ext16u_i64; - case INDEX_op_ext32u_i64: - return TCG_TARGET_HAS_ext32u_i64; - case INDEX_op_bswap16_i64: - return TCG_TARGET_HAS_bswap16_i64; - case INDEX_op_bswap32_i64: - return TCG_TARGET_HAS_bswap32_i64; - case INDEX_op_bswap64_i64: - return TCG_TARGET_HAS_bswap64_i64; - case INDEX_op_not_i64: - return TCG_TARGET_HAS_not_i64; - case INDEX_op_andc_i64: - return TCG_TARGET_HAS_andc_i64; - case INDEX_op_orc_i64: - return TCG_TARGET_HAS_orc_i64; - case INDEX_op_eqv_i64: - return TCG_TARGET_HAS_eqv_i64; - case INDEX_op_nand_i64: - return TCG_TARGET_HAS_nand_i64; - case INDEX_op_nor_i64: - return TCG_TARGET_HAS_nor_i64; - case INDEX_op_clz_i64: - return TCG_TARGET_HAS_clz_i64; - case INDEX_op_ctz_i64: - return TCG_TARGET_HAS_ctz_i64; - case INDEX_op_ctpop_i64: - 
return TCG_TARGET_HAS_ctpop_i64; - case INDEX_op_add2_i64: - return TCG_TARGET_HAS_add2_i64; - case INDEX_op_sub2_i64: - return TCG_TARGET_HAS_sub2_i64; - case INDEX_op_mulu2_i64: - return TCG_TARGET_HAS_mulu2_i64; - case INDEX_op_muls2_i64: - return TCG_TARGET_HAS_muls2_i64; - case INDEX_op_muluh_i64: - return TCG_TARGET_HAS_muluh_i64; - case INDEX_op_mulsh_i64: - return TCG_TARGET_HAS_mulsh_i64; + return TCG_TARGET_REG_BITS == 64; case INDEX_op_mov_vec: case INDEX_op_dup_vec: @@ -2416,8 +2563,33 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags) return has_type && TCG_TARGET_HAS_cmpsel_vec; default: - tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS); + if (op < INDEX_op_last_generic) { + const TCGOutOp *outop; + TCGConstraintSetIndex con_set; + + if (!has_type) { + return false; + } + + do_lookup: + outop = all_outop[op]; + tcg_debug_assert(outop != NULL); + + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + if (con_set >= 0) { + return true; + } + tcg_debug_assert(con_set == C_NotImplemented); + return false; + } + tcg_debug_assert(op < NB_OPS); return true; + + case INDEX_op_last_generic: + g_assert_not_reached(); } } @@ -2808,18 +2980,23 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) col += ne_fprintf(f, ",%s", t); } } else { - col += ne_fprintf(f, " %s ", def->name); + if (def->flags & TCG_OPF_INT) { + col += ne_fprintf(f, " %s_i%d ", + def->name, + 8 * tcg_type_size(TCGOP_TYPE(op))); + } else if (def->flags & TCG_OPF_VECTOR) { + col += ne_fprintf(f, "%s v%d,e%d,", + def->name, + 8 * tcg_type_size(TCGOP_TYPE(op)), + 8 << TCGOP_VECE(op)); + } else { + col += ne_fprintf(f, " %s ", def->name); + } nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; - if (def->flags & TCG_OPF_VECTOR) { - col += ne_fprintf(f, "v%d,e%d,", - 8 * tcg_type_size(TCGOP_TYPE(op)), - 8 << TCGOP_VECE(op)); - } - k = 0; for (i = 0; i < nb_oargs; i++) { const char *sep = k ? 
"," : ""; @@ -2834,16 +3011,12 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) op->args[k++])); } switch (c) { - case INDEX_op_brcond_i32: - case INDEX_op_setcond_i32: - case INDEX_op_negsetcond_i32: - case INDEX_op_movcond_i32: + case INDEX_op_brcond: + case INDEX_op_setcond: + case INDEX_op_negsetcond: + case INDEX_op_movcond: case INDEX_op_brcond2_i32: case INDEX_op_setcond2_i32: - case INDEX_op_brcond_i64: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i64: - case INDEX_op_movcond_i64: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: if (op->args[k] < ARRAY_SIZE(cond_name) @@ -2854,13 +3027,10 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) } i = 1; break; - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: - case INDEX_op_qemu_st8_i32: - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: - case INDEX_op_qemu_ld_i128: - case INDEX_op_qemu_st_i128: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: { const char *s_al, *s_op, *s_at; MemOpIdx oi = op->args[k++]; @@ -2883,11 +3053,9 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) i = 1; } break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_bswap64: { TCGArg flags = op->args[k]; const char *name = NULL; @@ -2928,8 +3096,7 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs) switch (c) { case INDEX_op_set_label: case INDEX_op_br: - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: case INDEX_op_brcond2_i32: col += ne_fprintf(f, "%s$L%d", k ? "," : "", arg_label(op->args[k])->id); @@ -3335,19 +3502,27 @@ static void process_constraint_sets(void) static const TCGArgConstraint *opcode_args_ct(const TCGOp *op) { - const TCGOpDef *def = &tcg_op_defs[op->opc]; + TCGOpcode opc = op->opc; + TCGType type = TCGOP_TYPE(op); + unsigned flags = TCGOP_FLAGS(op); + const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOutOp *outop = all_outop[opc]; TCGConstraintSetIndex con_set; -#ifdef CONFIG_DEBUG_TCG - assert(tcg_op_supported(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op))); -#endif - if (def->flags & TCG_OPF_NOT_PRESENT) { return empty_cts; } - con_set = tcg_target_op_def(op->opc, TCGOP_TYPE(op), TCGOP_FLAGS(op)); - tcg_debug_assert(con_set >= 0 && con_set < ARRAY_SIZE(constraint_sets)); + if (outop) { + con_set = outop->static_constraint; + if (con_set == C_Dynamic) { + con_set = outop->dynamic_constraint(type, flags); + } + } else { + con_set = tcg_target_op_def(opc, type, flags); + } + tcg_debug_assert(con_set >= 0); + tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets)); /* The constraint arguments must match TCGOpcode arguments. 
*/ tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs); @@ -3376,8 +3551,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op) case INDEX_op_br: remove_label_use(op, 0); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: remove_label_use(op, 3); break; case INDEX_op_brcond2_i32: @@ -3449,21 +3623,21 @@ TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs) } TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, unsigned nargs) + TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); - TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op); + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_BEFORE(old_op, new_op, link); return new_op; } TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, - TCGOpcode opc, unsigned nargs) + TCGOpcode opc, TCGType type, unsigned nargs) { TCGOp *new_op = tcg_op_alloc(opc, nargs); - TCGOP_TYPE(new_op) = TCGOP_TYPE(old_op); + TCGOP_TYPE(new_op) = type; QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); return new_op; } @@ -3478,8 +3652,7 @@ static void move_label_uses(TCGLabel *to, TCGLabel *from) case INDEX_op_br: op->args[0] = label_arg(to); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op->args[3] = label_arg(to); break; case INDEX_op_brcond2_i32: @@ -3798,6 +3971,17 @@ liveness_pass_0(TCGContext *s) } } +static void assert_carry_dead(TCGContext *s) +{ + /* + * Carry operations can be separated by a few insns like mov, + * load or store, but they should always be "close", and + * carry-out operations should always be paired with carry-in. + * At various boundaries, carry must have been consumed. + */ + tcg_debug_assert(!s->carry_live); +} + /* Liveness analysis : update the opc_arg_life array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ @@ -3808,28 +3992,28 @@ liveness_pass_1(TCGContext *s) int nb_temps = s->nb_temps; TCGOp *op, *op_prev; TCGRegSet *prefs; - int i; prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps); - for (i = 0; i < nb_temps; ++i) { + for (int i = 0; i < nb_temps; ++i) { s->temps[i].state_ptr = prefs + i; } /* ??? Should be redundant with the exit_tb that ends the TB. */ la_func_end(s, nb_globals, nb_temps); + s->carry_live = false; QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) { int nb_iargs, nb_oargs; TCGOpcode opc_new, opc_new2; - bool have_opc_new2; TCGLifeData arg_life = 0; TCGTemp *ts; TCGOpcode opc = op->opc; - const TCGOpDef *def = &tcg_op_defs[opc]; + const TCGOpDef *def; const TCGArgConstraint *args_ct; switch (opc) { case INDEX_op_call: + assert_carry_dead(s); { const TCGHelperInfo *info = tcg_call_info(op); int call_flags = tcg_call_flags(op); @@ -3839,7 +4023,7 @@ liveness_pass_1(TCGContext *s) /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state != TS_DEAD) { goto do_not_remove_call; @@ -3850,7 +4034,7 @@ liveness_pass_1(TCGContext *s) do_not_remove_call: /* Output args are dead. */ - for (i = 0; i < nb_oargs; i++) { + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -3873,7 +4057,7 @@ liveness_pass_1(TCGContext *s) } /* Record arguments that die in this helper. 
*/ - for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) { + for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; @@ -3893,7 +4077,7 @@ liveness_pass_1(TCGContext *s) * order so that if a temp is used more than once, the stack * reset to max happens before the register reset to 0. */ - for (i = nb_iargs - 1; i >= 0; i--) { + for (int i = nb_iargs - 1; i >= 0; i--) { const TCGCallArgumentLoc *loc = &info->in[i]; ts = arg_temp(op->args[nb_oargs + i]); @@ -3921,7 +4105,7 @@ liveness_pass_1(TCGContext *s) * If a temp is used once, this produces a single set bit; * if a temp is used multiple times, this produces a set. */ - for (i = 0; i < nb_iargs; i++) { + for (int i = 0; i < nb_iargs; i++) { const TCGCallArgumentLoc *loc = &info->in[i]; ts = arg_temp(op->args[nb_oargs + i]); @@ -3941,6 +4125,7 @@ liveness_pass_1(TCGContext *s) } break; case INDEX_op_insn_start: + assert_carry_dead(s); break; case INDEX_op_discard: /* mark the temporary as dead */ @@ -3949,62 +4134,15 @@ liveness_pass_1(TCGContext *s) la_reset_pref(ts); break; - case INDEX_op_add2_i32: - opc_new = INDEX_op_add_i32; - goto do_addsub2; - case INDEX_op_sub2_i32: - opc_new = INDEX_op_sub_i32; - goto do_addsub2; - case INDEX_op_add2_i64: - opc_new = INDEX_op_add_i64; - goto do_addsub2; - case INDEX_op_sub2_i64: - opc_new = INDEX_op_sub_i64; - do_addsub2: - nb_iargs = 4; - nb_oargs = 2; - /* Test if the high part of the operation is dead, but not - the low part. The result can be optimized to a simple - add or sub. This happens often for x86_64 guest when the - cpu mode is set to 32 bit. */ - if (arg_temp(op->args[1])->state == TS_DEAD) { - if (arg_temp(op->args[0])->state == TS_DEAD) { - goto do_remove; - } - /* Replace the opcode and adjust the args in place, - leaving 3 unused args at the end. */ - op->opc = opc = opc_new; - op->args[1] = op->args[2]; - op->args[2] = op->args[4]; - /* Fall through and mark the single-word operation live. */ - nb_iargs = 2; - nb_oargs = 1; - } - goto do_not_remove; - - case INDEX_op_mulu2_i32: - opc_new = INDEX_op_mul_i32; - opc_new2 = INDEX_op_muluh_i32; - have_opc_new2 = TCG_TARGET_HAS_muluh_i32; - goto do_mul2; - case INDEX_op_muls2_i32: - opc_new = INDEX_op_mul_i32; - opc_new2 = INDEX_op_mulsh_i32; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; - goto do_mul2; - case INDEX_op_mulu2_i64: - opc_new = INDEX_op_mul_i64; - opc_new2 = INDEX_op_muluh_i64; - have_opc_new2 = TCG_TARGET_HAS_muluh_i64; - goto do_mul2; - case INDEX_op_muls2_i64: - opc_new = INDEX_op_mul_i64; - opc_new2 = INDEX_op_mulsh_i64; - have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; + case INDEX_op_muls2: + opc_new = INDEX_op_mul; + opc_new2 = INDEX_op_mulsh; goto do_mul2; + case INDEX_op_mulu2: + opc_new = INDEX_op_mul; + opc_new2 = INDEX_op_muluh; do_mul2: - nb_iargs = 2; - nb_oargs = 2; + assert_carry_dead(s); if (arg_temp(op->args[1])->state == TS_DEAD) { if (arg_temp(op->args[0])->state == TS_DEAD) { /* Both parts of the operation are dead. */ @@ -4014,7 +4152,8 @@ liveness_pass_1(TCGContext *s) op->opc = opc = opc_new; op->args[1] = op->args[2]; op->args[2] = op->args[3]; - } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) { + } else if (arg_temp(op->args[0])->state == TS_DEAD && + tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) { /* The low part of the operation is dead; generate the high. 
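
For reference, "the high" produced by the muluh/mulsh replacement is the upper half of the double-width product, which the plain mul discards. An informal sketch for 64-bit operands, using a compiler-provided 128-bit type (illustrative only, and assuming an arithmetic right shift in the signed case, as GCC and Clang provide):

    #include <stdint.h>

    static inline uint64_t ref_muluh64(uint64_t a, uint64_t b)
    {
        /* Bits [127:64] of the unsigned product; mul keeps bits [63:0]. */
        return (uint64_t)(((unsigned __int128)a * b) >> 64);
    }

    static inline int64_t ref_mulsh64(int64_t a, int64_t b)
    {
        /* Same, for the signed product. */
        return (int64_t)(((__int128)a * b) >> 64);
    }
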
*/ op->opc = opc = opc_new2; op->args[0] = op->args[1]; @@ -4024,19 +4163,94 @@ liveness_pass_1(TCGContext *s) goto do_not_remove; } /* Mark the single-word operation live. */ - nb_oargs = 1; goto do_not_remove; - default: - /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ - nb_iargs = def->nb_iargs; - nb_oargs = def->nb_oargs; + case INDEX_op_addco: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_add; + goto do_default; + + case INDEX_op_addcio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_addci; + goto do_default; - /* Test if the operation can be removed because all - its outputs are dead. We assume that nb_oargs == 0 - implies side effects */ - if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { - for (i = 0; i < nb_oargs; i++) { + case INDEX_op_subbo: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, but this may also require canonicalization. */ + op->opc = opc = INDEX_op_sub; + ts = arg_temp(op->args[2]); + if (ts->kind == TEMP_CONST) { + ts = tcg_constant_internal(ts->type, -ts->val); + if (ts->state_ptr == NULL) { + tcg_debug_assert(temp_idx(ts) == nb_temps); + nb_temps++; + ts->state_ptr = tcg_malloc(sizeof(TCGRegSet)); + ts->state = TS_DEAD; + la_reset_pref(ts); + } + op->args[2] = temp_arg(ts); + op->opc = opc = INDEX_op_add; + } + goto do_default; + + case INDEX_op_subbio: + if (s->carry_live) { + goto do_not_remove; + } + op->opc = opc = INDEX_op_subbi; + goto do_default; + + case INDEX_op_addc1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to add, add +1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_add, + TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, 1); + op->args[2] = temp_arg(ts); + goto do_default; + + case INDEX_op_subb1o: + if (s->carry_live) { + goto do_not_remove; + } + /* Lower to sub, add -1. */ + op_prev = tcg_op_insert_before(s, op, INDEX_op_sub, + TCGOP_TYPE(op), 3); + op_prev->args[0] = op->args[0]; + op_prev->args[1] = op->args[1]; + op_prev->args[2] = op->args[2]; + op->opc = opc = INDEX_op_add; + op->args[1] = op->args[0]; + ts = arg_temp(op->args[0]); + ts = tcg_constant_internal(ts->type, -1); + op->args[2] = temp_arg(ts); + goto do_default; + + default: + do_default: + /* + * Test if the operation can be removed because all + * its outputs are dead. We assume that nb_oargs == 0 + * implies side effects. + */ + def = &tcg_op_defs[opc]; + if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) { + for (int i = def->nb_oargs - 1; i >= 0; i--) { if (arg_temp(op->args[i])->state != TS_DEAD) { goto do_not_remove; } @@ -4050,7 +4264,11 @@ liveness_pass_1(TCGContext *s) break; do_not_remove: - for (i = 0; i < nb_oargs; i++) { + def = &tcg_op_defs[opc]; + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + for (int i = 0; i < nb_oargs; i++) { ts = arg_temp(op->args[i]); /* Remember the preference of the uses that followed. */ @@ -4071,12 +4289,16 @@ liveness_pass_1(TCGContext *s) /* If end of basic block, update. 
*/ if (def->flags & TCG_OPF_BB_EXIT) { + assert_carry_dead(s); la_func_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_COND_BRANCH) { + assert_carry_dead(s); la_bb_sync(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_BB_END) { + assert_carry_dead(s); la_bb_end(s, nb_globals, nb_temps); } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + assert_carry_dead(s); la_global_sync(s, nb_globals); if (def->flags & TCG_OPF_CALL_CLOBBER) { la_cross_call(s, nb_temps); @@ -4084,15 +4306,18 @@ liveness_pass_1(TCGContext *s) } /* Record arguments that die in this opcode. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { arg_life |= DEAD_ARG << i; } } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = false; + } /* Input arguments are live for preceding opcodes. */ - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { ts = arg_temp(op->args[i]); if (ts->state & TS_DEAD) { /* For operands that were dead, initially allow @@ -4101,11 +4326,13 @@ liveness_pass_1(TCGContext *s) ts->state &= ~TS_DEAD; } } + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = true; + } /* Incorporate constraints for this operand. */ switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_mov: /* Note that these are TCG_OPF_NOT_PRESENT and do not have proper constraints. That said, special case moves to propagate preferences backward. */ @@ -4117,7 +4344,7 @@ liveness_pass_1(TCGContext *s) default: args_ct = opcode_args_ct(op); - for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) { const TCGArgConstraint *ct = &args_ct[i]; TCGRegSet set, *pset; @@ -4141,6 +4368,7 @@ liveness_pass_1(TCGContext *s) } op->life = arg_life; } + assert_carry_dead(s); } /* Liveness analysis: Convert indirect regs to direct temporaries. */ @@ -4211,10 +4439,8 @@ liveness_pass_2(TCGContext *s) arg_ts = arg_temp(op->args[i]); dir_ts = arg_ts->state_ptr; if (dir_ts && arg_ts->state == TS_DEAD) { - TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_ld_i32 - : INDEX_op_ld_i64); - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); + TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld, + arg_ts->type, 3); lop->args[0] = temp_arg(dir_ts); lop->args[1] = temp_arg(arg_ts->mem_base); @@ -4263,7 +4489,7 @@ liveness_pass_2(TCGContext *s) } /* Outputs become available. */ - if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) { + if (opc == INDEX_op_mov) { arg_ts = arg_temp(op->args[0]); dir_ts = arg_ts->state_ptr; if (dir_ts) { @@ -4274,10 +4500,8 @@ liveness_pass_2(TCGContext *s) arg_ts->state = 0; if (NEED_SYNC_ARG(0)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, + arg_ts->type, 3); TCGTemp *out_ts = dir_ts; if (IS_DEAD_ARG(0)) { @@ -4310,10 +4534,8 @@ liveness_pass_2(TCGContext *s) /* Sync outputs upon their last write. */ if (NEED_SYNC_ARG(i)) { - TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 - ? 
INDEX_op_st_i32 - : INDEX_op_st_i64); - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); + TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st, + arg_ts->type, 3); sop->args[0] = temp_arg(dir_ts); sop->args[1] = temp_arg(arg_ts->mem_base); @@ -4726,9 +4948,8 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) all globals are stored at their canonical location. */ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) { - int i; - - for (i = s->nb_globals; i < s->nb_temps; i++) { + assert_carry_dead(s); + for (int i = s->nb_globals; i < s->nb_temps; i++) { TCGTemp *ts = &s->temps[i]; switch (ts->kind) { @@ -4759,6 +4980,7 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) */ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs) { + assert_carry_dead(s); sync_globals(s, allocated_regs); for (int i = s->nb_globals; i < s->nb_temps; i++) { @@ -5030,6 +5252,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) int const_args[TCG_MAX_OP_ARGS]; TCGCond op_cond; + if (def->flags & TCG_OPF_CARRY_IN) { + tcg_debug_assert(s->carry_live); + } + nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; @@ -5042,22 +5268,18 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) o_allocated_regs = s->reserved_regs; switch (op->opc) { - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: op_cond = op->args[2]; break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_negsetcond_i32: - case INDEX_op_negsetcond_i64: + case INDEX_op_setcond: + case INDEX_op_negsetcond: case INDEX_op_cmp_vec: op_cond = op->args[3]; break; case INDEX_op_brcond2_i32: op_cond = op->args[4]; break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: + case INDEX_op_movcond: case INDEX_op_setcond2_i32: case INDEX_op_cmpsel_vec: op_cond = op->args[5]; @@ -5290,6 +5512,7 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) tcg_reg_alloc_bb_end(s, i_allocated_regs); } else { if (def->flags & TCG_OPF_CALL_CLOBBER) { + assert_carry_dead(s); /* XXX: permit generic clobber register list ? */ for (i = 0; i < TCG_TARGET_NB_REGS; i++) { if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) { @@ -5366,50 +5589,345 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) } /* emit instruction */ + TCGType type = TCGOP_TYPE(op); switch (op->opc) { - case INDEX_op_ext8s_i32: - tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]); + case INDEX_op_addc1o: + tcg_out_set_carry(s); + /* fall through */ + case INDEX_op_add: + case INDEX_op_addcio: + case INDEX_op_addco: + case INDEX_op_and: + case INDEX_op_andc: + case INDEX_op_clz: + case INDEX_op_ctz: + case INDEX_op_divs: + case INDEX_op_divu: + case INDEX_op_eqv: + case INDEX_op_mul: + case INDEX_op_mulsh: + case INDEX_op_muluh: + case INDEX_op_nand: + case INDEX_op_nor: + case INDEX_op_or: + case INDEX_op_orc: + case INDEX_op_rems: + case INDEX_op_remu: + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: + case INDEX_op_xor: + { + const TCGOutOpBinary *out = + container_of(all_outop[op->opc], TCGOutOpBinary, base); + + /* Constants should never appear in the first source operand. 
*/ + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; - case INDEX_op_ext8s_i64: - tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]); + + case INDEX_op_sub: + { + const TCGOutOpSubtract *out = &outop_sub; + + /* + * Constants should never appear in the second source operand. + * These are folded to add with negative constant. + */ + tcg_debug_assert(!const_args[2]); + if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - tcg_out_ext8u(s, new_args[0], new_args[1]); + + case INDEX_op_subb1o: + tcg_out_set_borrow(s); + /* fall through */ + case INDEX_op_addci: + case INDEX_op_subbi: + case INDEX_op_subbio: + case INDEX_op_subbo: + { + const TCGOutOpAddSubCarry *out = + container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base); + + if (const_args[2]) { + if (const_args[1]) { + out->out_rii(s, type, new_args[0], + new_args[1], new_args[2]); + } else { + out->out_rri(s, type, new_args[0], + new_args[1], new_args[2]); + } + } else if (const_args[1]) { + out->out_rir(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]); + } + } break; - case INDEX_op_ext16s_i32: - tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]); + + case INDEX_op_bswap64: + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* fall through */ + case INDEX_op_ctpop: + case INDEX_op_neg: + case INDEX_op_not: + { + const TCGOutOpUnary *out = + container_of(all_outop[op->opc], TCGOutOpUnary, base); + + /* Constants should have been folded. */ + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1]); + } break; - case INDEX_op_ext16s_i64: - tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]); + + case INDEX_op_bswap16: + case INDEX_op_bswap32: + { + const TCGOutOpBswap *out = + container_of(all_outop[op->opc], TCGOutOpBswap, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], new_args[2]); + } break; - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - tcg_out_ext16u(s, new_args[0], new_args[1]); + + case INDEX_op_deposit: + { + const TCGOutOpDeposit *out = &outop_deposit; + + if (const_args[2]) { + tcg_debug_assert(!const_args[1]); + out->out_rri(s, type, new_args[0], new_args[1], + new_args[2], new_args[3], new_args[4]); + } else if (const_args[1]) { + tcg_debug_assert(new_args[1] == 0); + tcg_debug_assert(!const_args[2]); + out->out_rzr(s, type, new_args[0], new_args[2], + new_args[3], new_args[4]); + } else { + out->out_rrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3], new_args[4]); + } + } break; - case INDEX_op_ext32s_i64: - tcg_out_ext32s(s, new_args[0], new_args[1]); + + case INDEX_op_divs2: + case INDEX_op_divu2: + { + const TCGOutOpDivRem *out = + container_of(all_outop[op->opc], TCGOutOpDivRem, base); + + /* Only used by x86 and s390x, which use matching constraints. 
*/ + tcg_debug_assert(new_args[0] == new_args[2]); + tcg_debug_assert(new_args[1] == new_args[3]); + tcg_debug_assert(!const_args[4]); + out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]); + } break; - case INDEX_op_ext32u_i64: - tcg_out_ext32u(s, new_args[0], new_args[1]); + + case INDEX_op_extract: + case INDEX_op_sextract: + { + const TCGOutOpExtract *out = + container_of(all_outop[op->opc], TCGOutOpExtract, base); + + tcg_debug_assert(!const_args[1]); + out->out_rr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - case INDEX_op_ext_i32_i64: - tcg_out_exts_i32_i64(s, new_args[0], new_args[1]); + + case INDEX_op_extract2: + { + const TCGOutOpExtract2 *out = &outop_extract2; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out_rrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - case INDEX_op_extu_i32_i64: - tcg_out_extu_i32_i64(s, new_args[0], new_args[1]); + + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld32u: + case INDEX_op_ld32s: + case INDEX_op_ld: + { + const TCGOutOpLoad *out = + container_of(all_outop[op->opc], TCGOutOpLoad, base); + + tcg_debug_assert(!const_args[1]); + out->out(s, type, new_args[0], new_args[1], new_args[2]); + } break; - case INDEX_op_extrl_i64_i32: - tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]); + + case INDEX_op_muls2: + case INDEX_op_mulu2: + { + const TCGOutOpMul2 *out = + container_of(all_outop[op->opc], TCGOutOpMul2, base); + + tcg_debug_assert(!const_args[2]); + tcg_debug_assert(!const_args[3]); + out->out_rrrr(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } break; - default: - if (def->flags & TCG_OPF_VECTOR) { - tcg_out_vec_op(s, op->opc, TCGOP_TYPE(op) - TCG_TYPE_V64, - TCGOP_VECE(op), new_args, const_args); - } else { - tcg_out_op(s, op->opc, TCGOP_TYPE(op), new_args, const_args); + + case INDEX_op_st32: + /* Use tcg_op_st w/ I32. 
*/ + type = TCG_TYPE_I32; + /* fall through */ + case INDEX_op_st: + case INDEX_op_st8: + case INDEX_op_st16: + { + const TCGOutOpStore *out = + container_of(all_outop[op->opc], TCGOutOpStore, base); + + if (const_args[0]) { + out->out_i(s, type, new_args[0], new_args[1], new_args[2]); + } else { + out->out_r(s, type, new_args[0], new_args[1], new_args[2]); + } + } + break; + + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: + { + const TCGOutOpQemuLdSt *out = + container_of(all_outop[op->opc], TCGOutOpQemuLdSt, base); + + out->out(s, type, new_args[0], new_args[1], new_args[2]); } break; + + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + { + const TCGOutOpQemuLdSt2 *out = + container_of(all_outop[op->opc], TCGOutOpQemuLdSt2, base); + + out->out(s, type, new_args[0], new_args[1], + new_args[2], new_args[3]); + } + break; + + case INDEX_op_brcond: + { + const TCGOutOpBrcond *out = &outop_brcond; + TCGCond cond = new_args[2]; + TCGLabel *label = arg_label(new_args[3]); + + tcg_debug_assert(!const_args[0]); + if (const_args[1]) { + out->out_ri(s, type, cond, new_args[0], new_args[1], label); + } else { + out->out_rr(s, type, cond, new_args[0], new_args[1], label); + } + } + break; + + case INDEX_op_movcond: + { + const TCGOutOpMovcond *out = &outop_movcond; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + out->out(s, type, cond, new_args[0], + new_args[1], new_args[2], const_args[2], + new_args[3], const_args[3], + new_args[4], const_args[4]); + } + break; + + case INDEX_op_setcond: + case INDEX_op_negsetcond: + { + const TCGOutOpSetcond *out = + container_of(all_outop[op->opc], TCGOutOpSetcond, base); + TCGCond cond = new_args[3]; + + tcg_debug_assert(!const_args[1]); + if (const_args[2]) { + out->out_rri(s, type, cond, + new_args[0], new_args[1], new_args[2]); + } else { + out->out_rrr(s, type, cond, + new_args[0], new_args[1], new_args[2]); + } + } + break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + { + const TCGOutOpBrcond2 *out = &outop_brcond2; + TCGCond cond = new_args[4]; + TCGLabel *label = arg_label(new_args[5]); + + tcg_debug_assert(!const_args[0]); + tcg_debug_assert(!const_args[1]); + out->out(s, cond, new_args[0], new_args[1], + new_args[2], const_args[2], + new_args[3], const_args[3], label); + } + break; + case INDEX_op_setcond2_i32: + { + const TCGOutOpSetcond2 *out = &outop_setcond2; + TCGCond cond = new_args[5]; + + tcg_debug_assert(!const_args[1]); + tcg_debug_assert(!const_args[2]); + out->out(s, cond, new_args[0], new_args[1], new_args[2], + new_args[3], const_args[3], new_args[4], const_args[4]); + } + break; +#else + case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: + g_assert_not_reached(); +#endif + + case INDEX_op_goto_ptr: + tcg_debug_assert(!const_args[0]); + tcg_out_goto_ptr(s, new_args[0]); + break; + + default: + tcg_debug_assert(def->flags & TCG_OPF_VECTOR); + tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64, + TCGOP_VECE(op), new_args, const_args); + break; + } + + if (def->flags & TCG_OPF_CARRY_IN) { + s->carry_live = false; + } + if (def->flags & TCG_OPF_CARRY_OUT) { + s->carry_live = true; } /* move the outputs in the correct register if needed */ @@ -6414,12 +6932,28 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_out_tb_start(s); num_insns = -1; + s->carry_live = false; QTAILQ_FOREACH(op, &s->ops, link) { TCGOpcode opc = op->opc; switch (opc) { - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: + case INDEX_op_extrl_i64_i32: + assert(TCG_TARGET_REG_BITS == 64); + /* + * If 
TCG_TYPE_I32 is represented in some canonical form, + * e.g. zero or sign-extended, then emit as a unary op. + * Otherwise we can treat this as a plain move. + * If the output dies, treat this as a plain move, because + * this will be implemented with a store. + */ + if (TCG_TARGET_HAS_extr_i64_i32) { + TCGLifeData arg_life = op->life; + if (!IS_DEAD_ARG(0)) { + goto do_default; + } + } + /* fall through */ + case INDEX_op_mov: case INDEX_op_mov_vec: tcg_reg_alloc_mov(s, op); break; @@ -6427,6 +6961,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_reg_alloc_dup(s, op); break; case INDEX_op_insn_start: + assert_carry_dead(s); if (num_insns >= 0) { size_t off = tcg_current_code_size(s); s->gen_insn_end_off[num_insns] = off; @@ -6447,6 +6982,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) tcg_out_label(s, arg_label(op->args[0])); break; case INDEX_op_call: + assert_carry_dead(s); tcg_reg_alloc_call(s, op); break; case INDEX_op_exit_tb: @@ -6455,12 +6991,19 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) case INDEX_op_goto_tb: tcg_out_goto_tb(s, op->args[0]); break; + case INDEX_op_br: + tcg_out_br(s, arg_label(op->args[0])); + break; + case INDEX_op_mb: + tcg_out_mb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; } /* fall through */ default: + do_default: /* Sanity check that we've not introduced any unhandled opcodes. */ tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op), TCGOP_FLAGS(op))); @@ -6482,6 +7025,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) return -2; } } + assert_carry_dead(s); + tcg_debug_assert(num_insns + 1 == s->gen_tb->icount); s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); @@ -26,6 +26,11 @@ #include <ffi.h> +#define ctpop_tr glue(ctpop, TCG_TARGET_REG_BITS) +#define deposit_tr glue(deposit, TCG_TARGET_REG_BITS) +#define extract_tr glue(extract, TCG_TARGET_REG_BITS) +#define sextract_tr glue(sextract, TCG_TARGET_REG_BITS) + /* * Enable TCI assertions only when debugging TCG (and without NDEBUG defined). * Without assertions, the interpreter runs much faster. @@ -174,17 +179,6 @@ static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, *c5 = extract32(insn, 28, 4); } -static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1, - TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5) -{ - *r0 = extract32(insn, 8, 4); - *r1 = extract32(insn, 12, 4); - *r2 = extract32(insn, 16, 4); - *r3 = extract32(insn, 20, 4); - *r4 = extract32(insn, 24, 4); - *r5 = extract32(insn, 28, 4); -} - static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) { bool result = false; @@ -331,18 +325,6 @@ static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val, } } -#if TCG_TARGET_REG_BITS == 64 -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) \ - case glue(glue(INDEX_op_, x), _i64): -#else -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) -#endif - /* Interpret pseudo code in tb. */ /* * Disable CFI checks. 
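
Aside on the tci_args_* decoders in the hunk above: they pull 4-bit register numbers (and, for the *c variants, a condition code) out of a 32-bit instruction word at bit offsets 8, 12, 16, 20, 24 and 28. A hypothetical encoder matching those extract32() offsets, for orientation only (the low-byte opcode field is an assumption; it is not visible in this hunk):

    #include <stdint.h>

    /* Illustrative only: pack fields the way tci_args_rrrrrc() unpacks them. */
    static uint32_t tci_pack_rrrrrc(unsigned opc, unsigned r0, unsigned r1,
                                    unsigned r2, unsigned r3, unsigned r4,
                                    unsigned cond)
    {
        return (opc  & 0xff)
             | (r0   & 0xf) << 8
             | (r1   & 0xf) << 12
             | (r2   & 0xf) << 16
             | (r3   & 0xf) << 20
             | (r4   & 0xf) << 24
             | (cond & 0xf) << 28;
    }
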
@@ -356,6 +338,7 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tcg_target_ulong regs[TCG_TARGET_NB_REGS]; uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) / sizeof(uint64_t)]; + bool carry = false; regs[TCG_AREG0] = (tcg_target_ulong)env; regs[TCG_REG_CALL_STACK] = (uintptr_t)stack; @@ -364,13 +347,12 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, for (;;) { uint32_t insn; TCGOpcode opc; - TCGReg r0, r1, r2, r3, r4, r5; + TCGReg r0, r1, r2, r3, r4; tcg_target_ulong t1; TCGCond condition; uint8_t pos, len; uint32_t tmp32; uint64_t tmp64, taddr; - uint64_t T1, T2; MemOpIdx oi; int32_t ofs; void *ptr; @@ -436,34 +418,25 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_l(insn, tb_ptr, &ptr); tb_ptr = ptr; continue; - case INDEX_op_setcond_i32: - tci_args_rrrc(insn, &r0, &r1, &r2, &condition); - regs[r0] = tci_compare32(regs[r1], regs[r2], condition); - break; - case INDEX_op_movcond_i32: - tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); - tmp32 = tci_compare32(regs[r1], regs[r2], condition); - regs[r0] = regs[tmp32 ? r3 : r4]; - break; #if TCG_TARGET_REG_BITS == 32 case INDEX_op_setcond2_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); - T1 = tci_uint64(regs[r2], regs[r1]); - T2 = tci_uint64(regs[r4], regs[r3]); - regs[r0] = tci_compare64(T1, T2, condition); + regs[r0] = tci_compare64(tci_uint64(regs[r2], regs[r1]), + tci_uint64(regs[r4], regs[r3]), + condition); break; #elif TCG_TARGET_REG_BITS == 64 - case INDEX_op_setcond_i64: + case INDEX_op_setcond: tci_args_rrrc(insn, &r0, &r1, &r2, &condition); regs[r0] = tci_compare64(regs[r1], regs[r2], condition); break; - case INDEX_op_movcond_i64: + case INDEX_op_movcond: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); tmp32 = tci_compare64(regs[r1], regs[r2], condition); regs[r0] = regs[tmp32 ? r3 : r4]; break; #endif - CASE_32_64(mov) + case INDEX_op_mov: tci_args_rr(insn, &r0, &r1); regs[r0] = regs[r1]; break; @@ -475,411 +448,325 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tci_args_rl(insn, tb_ptr, &r0, &ptr); regs[r0] = *(tcg_target_ulong *)ptr; break; + case INDEX_op_tci_setcarry: + carry = true; + break; /* Load/store operations (32 bit). 
*/ - CASE_32_64(ld8u) + case INDEX_op_ld8u: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint8_t *)ptr; break; - CASE_32_64(ld8s) + case INDEX_op_ld8s: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int8_t *)ptr; break; - CASE_32_64(ld16u) + case INDEX_op_ld16u: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(uint16_t *)ptr; break; - CASE_32_64(ld16s) + case INDEX_op_ld16s: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); regs[r0] = *(int16_t *)ptr; break; - case INDEX_op_ld_i32: - CASE_64(ld32u) + case INDEX_op_ld: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - regs[r0] = *(uint32_t *)ptr; + regs[r0] = *(tcg_target_ulong *)ptr; break; - CASE_32_64(st8) + case INDEX_op_st8: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint8_t *)ptr = regs[r0]; break; - CASE_32_64(st16) + case INDEX_op_st16: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); *(uint16_t *)ptr = regs[r0]; break; - case INDEX_op_st_i32: - CASE_64(st32) + case INDEX_op_st: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - *(uint32_t *)ptr = regs[r0]; + *(tcg_target_ulong *)ptr = regs[r0]; break; /* Arithmetic operations (mixed 32/64 bit). */ - CASE_32_64(add) + case INDEX_op_add: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] + regs[r2]; break; - CASE_32_64(sub) + case INDEX_op_sub: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] - regs[r2]; break; - CASE_32_64(mul) + case INDEX_op_mul: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] * regs[r2]; break; - CASE_32_64(and) + case INDEX_op_and: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & regs[r2]; break; - CASE_32_64(or) + case INDEX_op_or: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | regs[r2]; break; - CASE_32_64(xor) + case INDEX_op_xor: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ^ regs[r2]; break; -#if TCG_TARGET_HAS_andc_i32 || TCG_TARGET_HAS_andc_i64 - CASE_32_64(andc) + case INDEX_op_andc: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & ~regs[r2]; break; -#endif -#if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64 - CASE_32_64(orc) + case INDEX_op_orc: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] | ~regs[r2]; break; -#endif -#if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64 - CASE_32_64(eqv) + case INDEX_op_eqv: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] ^ regs[r2]); break; -#endif -#if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64 - CASE_32_64(nand) + case INDEX_op_nand: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] & regs[r2]); break; -#endif -#if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64 - CASE_32_64(nor) + case INDEX_op_nor: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ~(regs[r1] | regs[r2]); break; + case INDEX_op_neg: + tci_args_rr(insn, &r0, &r1); + regs[r0] = -regs[r1]; + break; + case INDEX_op_not: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ~regs[r1]; + break; + case INDEX_op_ctpop: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ctpop_tr(regs[r1]); + break; + case INDEX_op_addco: + tci_args_rrr(insn, &r0, &r1, &r2); + t1 = regs[r1] + regs[r2]; + carry = t1 < regs[r1]; + regs[r0] = t1; + break; + case INDEX_op_addci: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] + regs[r2] + carry; + break; + case INDEX_op_addcio: + tci_args_rrr(insn, &r0, &r1, &r2); + if (carry) { + t1 = regs[r1] + regs[r2] + 1; + carry = t1 <= regs[r1]; + } else { + 
t1 = regs[r1] + regs[r2]; + carry = t1 < regs[r1]; + } + regs[r0] = t1; + break; + case INDEX_op_subbo: + tci_args_rrr(insn, &r0, &r1, &r2); + carry = regs[r1] < regs[r2]; + regs[r0] = regs[r1] - regs[r2]; + break; + case INDEX_op_subbi: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] - regs[r2] - carry; + break; + case INDEX_op_subbio: + tci_args_rrr(insn, &r0, &r1, &r2); + if (carry) { + carry = regs[r1] <= regs[r2]; + regs[r0] = regs[r1] - regs[r2] - 1; + } else { + carry = regs[r1] < regs[r2]; + regs[r0] = regs[r1] - regs[r2]; + } + break; + case INDEX_op_muls2: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); +#if TCG_TARGET_REG_BITS == 32 + tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; + tci_write_reg64(regs, r1, r0, tmp64); +#else + muls64(®s[r0], ®s[r1], regs[r2], regs[r3]); +#endif + break; + case INDEX_op_mulu2: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); +#if TCG_TARGET_REG_BITS == 32 + tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; + tci_write_reg64(regs, r1, r0, tmp64); +#else + mulu64(®s[r0], ®s[r1], regs[r2], regs[r3]); #endif + break; /* Arithmetic operations (32 bit). */ - case INDEX_op_div_i32: + case INDEX_op_tci_divs32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; break; - case INDEX_op_divu_i32: + case INDEX_op_tci_divu32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; break; - case INDEX_op_rem_i32: + case INDEX_op_tci_rems32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; break; - case INDEX_op_remu_i32: + case INDEX_op_tci_remu32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; break; -#if TCG_TARGET_HAS_clz_i32 - case INDEX_op_clz_i32: + case INDEX_op_tci_clz32: tci_args_rrr(insn, &r0, &r1, &r2); tmp32 = regs[r1]; regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; break; -#endif -#if TCG_TARGET_HAS_ctz_i32 - case INDEX_op_ctz_i32: + case INDEX_op_tci_ctz32: tci_args_rrr(insn, &r0, &r1, &r2); tmp32 = regs[r1]; regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; break; -#endif -#if TCG_TARGET_HAS_ctpop_i32 - case INDEX_op_ctpop_i32: - tci_args_rr(insn, &r0, &r1); - regs[r0] = ctpop32(regs[r1]); + case INDEX_op_tci_setcond32: + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); + regs[r0] = tci_compare32(regs[r1], regs[r2], condition); + break; + case INDEX_op_tci_movcond32: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare32(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; break; -#endif - /* Shift/rotate operations (32 bit). */ + /* Shift/rotate operations. 
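
Stepping back from the carry-chained cases above (addco/addcio/addci and subbo/subbio/subbi): they implement the usual multi-word recipe, where the first limb produces the flag, middle limbs both consume and produce it, and the last limb only consumes it. A self-contained sketch of the same arithmetic for two 64-bit halves (the helpers are illustrative; only the carry bookkeeping mirrors the interpreter cases):

    #include <stdint.h>
    #include <stdbool.h>

    /* 128-bit add from 64-bit halves, in the addco + addci style. */
    static void add128(uint64_t *rl, uint64_t *rh,
                       uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
    {
        uint64_t lo = al + bl;            /* addco: produce carry-out */
        bool carry = lo < al;
        *rl = lo;
        *rh = ah + bh + carry;            /* addci: consume carry-in */
    }

    /* 128-bit subtract from 64-bit halves, in the subbo + subbi style. */
    static void sub128(uint64_t *rl, uint64_t *rh,
                       uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
    {
        bool borrow = al < bl;            /* subbo: produce borrow-out */
        *rl = al - bl;
        *rh = ah - bh - borrow;           /* subbi: consume borrow-in */
    }

With more than two limbs, every middle limb both consumes and produces the flag, which is exactly the addcio/subbio role, including the <= comparison used above when the incoming flag is set.
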
*/ - case INDEX_op_shl_i32: + case INDEX_op_shl: tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (uint32_t)regs[r1] << (regs[r2] & 31); + regs[r0] = regs[r1] << (regs[r2] % TCG_TARGET_REG_BITS); break; - case INDEX_op_shr_i32: + case INDEX_op_shr: tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (uint32_t)regs[r1] >> (regs[r2] & 31); + regs[r0] = regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS); break; - case INDEX_op_sar_i32: + case INDEX_op_sar: tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (int32_t)regs[r1] >> (regs[r2] & 31); + regs[r0] = ((tcg_target_long)regs[r1] + >> (regs[r2] % TCG_TARGET_REG_BITS)); break; -#if TCG_TARGET_HAS_rot_i32 - case INDEX_op_rotl_i32: + case INDEX_op_tci_rotl32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol32(regs[r1], regs[r2] & 31); break; - case INDEX_op_rotr_i32: + case INDEX_op_tci_rotr32: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror32(regs[r1], regs[r2] & 31); break; -#endif - case INDEX_op_deposit_i32: + case INDEX_op_deposit: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); - regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); + regs[r0] = deposit_tr(regs[r1], pos, len, regs[r2]); break; - case INDEX_op_extract_i32: + case INDEX_op_extract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = extract32(regs[r1], pos, len); + regs[r0] = extract_tr(regs[r1], pos, len); break; - case INDEX_op_sextract_i32: + case INDEX_op_sextract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = sextract32(regs[r1], pos, len); + regs[r0] = sextract_tr(regs[r1], pos, len); break; - case INDEX_op_brcond_i32: + case INDEX_op_brcond: tci_args_rl(insn, tb_ptr, &r0, &ptr); - if ((uint32_t)regs[r0]) { + if (regs[r0]) { tb_ptr = ptr; } break; -#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32 - case INDEX_op_add2_i32: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = tci_uint64(regs[r3], regs[r2]); - T2 = tci_uint64(regs[r5], regs[r4]); - tci_write_reg64(regs, r1, r0, T1 + T2); - break; -#endif -#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32 - case INDEX_op_sub2_i32: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = tci_uint64(regs[r3], regs[r2]); - T2 = tci_uint64(regs[r5], regs[r4]); - tci_write_reg64(regs, r1, r0, T1 - T2); - break; -#endif -#if TCG_TARGET_HAS_mulu2_i32 - case INDEX_op_mulu2_i32: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3]; - tci_write_reg64(regs, r1, r0, tmp64); - break; -#endif -#if TCG_TARGET_HAS_muls2_i32 - case INDEX_op_muls2_i32: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3]; - tci_write_reg64(regs, r1, r0, tmp64); - break; -#endif -#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 - CASE_32_64(ext8s) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (int8_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 || \ - TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 - CASE_32_64(ext16s) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (int16_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64 - CASE_32_64(ext8u) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (uint8_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64 - CASE_32_64(ext16u) - tci_args_rr(insn, &r0, &r1); - regs[r0] = (uint16_t)regs[r1]; - break; -#endif -#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64 - CASE_32_64(bswap16) + case INDEX_op_bswap16: tci_args_rr(insn, &r0, &r1); regs[r0] = 
bswap16(regs[r1]); break; -#endif -#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64 - CASE_32_64(bswap32) + case INDEX_op_bswap32: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap32(regs[r1]); break; -#endif -#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64 - CASE_32_64(not) - tci_args_rr(insn, &r0, &r1); - regs[r0] = ~regs[r1]; - break; -#endif - CASE_32_64(neg) - tci_args_rr(insn, &r0, &r1); - regs[r0] = -regs[r1]; - break; #if TCG_TARGET_REG_BITS == 64 /* Load/store operations (64 bit). */ - case INDEX_op_ld32s_i64: + case INDEX_op_ld32u: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - regs[r0] = *(int32_t *)ptr; + regs[r0] = *(uint32_t *)ptr; break; - case INDEX_op_ld_i64: + case INDEX_op_ld32s: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - regs[r0] = *(uint64_t *)ptr; + regs[r0] = *(int32_t *)ptr; break; - case INDEX_op_st_i64: + case INDEX_op_st32: tci_args_rrs(insn, &r0, &r1, &ofs); ptr = (void *)(regs[r1] + ofs); - *(uint64_t *)ptr = regs[r0]; + *(uint32_t *)ptr = regs[r0]; break; /* Arithmetic operations (64 bit). */ - case INDEX_op_div_i64: + case INDEX_op_divs: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; break; - case INDEX_op_divu_i64: + case INDEX_op_divu: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; break; - case INDEX_op_rem_i64: + case INDEX_op_rems: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; break; - case INDEX_op_remu_i64: + case INDEX_op_remu: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; break; -#if TCG_TARGET_HAS_clz_i64 - case INDEX_op_clz_i64: + case INDEX_op_clz: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; break; -#endif -#if TCG_TARGET_HAS_ctz_i64 - case INDEX_op_ctz_i64: + case INDEX_op_ctz: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; break; -#endif -#if TCG_TARGET_HAS_ctpop_i64 - case INDEX_op_ctpop_i64: - tci_args_rr(insn, &r0, &r1); - regs[r0] = ctpop64(regs[r1]); - break; -#endif -#if TCG_TARGET_HAS_mulu2_i64 - case INDEX_op_mulu2_i64: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - mulu64(&regs[r0], &regs[r1], regs[r2], regs[r3]); - break; -#endif -#if TCG_TARGET_HAS_muls2_i64 - case INDEX_op_muls2_i64: - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]); - break; -#endif -#if TCG_TARGET_HAS_add2_i64 - case INDEX_op_add2_i64: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = regs[r2] + regs[r4]; - T2 = regs[r3] + regs[r5] + (T1 < regs[r2]); - regs[r0] = T1; - regs[r1] = T2; - break; -#endif -#if TCG_TARGET_HAS_add2_i64 - case INDEX_op_sub2_i64: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - T1 = regs[r2] - regs[r4]; - T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]); - regs[r0] = T1; - regs[r1] = T2; - break; -#endif /* Shift/rotate operations (64 bit.
*/ - case INDEX_op_shl_i64: - tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = regs[r1] << (regs[r2] & 63); - break; - case INDEX_op_shr_i64: - tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = regs[r1] >> (regs[r2] & 63); - break; - case INDEX_op_sar_i64: - tci_args_rrr(insn, &r0, &r1, &r2); - regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63); - break; -#if TCG_TARGET_HAS_rot_i64 - case INDEX_op_rotl_i64: + case INDEX_op_rotl: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = rol64(regs[r1], regs[r2] & 63); break; - case INDEX_op_rotr_i64: + case INDEX_op_rotr: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror64(regs[r1], regs[r2] & 63); break; -#endif - case INDEX_op_deposit_i64: - tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); - regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); - break; - case INDEX_op_extract_i64: - tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = extract64(regs[r1], pos, len); - break; - case INDEX_op_sextract_i64: - tci_args_rrbb(insn, &r0, &r1, &pos, &len); - regs[r0] = sextract64(regs[r1], pos, len); - break; - case INDEX_op_brcond_i64: - tci_args_rl(insn, tb_ptr, &r0, &ptr); - if (regs[r0]) { - tb_ptr = ptr; - } - break; - case INDEX_op_ext32s_i64: case INDEX_op_ext_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (int32_t)regs[r1]; break; - case INDEX_op_ext32u_i64: case INDEX_op_extu_i32_i64: tci_args_rr(insn, &r0, &r1); regs[r0] = (uint32_t)regs[r1]; break; -#if TCG_TARGET_HAS_bswap64_i64 - case INDEX_op_bswap64_i64: + case INDEX_op_bswap64: tci_args_rr(insn, &r0, &r1); regs[r0] = bswap64(regs[r1]); break; -#endif #endif /* TCG_TARGET_REG_BITS == 64 */ /* QEMU specific operations. */ @@ -902,46 +789,33 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tb_ptr = ptr; break; - case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld: tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); break; - case INDEX_op_qemu_ld_i64: - if (TCG_TARGET_REG_BITS == 64) { - tci_args_rrm(insn, &r0, &r1, &oi); - taddr = regs[r1]; - } else { - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - taddr = regs[r2]; - oi = regs[r3]; - } - tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr); - if (TCG_TARGET_REG_BITS == 32) { - tci_write_reg64(regs, r1, r0, tmp64); - } else { - regs[r0] = tmp64; - } - break; - - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st: tci_args_rrm(insn, &r0, &r1, &oi); taddr = regs[r1]; tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); break; - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 64) { - tci_args_rrm(insn, &r0, &r1, &oi); - tmp64 = regs[r0]; - taddr = regs[r1]; - } else { - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - tmp64 = tci_uint64(regs[r1], regs[r0]); - taddr = regs[r2]; - oi = regs[r3]; - } + case INDEX_op_qemu_ld2: + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + taddr = regs[r2]; + oi = regs[r3]; + tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr); + tci_write_reg64(regs, r1, r0, tmp64); + break; + + case INDEX_op_qemu_st2: + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + tmp64 = tci_uint64(regs[r1], regs[r0]); + taddr = regs[r2]; + oi = regs[r3]; tci_qemu_st(env, taddr, tmp64, oi, tb_ptr); break; @@ -1005,7 +879,7 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) const char *op_name; uint32_t insn; TCGOpcode op; - TCGReg r0, r1, r2, r3, r4, r5; + TCGReg r0, r1, r2, r3, r4; tcg_target_ulong i1; int32_t s2; TCGCond c; @@ -1040,15 +914,14 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) 
info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr); break; - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: + case INDEX_op_brcond: tci_args_rl(insn, tb_ptr, &r0, &ptr); info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p", op_name, str_r(r0), ptr); break; - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: + case INDEX_op_setcond: + case INDEX_op_tci_setcond32: tci_args_rrrc(insn, &r0, &r1, &r2, &c); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c)); @@ -1066,126 +939,95 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) op_name, str_r(r0), ptr); break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i32: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i32: - case INDEX_op_ld_i64: - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i32: - case INDEX_op_st_i64: + case INDEX_op_tci_setcarry: + info->fprintf_func(info->stream, "%-12s", op_name); + break; + + case INDEX_op_ld8u: + case INDEX_op_ld8s: + case INDEX_op_ld16u: + case INDEX_op_ld16s: + case INDEX_op_ld32u: + case INDEX_op_ld: + case INDEX_op_st8: + case INDEX_op_st16: + case INDEX_op_st32: + case INDEX_op_st: tci_args_rrs(insn, &r0, &r1, &s2); info->fprintf_func(info->stream, "%-12s %s, %s, %d", op_name, str_r(r0), str_r(r1), s2); break; - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: + case INDEX_op_bswap16: + case INDEX_op_bswap32: + case INDEX_op_ctpop: + case INDEX_op_mov: + case INDEX_op_neg: + case INDEX_op_not: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: + case INDEX_op_bswap64: tci_args_rr(insn, &r0, &r1); info->fprintf_func(info->stream, "%-12s %s, %s", op_name, str_r(r0), str_r(r1)); break; - case INDEX_op_add_i32: - case INDEX_op_add_i64: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_and_i32: - case INDEX_op_and_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - case INDEX_op_div_i32: - case INDEX_op_div_i64: - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - case 
INDEX_op_clz_i32: - case INDEX_op_clz_i64: - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: + case INDEX_op_add: + case INDEX_op_addci: + case INDEX_op_addcio: + case INDEX_op_addco: + case INDEX_op_and: + case INDEX_op_andc: + case INDEX_op_clz: + case INDEX_op_ctz: + case INDEX_op_divs: + case INDEX_op_divu: + case INDEX_op_eqv: + case INDEX_op_mul: + case INDEX_op_nand: + case INDEX_op_nor: + case INDEX_op_or: + case INDEX_op_orc: + case INDEX_op_rems: + case INDEX_op_remu: + case INDEX_op_rotl: + case INDEX_op_rotr: + case INDEX_op_sar: + case INDEX_op_shl: + case INDEX_op_shr: + case INDEX_op_sub: + case INDEX_op_subbi: + case INDEX_op_subbio: + case INDEX_op_subbo: + case INDEX_op_xor: + case INDEX_op_tci_ctz32: + case INDEX_op_tci_clz32: + case INDEX_op_tci_divs32: + case INDEX_op_tci_divu32: + case INDEX_op_tci_rems32: + case INDEX_op_tci_remu32: + case INDEX_op_tci_rotl32: + case INDEX_op_tci_rotr32: tci_args_rrr(insn, &r0, &r1, &r2); info->fprintf_func(info->stream, "%-12s %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2)); break; - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: + case INDEX_op_deposit: tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %d, %d", op_name, str_r(r0), str_r(r1), str_r(r2), pos, len); break; - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: + case INDEX_op_extract: + case INDEX_op_sextract: tci_args_rrbb(insn, &r0, &r1, &pos, &len); info->fprintf_func(info->stream, "%-12s %s,%s,%d,%d", op_name, str_r(r0), str_r(r1), pos, len); break; - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: + case INDEX_op_tci_movcond32: + case INDEX_op_movcond: case INDEX_op_setcond2_i32: tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", @@ -1193,43 +1035,29 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info) str_r(r3), str_r(r4), str_c(c)); break; - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: + case INDEX_op_muls2: + case INDEX_op_mulu2: tci_args_rrrr(insn, &r0, &r1, &r2, &r3); info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", op_name, str_r(r0), str_r(r1), str_r(r2), str_r(r3)); break; - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s", - op_name, str_r(r0), str_r(r1), str_r(r2), - str_r(r3), str_r(r4), str_r(r5)); - break; - - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 32) { - tci_args_rrrr(insn, &r0, &r1, &r2, &r3); - info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", - op_name, str_r(r0), str_r(r1), - str_r(r2), str_r(r3)); - break; - } - /* fall through */ - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_ld: + case INDEX_op_qemu_st: tci_args_rrm(insn, &r0, &r1, &oi); info->fprintf_func(info->stream, "%-12s %s, %s, %x", op_name, str_r(r0), str_r(r1), oi); break; + case INDEX_op_qemu_ld2: + case INDEX_op_qemu_st2: + tci_args_rrrr(insn, &r0, &r1, &r2, &r3); + info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s", + op_name, str_r(r0), str_r(r1), + str_r(r2), str_r(r3)); + break; + case 0: /* tcg_out_nop_fill uses zeros */ if (insn == 0) { diff --git a/tcg/tci/tcg-target-has.h b/tcg/tci/tcg-target-has.h index c8785ca..ab07ce1 100644 --- 
a/tcg/tci/tcg-target-has.h +++ b/tcg/tci/tcg-target-has.h @@ -7,67 +7,8 @@ #ifndef TCG_TARGET_HAS_H #define TCG_TARGET_HAS_H -#define TCG_TARGET_HAS_bswap16_i32 1 -#define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 1 -#define TCG_TARGET_HAS_ext8s_i32 1 -#define TCG_TARGET_HAS_ext16s_i32 1 -#define TCG_TARGET_HAS_ext8u_i32 1 -#define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_andc_i32 1 -#define TCG_TARGET_HAS_extract2_i32 0 -#define TCG_TARGET_HAS_eqv_i32 1 -#define TCG_TARGET_HAS_nand_i32 1 -#define TCG_TARGET_HAS_nor_i32 1 -#define TCG_TARGET_HAS_clz_i32 1 -#define TCG_TARGET_HAS_ctz_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 1 -#define TCG_TARGET_HAS_not_i32 1 -#define TCG_TARGET_HAS_orc_i32 1 -#define TCG_TARGET_HAS_rot_i32 1 -#define TCG_TARGET_HAS_negsetcond_i32 0 -#define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_qemu_st8_i32 0 - #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_extr_i64_i32 0 -#define TCG_TARGET_HAS_bswap16_i64 1 -#define TCG_TARGET_HAS_bswap32_i64 1 -#define TCG_TARGET_HAS_bswap64_i64 1 -#define TCG_TARGET_HAS_extract2_i64 0 -#define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 1 -#define TCG_TARGET_HAS_ext8s_i64 1 -#define TCG_TARGET_HAS_ext16s_i64 1 -#define TCG_TARGET_HAS_ext32s_i64 1 -#define TCG_TARGET_HAS_ext8u_i64 1 -#define TCG_TARGET_HAS_ext16u_i64 1 -#define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_andc_i64 1 -#define TCG_TARGET_HAS_eqv_i64 1 -#define TCG_TARGET_HAS_nand_i64 1 -#define TCG_TARGET_HAS_nor_i64 1 -#define TCG_TARGET_HAS_clz_i64 1 -#define TCG_TARGET_HAS_ctz_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 1 -#define TCG_TARGET_HAS_not_i64 1 -#define TCG_TARGET_HAS_orc_i64 1 -#define TCG_TARGET_HAS_rot_i64 1 -#define TCG_TARGET_HAS_negsetcond_i64 0 -#define TCG_TARGET_HAS_muls2_i64 1 -#define TCG_TARGET_HAS_add2_i32 1 -#define TCG_TARGET_HAS_sub2_i32 1 -#define TCG_TARGET_HAS_mulu2_i32 1 -#define TCG_TARGET_HAS_add2_i64 1 -#define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 -#else -#define TCG_TARGET_HAS_mulu2_i32 1 #endif /* TCG_TARGET_REG_BITS == 64 */ #define TCG_TARGET_HAS_qemu_ldst_i128 0 diff --git a/tcg/tci/tcg-target-opc.h.inc b/tcg/tci/tcg-target-opc.h.inc index ecc8c4e..4eb32ed 100644 --- a/tcg/tci/tcg-target-opc.h.inc +++ b/tcg/tci/tcg-target-opc.h.inc @@ -2,3 +2,14 @@ /* These opcodes for use between the tci generator and interpreter. 
*/ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_setcarry, 0, 0, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 36e018d..35c66a4 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -39,148 +39,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) { - switch (op) { - case INDEX_op_goto_ptr: - return C_O0_I1(r); - - case INDEX_op_ld8u_i32: - case INDEX_op_ld8s_i32: - case INDEX_op_ld16u_i32: - case INDEX_op_ld16s_i32: - case INDEX_op_ld_i32: - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_not_i32: - case INDEX_op_not_i64: - case INDEX_op_neg_i32: - case INDEX_op_neg_i64: - case INDEX_op_ext8s_i32: - case INDEX_op_ext8s_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: - case INDEX_op_bswap64_i64: - case INDEX_op_extract_i32: - case INDEX_op_extract_i64: - case INDEX_op_sextract_i32: - case INDEX_op_sextract_i64: - case INDEX_op_ctpop_i32: - case INDEX_op_ctpop_i64: - return C_O1_I1(r, r); - - case INDEX_op_st8_i32: - case INDEX_op_st16_i32: - case INDEX_op_st_i32: - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(r, r); - - case INDEX_op_div_i32: - case INDEX_op_div_i64: - case INDEX_op_divu_i32: - case INDEX_op_divu_i64: - case INDEX_op_rem_i32: - case INDEX_op_rem_i64: - case INDEX_op_remu_i32: - case INDEX_op_remu_i64: - case INDEX_op_add_i32: - case INDEX_op_add_i64: - case INDEX_op_sub_i32: - case INDEX_op_sub_i64: - case INDEX_op_mul_i32: - case INDEX_op_mul_i64: - case INDEX_op_and_i32: - case INDEX_op_and_i64: - case INDEX_op_andc_i32: - case INDEX_op_andc_i64: - case INDEX_op_eqv_i32: - case INDEX_op_eqv_i64: - case INDEX_op_nand_i32: - case INDEX_op_nand_i64: - case INDEX_op_nor_i32: - case INDEX_op_nor_i64: - case INDEX_op_or_i32: - case INDEX_op_or_i64: - case INDEX_op_orc_i32: - case INDEX_op_orc_i64: - case INDEX_op_xor_i32: - case INDEX_op_xor_i64: - case INDEX_op_shl_i32: - case INDEX_op_shl_i64: - case INDEX_op_shr_i32: - case INDEX_op_shr_i64: - case INDEX_op_sar_i32: - case INDEX_op_sar_i64: - case INDEX_op_rotl_i32: - case INDEX_op_rotl_i64: - case INDEX_op_rotr_i32: - case INDEX_op_rotr_i64: - case INDEX_op_setcond_i32: - case INDEX_op_setcond_i64: - case INDEX_op_deposit_i32: - case INDEX_op_deposit_i64: - case INDEX_op_clz_i32: - case INDEX_op_clz_i64: - case INDEX_op_ctz_i32: - case INDEX_op_ctz_i64: - return C_O1_I2(r, r, r); - - case INDEX_op_brcond_i32: - case INDEX_op_brcond_i64: 
- return C_O0_I2(r, r); - - case INDEX_op_add2_i32: - case INDEX_op_add2_i64: - case INDEX_op_sub2_i32: - case INDEX_op_sub2_i64: - return C_O2_I4(r, r, r, r, r, r); - -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - return C_O0_I4(r, r, r, r); -#endif - - case INDEX_op_mulu2_i32: - case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i32: - case INDEX_op_muls2_i64: - return C_O2_I2(r, r, r, r); - - case INDEX_op_movcond_i32: - case INDEX_op_movcond_i64: - case INDEX_op_setcond2_i32: - return C_O1_I4(r, r, r, r, r); - - case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, r); - case INDEX_op_qemu_ld_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r); - case INDEX_op_qemu_st_i32: - return C_O0_I2(r, r); - case INDEX_op_qemu_st_i64: - return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r); - - default: - return C_NotImplemented; - } + return C_NotImplemented; } static const int tcg_target_reg_alloc_order[] = { @@ -443,31 +302,13 @@ static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, tcg_out32(s, insn); } -static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op, - TCGReg r0, TCGReg r1, TCGReg r2, - TCGReg r3, TCGReg r4, TCGReg r5) -{ - tcg_insn_unit insn = 0; - - insn = deposit32(insn, 0, 8, op); - insn = deposit32(insn, 8, 4, r0); - insn = deposit32(insn, 12, 4, r1); - insn = deposit32(insn, 16, 4, r2); - insn = deposit32(insn, 20, 4, r3); - insn = deposit32(insn, 24, 4, r4); - insn = deposit32(insn, 28, 4, r5); - tcg_out32(s, insn); -} - static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, TCGReg base, intptr_t offset) { stack_bounds_check(base, offset); if (offset != sextract32(offset, 0, 16)) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset); - tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32 - ? 
INDEX_op_add_i32 : INDEX_op_add_i64), - TCG_REG_TMP, TCG_REG_TMP, base); + tcg_out_op_rrr(s, INDEX_op_add, TCG_REG_TMP, TCG_REG_TMP, base); base = TCG_REG_TMP; offset = 0; } @@ -477,34 +318,17 @@ static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, intptr_t offset) { - switch (type) { - case TCG_TYPE_I32: - tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset); - break; -#endif - default: - g_assert_not_reached(); + TCGOpcode op = INDEX_op_ld; + + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + op = INDEX_op_ld32u; } + tcg_out_ldst(s, op, val, base, offset); } static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - switch (type) { - case TCG_TYPE_I32: - tcg_out_op_rr(s, INDEX_op_mov_i32, ret, arg); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_out_op_rr(s, INDEX_op_mov_i64, ret, arg); - break; -#endif - default: - g_assert_not_reached(); - } + tcg_out_op_rr(s, INDEX_op_mov, ret, arg); return true; } @@ -535,76 +359,62 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_extract(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, unsigned pos, unsigned len) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, pos, len); +} + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tcg_out_extract, +}; + +static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, unsigned pos, unsigned len) +{ + tcg_out_op_rrbb(s, INDEX_op_sextract, rd, rs, pos, len); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tcg_out_sextract, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - switch (type) { - case TCG_TYPE_I32: - tcg_debug_assert(TCG_TARGET_HAS_ext8s_i32); - tcg_out_op_rr(s, INDEX_op_ext8s_i32, rd, rs); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_debug_assert(TCG_TARGET_HAS_ext8s_i64); - tcg_out_op_rr(s, INDEX_op_ext8s_i64, rd, rs); - break; -#endif - default: - g_assert_not_reached(); - } + tcg_out_sextract(s, type, rd, rs, 0, 8); } static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs) { - if (TCG_TARGET_REG_BITS == 64) { - tcg_debug_assert(TCG_TARGET_HAS_ext8u_i64); - tcg_out_op_rr(s, INDEX_op_ext8u_i64, rd, rs); - } else { - tcg_debug_assert(TCG_TARGET_HAS_ext8u_i32); - tcg_out_op_rr(s, INDEX_op_ext8u_i32, rd, rs); - } + tcg_out_extract(s, TCG_TYPE_REG, rd, rs, 0, 8); } static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) { - switch (type) { - case TCG_TYPE_I32: - tcg_debug_assert(TCG_TARGET_HAS_ext16s_i32); - tcg_out_op_rr(s, INDEX_op_ext16s_i32, rd, rs); - break; -#if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_debug_assert(TCG_TARGET_HAS_ext16s_i64); - tcg_out_op_rr(s, INDEX_op_ext16s_i64, rd, rs); - break; -#endif - default: - g_assert_not_reached(); - } + tcg_out_sextract(s, type, rd, rs, 0, 16); } static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rs) { - if (TCG_TARGET_REG_BITS == 64) { - tcg_debug_assert(TCG_TARGET_HAS_ext16u_i64); - tcg_out_op_rr(s, INDEX_op_ext16u_i64, rd, rs); - } else { - tcg_debug_assert(TCG_TARGET_HAS_ext16u_i32); - tcg_out_op_rr(s, 
INDEX_op_ext16u_i32, rd, rs); - } + tcg_out_extract(s, TCG_TYPE_REG, rd, rs, 0, 16); } static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_debug_assert(TCG_TARGET_HAS_ext32s_i64); - tcg_out_op_rr(s, INDEX_op_ext32s_i64, rd, rs); + tcg_out_sextract(s, TCG_TYPE_I64, rd, rs, 0, 32); } static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rs) { tcg_debug_assert(TCG_TARGET_REG_BITS == 64); - tcg_debug_assert(TCG_TARGET_HAS_ext32u_i64); - tcg_out_op_rr(s, INDEX_op_ext32u_i64, rd, rs); + tcg_out_extract(s, TCG_TYPE_I64, rd, rs, 0, 32); } static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) @@ -656,18 +466,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, tcg_out32(s, insn); } -#if TCG_TARGET_REG_BITS == 64 -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i64): \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) \ - case glue(glue(INDEX_op_, x), _i64): -#else -# define CASE_32_64(x) \ - case glue(glue(INDEX_op_, x), _i32): -# define CASE_64(x) -#endif - static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) { tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); @@ -680,197 +478,772 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_op_r(s, INDEX_op_goto_ptr, a0); +} + void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { /* Always indirect, nothing to do */ } -static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type, - const TCGArg args[TCG_MAX_OP_ARGS], - const int const_args[TCG_MAX_OP_ARGS]) +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - TCGOpcode exts; + tcg_out_op_rrr(s, INDEX_op_add, a0, a1, a2); +} - switch (opc) { - case INDEX_op_goto_ptr: - tcg_out_op_r(s, opc, args[0]); - break; +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_add, +}; - case INDEX_op_br: - tcg_out_op_l(s, opc, arg_label(args[0])); - break; +static TCGConstraintSetIndex cset_addsubcarry(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I2(r, r, r) : C_NotImplemented; +} - CASE_32_64(setcond) - tcg_out_op_rrrc(s, opc, args[0], args[1], args[2], args[3]); - break; +static void tgen_addco(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_addco, a0, a1, a2); +} - CASE_32_64(movcond) - case INDEX_op_setcond2_i32: - tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2], - args[3], args[4], args[5]); - break; +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_addco, +}; - CASE_32_64(ld8u) - CASE_32_64(ld8s) - CASE_32_64(ld16u) - CASE_32_64(ld16s) - case INDEX_op_ld_i32: - CASE_64(ld32u) - CASE_64(ld32s) - CASE_64(ld) - CASE_32_64(st8) - CASE_32_64(st16) - case INDEX_op_st_i32: - CASE_64(st32) - CASE_64(st) - tcg_out_ldst(s, opc, args[0], args[1], args[2]); - break; +static void tgen_addci(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_addci, a0, a1, a2); +} - CASE_32_64(add) - CASE_32_64(sub) - CASE_32_64(mul) - CASE_32_64(and) - CASE_32_64(or) - CASE_32_64(xor) - CASE_32_64(andc) /* Optional (TCG_TARGET_HAS_andc_*). */ - CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */ - CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). 
*/ - CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */ - CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */ - CASE_32_64(shl) - CASE_32_64(shr) - CASE_32_64(sar) - CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */ - CASE_32_64(div) /* Optional (TCG_TARGET_HAS_div_*). */ - CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */ - CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */ - CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */ - CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */ - CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */ - tcg_out_op_rrr(s, opc, args[0], args[1], args[2]); - break; +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_addci, +}; - CASE_32_64(deposit) - tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], args[3], args[4]); - break; +static void tgen_addcio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_addcio, a0, a1, a2); +} - CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */ - CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */ - tcg_out_op_rrbb(s, opc, args[0], args[1], args[2], args[3]); - break; +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_addcio, +}; - CASE_32_64(brcond) - tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32 - ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64), - TCG_REG_TMP, args[0], args[1], args[2]); - tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3])); - break; +static void tcg_out_set_carry(TCGContext *s) +{ + tcg_out_op_v(s, INDEX_op_tci_setcarry); +} - CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */ - CASE_32_64(not) /* Optional (TCG_TARGET_HAS_not_*). */ - CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */ - case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */ - case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */ - tcg_out_op_rr(s, opc, args[0], args[1]); - break; +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_and, a0, a1, a2); +} - case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */ - exts = INDEX_op_ext16s_i32; - goto do_bswap; - case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */ - exts = INDEX_op_ext16s_i64; - goto do_bswap; - case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */ - exts = INDEX_op_ext32s_i64; - do_bswap: - /* The base tci bswaps zero-extend, and ignore high bits. 
*/ - tcg_out_op_rr(s, opc, args[0], args[1]); - if (args[2] & TCG_BSWAP_OS) { - tcg_out_op_rr(s, exts, args[0], args[0]); - } - break; +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_and, +}; - CASE_32_64(add2) - CASE_32_64(sub2) - tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2], - args[3], args[4], args[5]); - break; +static void tgen_andc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_andc, a0, a1, a2); +} -#if TCG_TARGET_REG_BITS == 32 - case INDEX_op_brcond2_i32: - tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, - args[0], args[1], args[2], args[3], args[4]); - tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5])); - break; +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_andc, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_clz32 + : INDEX_op_clz); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_clz, +}; + +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_ctz32 + : INDEX_op_ctz); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_ctz, +}; + +static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, + TCGReg a2, unsigned ofs, unsigned len) +{ + tcg_out_op_rrrbb(s, INDEX_op_deposit, a0, a1, a2, ofs, len); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_deposit, +}; + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divs32 + : INDEX_op_divs); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? 
INDEX_op_tci_divu32 + : INDEX_op_divu); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_eqv(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_eqv, a0, a1, a2); +} + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_eqv, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_extract(s, TCG_TYPE_I64, a0, a1, 32, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; #endif - CASE_32_64(mulu2) - CASE_32_64(muls2) - tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]); - break; +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_mul, a0, a1, a2); +} - case INDEX_op_qemu_ld_i64: - case INDEX_op_qemu_st_i64: - if (TCG_TARGET_REG_BITS == 32) { - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[3]); - tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], TCG_REG_TMP); - break; - } - /* fall through */ - case INDEX_op_qemu_ld_i32: - case INDEX_op_qemu_st_i32: - if (TCG_TARGET_REG_BITS == 64 && s->addr_type == TCG_TYPE_I32) { - tcg_out_ext32u(s, TCG_REG_TMP, args[1]); - tcg_out_op_rrm(s, opc, args[0], TCG_REG_TMP, args[2]); - } else { - tcg_out_op_rrm(s, opc, args[0], args[1], args[2]); - } - break; +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; - case INDEX_op_mb: - tcg_out_op_v(s, opc); - break; +static TCGConstraintSetIndex cset_mul2(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O2_I2(r, r, r, r) : C_NotImplemented; +} - case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ - case INDEX_op_mov_i64: - case INDEX_op_call: /* Always emitted via tcg_out_call. */ - case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ - case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ - case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. 
*/ - case INDEX_op_ext8s_i64: - case INDEX_op_ext8u_i32: - case INDEX_op_ext8u_i64: - case INDEX_op_ext16s_i32: - case INDEX_op_ext16s_i64: - case INDEX_op_ext16u_i32: - case INDEX_op_ext16u_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - case INDEX_op_extrl_i64_i32: - default: - g_assert_not_reached(); +static void tgen_muls2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_op_rrrr(s, INDEX_op_muls2, a0, a1, a2, a3); +} + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_muls2, +}; + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_mulu2(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3) +{ + tcg_out_op_rrrr(s, INDEX_op_mulu2, a0, a1, a2, a3); +} + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_mul2, + .out_rrrr = tgen_mulu2, +}; + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + +static void tgen_nand(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_nand, a0, a1, a2); +} + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nand, +}; + +static void tgen_nor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_nor, a0, a1, a2); +} + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_nor, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_or, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_or, +}; + +static void tgen_orc(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_orc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_orc, +}; + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rems32 + : INDEX_op_rems); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_remu32 + : INDEX_op_remu); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotl32 + : INDEX_op_rotl); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotl, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? 
INDEX_op_tci_rotr32 + : INDEX_op_rotr); + tcg_out_op_rrr(s, opc, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotr, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type < TCG_TYPE_REG) { + tcg_out_ext32s(s, TCG_REG_TMP, a1); + a1 = TCG_REG_TMP; } + tcg_out_op_rrr(s, INDEX_op_sar, a0, a1, a2); } -static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, - intptr_t offset) +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sar, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) { - switch (type) { - case TCG_TYPE_I32: - tcg_out_ldst(s, INDEX_op_st_i32, val, base, offset); - break; + tcg_out_op_rrr(s, INDEX_op_shl, a0, a1, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shl, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + if (type < TCG_TYPE_REG) { + tcg_out_ext32u(s, TCG_REG_TMP, a1); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, INDEX_op_shr, a0, a1, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shr, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_sub, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static void tgen_subbo(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_subbo, a0, a1, a2); +} + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_subbo, +}; + +static void tgen_subbi(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_subbi, a0, a1, a2); +} + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_subbi, +}; + +static void tgen_subbio(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_subbio, a0, a1, a2); +} + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_addsubcarry, + .out_rrr = tgen_subbio, +}; + +static void tcg_out_set_borrow(TCGContext *s) +{ + tcg_out_op_v(s, INDEX_op_tci_setcarry); /* borrow == carry */ +} + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_xor, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_xor, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_ctpop, a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? 
C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + +static void tgen_bswap16(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_op_rr(s, INDEX_op_bswap16, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_sextract(s, TCG_TYPE_REG, a0, a0, 0, 16); + } +} + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap16, +}; + +static void tgen_bswap32(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, unsigned flags) +{ + tcg_out_op_rr(s, INDEX_op_bswap32, a0, a1); + if (flags & TCG_BSWAP_OS) { + tcg_out_sextract(s, TCG_TYPE_REG, a0, a0, 0, 32); + } +} + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap32, +}; + #if TCG_TARGET_REG_BITS == 64 - case TCG_TYPE_I64: - tcg_out_ldst(s, INDEX_op_st_i64, val, base, offset); - break; +static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_bswap64, a0, a1); +} + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_bswap64, +}; #endif - default: - g_assert_not_reached(); + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_neg, a0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_not, a0, a1); +} + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_not, +}; + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_setcond32 + : INDEX_op_setcond); + tcg_out_op_rrrc(s, opc, dest, arg1, arg2, cond); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_setcond, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond(s, type, cond, dest, arg1, arg2); + tgen_neg(s, type, dest, dest); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_negsetcond, +}; + +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg0, TCGReg arg1, TCGLabel *l) +{ + tgen_setcond(s, type, cond, TCG_REG_TMP, arg0, arg1); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, r), + .out_rr = tgen_brcond, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? 
INDEX_op_tci_movcond32 + : INDEX_op_movcond); + tcg_out_op_rrrrrc(s, opc, ret, c1, c2, vt, vf, cond); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, r, r, r), + .out = tgen_movcond, +}; + +static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh, TCGLabel *l) +{ + tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP, + al, ah, bl, bh, cond); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, l); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpBrcond2 outop_brcond2 = { + .base.static_constraint = C_O0_I4(r, r, r, r), + .out = tgen_brcond2, +}; + +static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, + TCGArg bl, bool const_bl, + TCGArg bh, bool const_bh) +{ + tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, ret, al, ah, bl, bh, cond); +} + +#if TCG_TARGET_REG_BITS != 32 +__attribute__((unused)) +#endif +static const TCGOutOpSetcond2 outop_setcond2 = { + .base.static_constraint = C_O1_I4(r, r, r, r, r), + .out = tgen_setcond2, +}; + +static void tcg_out_mb(TCGContext *s, unsigned a0) +{ + tcg_out_op_v(s, INDEX_op_mb); +} + +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_op_l(s, INDEX_op_br, l); +} + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8u, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8s, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16u, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16s, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +#if TCG_TARGET_REG_BITS == 64 +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32u, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32s, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; +#endif + +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st8, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st16, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + 
+static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_ld2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi); + tcg_out_op_rrrr(s, INDEX_op_qemu_ld2, datalo, datahi, addr, TCG_REG_TMP); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O2_I1(r, r, r), + .out = + TCG_TARGET_REG_BITS == 64 ? NULL : tgen_qemu_ld2, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + +static void tgen_qemu_st2(TCGContext *s, TCGType type, TCGReg datalo, + TCGReg datahi, TCGReg addr, MemOpIdx oi) +{ + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, oi); + tcg_out_op_rrrr(s, INDEX_op_qemu_st2, datalo, datahi, addr, TCG_REG_TMP); +} + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = + TCG_TARGET_REG_BITS == 64 ? C_NotImplemented : C_O0_I3(r, r, r), + .out = + TCG_TARGET_REG_BITS == 64 ? NULL : tgen_qemu_st2, +}; + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, + intptr_t offset) +{ + TCGOpcode op = INDEX_op_st; + + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { + op = INDEX_op_st32; } + tcg_out_ldst(s, op, val, base, offset); } static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, |