diff options
Diffstat (limited to 'docs/devel/tcg-ops.rst')
-rw-r--r-- | docs/devel/tcg-ops.rst | 228 |
1 files changed, 141 insertions, 87 deletions
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 688984f..f26b837 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -239,7 +239,7 @@ Jumps/Labels - | Jump to label. - * - brcond_i32/i64 *t0*, *t1*, *cond*, *label* + * - brcond *t0*, *t1*, *cond*, *label* - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be: | @@ -261,98 +261,117 @@ Arithmetic .. list-table:: - * - add_i32/i64 *t0*, *t1*, *t2* + * - add *t0*, *t1*, *t2* - | *t0* = *t1* + *t2* - * - sub_i32/i64 *t0*, *t1*, *t2* + * - sub *t0*, *t1*, *t2* - | *t0* = *t1* - *t2* - * - neg_i32/i64 *t0*, *t1* + * - neg *t0*, *t1* - | *t0* = -*t1* (two's complement) - * - mul_i32/i64 *t0*, *t1*, *t2* + * - mul *t0*, *t1*, *t2* - | *t0* = *t1* * *t2* - * - div_i32/i64 *t0*, *t1*, *t2* + * - divs *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (signed) | Undefined behavior if division by zero or overflow. - * - divu_i32/i64 *t0*, *t1*, *t2* + * - divu *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (unsigned) | Undefined behavior if division by zero. - * - rem_i32/i64 *t0*, *t1*, *t2* + * - rems *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (signed) | Undefined behavior if division by zero or overflow. - * - remu_i32/i64 *t0*, *t1*, *t2* + * - remu *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (unsigned) | Undefined behavior if division by zero. + * - divs2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (signed) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass *nh* as a simple sign-extension of *nl*, so the only + overflow should be *INT_MIN* / -1. + + * - divu2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (unsigned) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass 0 to *nh* to make a simple zero-extension of *nl*, + so overflow should never occur. Logical ------- .. list-table:: - * - and_i32/i64 *t0*, *t1*, *t2* + * - and *t0*, *t1*, *t2* - | *t0* = *t1* & *t2* - * - or_i32/i64 *t0*, *t1*, *t2* + * - or *t0*, *t1*, *t2* - | *t0* = *t1* | *t2* - * - xor_i32/i64 *t0*, *t1*, *t2* + * - xor *t0*, *t1*, *t2* - | *t0* = *t1* ^ *t2* - * - not_i32/i64 *t0*, *t1* + * - not *t0*, *t1* - | *t0* = ~\ *t1* - * - andc_i32/i64 *t0*, *t1*, *t2* + * - andc *t0*, *t1*, *t2* - | *t0* = *t1* & ~\ *t2* - * - eqv_i32/i64 *t0*, *t1*, *t2* + * - eqv *t0*, *t1*, *t2* - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* - * - nand_i32/i64 *t0*, *t1*, *t2* + * - nand *t0*, *t1*, *t2* - | *t0* = ~(*t1* & *t2*) - * - nor_i32/i64 *t0*, *t1*, *t2* + * - nor *t0*, *t1*, *t2* - | *t0* = ~(*t1* | *t2*) - * - orc_i32/i64 *t0*, *t1*, *t2* + * - orc *t0*, *t1*, *t2* - | *t0* = *t1* | ~\ *t2* - * - clz_i32/i64 *t0*, *t1*, *t2* + * - clz *t0*, *t1*, *t2* - | *t0* = *t1* ? clz(*t1*) : *t2* - * - ctz_i32/i64 *t0*, *t1*, *t2* + * - ctz *t0*, *t1*, *t2* - | *t0* = *t1* ? ctz(*t1*) : *t2* - * - ctpop_i32/i64 *t0*, *t1* + * - ctpop *t0*, *t1* - | *t0* = number of bits set in *t1* | - | With *ctpop* short for "count population", matching - | the function name used in ``include/qemu/host-utils.h``. + | The name *ctpop* is short for "count population", and matches + the function name used in ``include/qemu/host-utils.h``. Shifts/Rotates @@ -360,30 +379,30 @@ Shifts/Rotates .. list-table:: - * - shl_i32/i64 *t0*, *t1*, *t2* + * - shl *t0*, *t1*, *t2* - | *t0* = *t1* << *t2* - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - shr_i32/i64 *t0*, *t1*, *t2* + * - shr *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (unsigned) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - sar_i32/i64 *t0*, *t1*, *t2* + * - sar *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (signed) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotl_i32/i64 *t0*, *t1*, *t2* + * - rotl *t0*, *t1*, *t2* - | Rotation of *t2* bits to the left - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotr_i32/i64 *t0*, *t1*, *t2* + * - rotr *t0*, *t1*, *t2* - | Rotation of *t2* bits to the right. - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. Misc @@ -391,26 +410,12 @@ Misc .. list-table:: - * - mov_i32/i64 *t0*, *t1* + * - mov *t0*, *t1* - | *t0* = *t1* - | Move *t1* to *t0* (both operands must have the same type). - - * - ext8s_i32/i64 *t0*, *t1* - - ext8u_i32/i64 *t0*, *t1* - - ext16s_i32/i64 *t0*, *t1* - - ext16u_i32/i64 *t0*, *t1* + | Move *t1* to *t0*. - ext32s_i64 *t0*, *t1* - - ext32u_i64 *t0*, *t1* - - - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type) - - * - bswap16_i32/i64 *t0*, *t1*, *flags* + * - bswap16 *t0*, *t1*, *flags* - | 16 bit byte swap on the low bits of a 32/64 bit input. | @@ -420,24 +425,24 @@ Misc | | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value. - * - bswap32_i64 *t0*, *t1*, *flags* - - - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, - except they apply from bit 31 instead of bit 15. + * - bswap32 *t0*, *t1*, *flags* - * - bswap32_i32 *t0*, *t1*, *flags* + - | 32 bit byte swap. The flags are the same as for bswap16, except + they apply from bit 31 instead of bit 15. On TCG_TYPE_I32, the + flags should be zero. - bswap64_i64 *t0*, *t1*, *flags* + * - bswap64 *t0*, *t1*, *flags* - - | 32/64 bit byte swap. The flags are ignored, but still present - for consistency with the other bswap opcodes. + - | 64 bit byte swap. The flags are ignored, but still present + for consistency with the other bswap opcodes. For future + compatibility, the flags should be zero. * - discard_i32/i64 *t0* - | Indicate that the value of *t0* won't be used later. It is useful to force dead code elimination. - * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len* + * - deposit *dest*, *t1*, *t2*, *pos*, *len* - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*. | @@ -446,14 +451,16 @@ Misc | *len* - the length of the bitfield | *pos* - the position of the first bit, counting from the LSB | - | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field + | For example, "deposit dest, t1, t2, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) + | + | on TCG_TYPE_I32. - * - extract_i32/i64 *dest*, *t1*, *pos*, *len* + * - extract *dest*, *t1*, *pos*, *len* - sextract_i32/i64 *dest*, *t1*, *pos*, *len* + sextract *dest*, *t1*, *pos*, *len* - | Extract a bitfield from *t1*, placing the result in *dest*. | @@ -462,16 +469,16 @@ Misc to the left with zeros; for sextract_*, the result will be extended to the left with copies of the bitfield sign bit at *pos* + *len* - 1. | - | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field + | For example, "sextract dest, t1, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* << 20) >> 28 | - | (using an arithmetic right shift). + | (using an arithmetic right shift) on TCG_TYPE_I32. - * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* + * - extract2 *dest*, *t1*, *t2*, *pos* - - | For N = {32,64}, extract an N-bit quantity from the concatenation + - | For TCG_TYPE_I{N}, extract an N-bit quantity from the concatenation of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander accepts 0 <= *pos* <= N as inputs. The backend code generator will not see either 0 or N as inputs for these opcodes. @@ -494,19 +501,19 @@ Conditional moves .. list-table:: - * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - setcond *dest*, *t1*, *t2*, *cond* - | *dest* = (*t1* *cond* *t2*) | | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - negsetcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - negsetcond *dest*, *t1*, *t2*, *cond* - | *dest* = -(*t1* *cond* *t2*) | | Set *dest* to -1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond* + * - movcond *dest*, *c1*, *c2*, *v1*, *v2*, *cond* - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*) | @@ -586,26 +593,79 @@ Multiword arithmetic support .. list-table:: - * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + * - addco *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* and in addition output to the + carry bit provided by the host architecture. + + * - addci *t0, *t1*, *t2* - sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture. + The output carry bit need not be computed. - - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are - formed from two single-word arguments, and the double-word output *t0* - is returned in two single-word outputs. + * - addcio *t0, *t1*, *t2* - * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture, + and also compute the output carry bit. + + * - addc1o *t0, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* + 1, and in addition output to the + carry bit provided by the host architecture. This is akin to + *addcio* with a fixed carry-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the addition chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_carry`` and then + the output routine for *addcio*. + + * - subbo *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* and in addition output to the + borrow bit provided by the host architecture. + | Depending on the host architecture, the carry bit may or may not be + identical to the borrow bit. Thus the addc\* and subb\* + opcodes must not be mixed. + + * - subbi *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture. + The output borrow bit need not be computed. + + * - subbio *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture, + and also compute the output borrow bit. + + * - subb1o *t0, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - 1, and in addition output to the + borrow bit provided by the host architecture. This is akin to + *subbio* with a fixed borrow-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the subtraction chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_borrow`` and then + the output routine for *subbio*. + + * - mulu2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full double-word product *t0*. The latter is returned in two single-word outputs. - * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + * - muls2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. - * - mulsh_i32/i64 *t0*, *t1*, *t2* + * - mulsh *t0*, *t1*, *t2* - muluh_i32/i64 *t0*, *t1*, *t2* + muluh *t0*, *t1*, *t2* - | Provide the high part of a signed or unsigned multiply, respectively. | @@ -684,8 +744,6 @@ QEMU specific operations qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx* - qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* - - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output register *t0* only. The address *t1* is always sized according to the guest, @@ -703,10 +761,6 @@ QEMU specific operations 64-bit memory access specified in *flags*. | | For qemu_ld/st_i128, these are only supported for a 64-bit host. - | - | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of - the memory operation is known to be 8-bit. This allows the backend to - provide a different set of register constraints. Host vector operations @@ -884,9 +938,9 @@ Assumptions The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or 64 bit. It is expected that the pointer has the same size as the word. -On a 32 bit target, all 64 bit operations are converted to 32 bits. A -few specific operations must be implemented to allow it (see add2_i32, -sub2_i32, brcond2_i32). +On a 32 bit target, all 64 bit operations are converted to 32 bits. +A few specific operations must be implemented to allow it +(see brcond2_i32, setcond2_i32). On a 64 bit target, the values are transferred between 32 and 64-bit registers using the following ops: |