diff options
-rw-r--r-- | target-arm/helper.c | 245 | ||||
-rw-r--r-- | target-arm/helpers.h | 46 | ||||
-rw-r--r-- | target-arm/op.c | 321 | ||||
-rw-r--r-- | target-arm/op_addsub.h | 67 | ||||
-rw-r--r-- | target-arm/op_helper.c | 67 | ||||
-rw-r--r-- | target-arm/translate.c | 240 | ||||
-rw-r--r-- | tcg/tcg-op.h | 19 |
7 files changed, 556 insertions, 449 deletions
diff --git a/target-arm/helper.c b/target-arm/helper.c index c61c610..ee6cd59 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1922,3 +1922,248 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum, } #endif + +/* Note that signed overflow is undefined in C. The following routines are + careful to use unsigned types where modulo arithmetic is required. + Failure to do so _will_ break on newer gcc. */ + +/* Signed saturating arithmetic. */ + +/* Perform 16-bit signed satruating addition. */ +static inline uint16_t add16_sat(uint16_t a, uint16_t b) +{ + uint16_t res; + + res = a + b; + if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { + if (a & 0x8000) + res = 0x8000; + else + res = 0x7fff; + } + return res; +} + +/* Perform 8-bit signed satruating addition. */ +static inline uint8_t add8_sat(uint8_t a, uint8_t b) +{ + uint8_t res; + + res = a + b; + if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { + if (a & 0x80) + res = 0x80; + else + res = 0x7f; + } + return res; +} + +/* Perform 16-bit signed satruating subtraction. */ +static inline uint16_t sub16_sat(uint16_t a, uint16_t b) +{ + uint16_t res; + + res = a - b; + if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { + if (a & 0x8000) + res = 0x8000; + else + res = 0x7fff; + } + return res; +} + +/* Perform 8-bit signed satruating subtraction. */ +static inline uint8_t sub8_sat(uint8_t a, uint8_t b) +{ + uint8_t res; + + res = a - b; + if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { + if (a & 0x80) + res = 0x80; + else + res = 0x7f; + } + return res; +} + +#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); +#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); +#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); +#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); +#define PFX q + +#include "op_addsub.h" + +/* Unsigned saturating arithmetic. */ +static inline uint16_t add16_usat(uint16_t a, uint8_t b) +{ + uint16_t res; + res = a + b; + if (res < a) + res = 0xffff; + return res; +} + +static inline uint16_t sub16_usat(uint16_t a, uint8_t b) +{ + if (a < b) + return a - b; + else + return 0; +} + +static inline uint8_t add8_usat(uint8_t a, uint8_t b) +{ + uint8_t res; + res = a + b; + if (res < a) + res = 0xff; + return res; +} + +static inline uint8_t sub8_usat(uint8_t a, uint8_t b) +{ + if (a < b) + return a - b; + else + return 0; +} + +#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); +#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); +#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); +#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); +#define PFX uq + +#include "op_addsub.h" + +/* Signed modulo arithmetic. */ +#define SARITH16(a, b, n, op) do { \ + int32_t sum; \ + sum = (int16_t)((uint16_t)(a) op (uint16_t)(b)); \ + RESULT(sum, n, 16); \ + if (sum >= 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define SARITH8(a, b, n, op) do { \ + int32_t sum; \ + sum = (int8_t)((uint8_t)(a) op (uint8_t)(b)); \ + RESULT(sum, n, 8); \ + if (sum >= 0) \ + ge |= 1 << n; \ + } while(0) + + +#define ADD16(a, b, n) SARITH16(a, b, n, +) +#define SUB16(a, b, n) SARITH16(a, b, n, -) +#define ADD8(a, b, n) SARITH8(a, b, n, +) +#define SUB8(a, b, n) SARITH8(a, b, n, -) +#define PFX s +#define ARITH_GE + +#include "op_addsub.h" + +/* Unsigned modulo arithmetic. */ +#define ADD16(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ + RESULT(sum, n, 16); \ + if ((sum >> 16) == 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define ADD8(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ + RESULT(sum, n, 8); \ + if ((sum >> 8) == 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define SUB16(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ + RESULT(sum, n, 16); \ + if ((sum >> 16) == 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define SUB8(a, b, n) do { \ + uint32_t sum; \ + sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ + RESULT(sum, n, 8); \ + if ((sum >> 8) == 0) \ + ge |= 3 << (n * 2); \ + } while(0) + +#define PFX u +#define ARITH_GE + +#include "op_addsub.h" + +/* Halved signed arithmetic. */ +#define ADD16(a, b, n) \ + RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) +#define SUB16(a, b, n) \ + RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) +#define ADD8(a, b, n) \ + RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) +#define SUB8(a, b, n) \ + RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) +#define PFX sh + +#include "op_addsub.h" + +/* Halved unsigned arithmetic. */ +#define ADD16(a, b, n) \ + RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) +#define SUB16(a, b, n) \ + RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) +#define ADD8(a, b, n) \ + RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) +#define SUB8(a, b, n) \ + RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) +#define PFX uh + +#include "op_addsub.h" + +static inline uint8_t do_usad(uint8_t a, uint8_t b) +{ + if (a > b) + return a - b; + else + return b - a; +} + +/* Unsigned sum of absolute byte differences. */ +uint32_t HELPER(usad8)(uint32_t a, uint32_t b) +{ + uint32_t sum; + sum = do_usad(a, b); + sum += do_usad(a >> 8, b >> 8); + sum += do_usad(a >> 16, b >>16); + sum += do_usad(a >> 24, b >> 24); + return sum; +} + +/* For ARMv6 SEL instruction. */ +uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) +{ + uint32_t mask; + + mask = 0; + if (flags & 1) + mask |= 0xff; + if (flags & 2) + mask |= 0xff00; + if (flags & 4) + mask |= 0xff0000; + if (flags & 8) + mask |= 0xff000000; + return (a & mask) | (b & ~mask); +} + diff --git a/target-arm/helpers.h b/target-arm/helpers.h index 9f60814..854b67c 100644 --- a/target-arm/helpers.h +++ b/target-arm/helpers.h @@ -1,4 +1,4 @@ -#define DEF_HELPER(name, ret, args) ret helper_##name args; +#define DEF_HELPER(name, ret, args) ret glue(helper_,name) args; #ifdef GEN_HELPER #define DEF_HELPER_1_1(name, ret, args) \ @@ -13,10 +13,18 @@ static inline void gen_helper_##name(TCGv ret, TCGv arg1, TCGv arg2) \ { \ tcg_gen_helper_1_2(helper_##name, ret, arg1, arg2); \ } +#define DEF_HELPER_1_3(name, ret, args) \ +DEF_HELPER(name, ret, args) \ +static inline void gen_helper_##name(TCGv ret, \ + TCGv arg1, TCGv arg2, TCGv arg3) \ +{ \ + tcg_gen_helper_1_3(helper_##name, ret, arg1, arg2, arg3); \ +} #else /* !GEN_HELPER */ #define DEF_HELPER_1_1 DEF_HELPER #define DEF_HELPER_1_2 DEF_HELPER -#define HELPER(x) helper_##x +#define DEF_HELPER_1_3 DEF_HELPER +#define HELPER(x) glue(helper_,x) #endif DEF_HELPER_1_1(clz, uint32_t, (uint32_t)) @@ -33,6 +41,40 @@ DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t)) DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t)) DEF_HELPER_1_1(rbit, uint32_t, (uint32_t)) +#define PAS_OP(pfx) \ + DEF_HELPER_1_3(pfx ## add8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ + DEF_HELPER_1_3(pfx ## sub8, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ + DEF_HELPER_1_3(pfx ## sub16, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ + DEF_HELPER_1_3(pfx ## add16, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ + DEF_HELPER_1_3(pfx ## addsubx, uint32_t, (uint32_t, uint32_t, uint32_t *)) \ + DEF_HELPER_1_3(pfx ## subaddx, uint32_t, (uint32_t, uint32_t, uint32_t *)) + +PAS_OP(s) +PAS_OP(u) +#undef PAS_OP + +#define PAS_OP(pfx) \ + DEF_HELPER_1_2(pfx ## add8, uint32_t, (uint32_t, uint32_t)) \ + DEF_HELPER_1_2(pfx ## sub8, uint32_t, (uint32_t, uint32_t)) \ + DEF_HELPER_1_2(pfx ## sub16, uint32_t, (uint32_t, uint32_t)) \ + DEF_HELPER_1_2(pfx ## add16, uint32_t, (uint32_t, uint32_t)) \ + DEF_HELPER_1_2(pfx ## addsubx, uint32_t, (uint32_t, uint32_t)) \ + DEF_HELPER_1_2(pfx ## subaddx, uint32_t, (uint32_t, uint32_t)) +PAS_OP(q) +PAS_OP(sh) +PAS_OP(uq) +PAS_OP(uh) +#undef PAS_OP + +DEF_HELPER_1_2(ssat, uint32_t, (uint32_t, uint32_t)) +DEF_HELPER_1_2(usat, uint32_t, (uint32_t, uint32_t)) +DEF_HELPER_1_2(ssat16, uint32_t, (uint32_t, uint32_t)) +DEF_HELPER_1_2(usat16, uint32_t, (uint32_t, uint32_t)) + +DEF_HELPER_1_2(usad8, uint32_t, (uint32_t, uint32_t)) + +DEF_HELPER_1_3(sel_flags, uint32_t, (uint32_t, uint32_t, uint32_t)) + #undef DEF_HELPER #undef DEF_HELPER_1_1 #undef DEF_HELPER_1_2 diff --git a/target-arm/op.c b/target-arm/op.c index e714d41..c3150ad 100644 --- a/target-arm/op.c +++ b/target-arm/op.c @@ -805,327 +805,6 @@ void OPPROTO op_movl_user_T0(void) FORCE_RET(); } -/* ARMv6 Media instructions. */ - -/* Note that signed overflow is undefined in C. The following routines are - careful to use unsigned types where modulo arithmetic is required. - Failure to do so _will_ break on newer gcc. */ - -/* Signed saturating arithmetic. */ - -/* Perform 16-bit signed satruating addition. */ -static inline uint16_t add16_sat(uint16_t a, uint16_t b) -{ - uint16_t res; - - res = a + b; - if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { - if (a & 0x8000) - res = 0x8000; - else - res = 0x7fff; - } - return res; -} - -/* Perform 8-bit signed satruating addition. */ -static inline uint8_t add8_sat(uint8_t a, uint8_t b) -{ - uint8_t res; - - res = a + b; - if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { - if (a & 0x80) - res = 0x80; - else - res = 0x7f; - } - return res; -} - -/* Perform 16-bit signed satruating subtraction. */ -static inline uint16_t sub16_sat(uint16_t a, uint16_t b) -{ - uint16_t res; - - res = a - b; - if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { - if (a & 0x8000) - res = 0x8000; - else - res = 0x7fff; - } - return res; -} - -/* Perform 8-bit signed satruating subtraction. */ -static inline uint8_t sub8_sat(uint8_t a, uint8_t b) -{ - uint8_t res; - - res = a - b; - if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { - if (a & 0x80) - res = 0x80; - else - res = 0x7f; - } - return res; -} - -#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16); -#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16); -#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8); -#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8); -#define PFX q - -#include "op_addsub.h" - -/* Unsigned saturating arithmetic. */ -static inline uint16_t add16_usat(uint16_t a, uint8_t b) -{ - uint16_t res; - res = a + b; - if (res < a) - res = 0xffff; - return res; -} - -static inline uint16_t sub16_usat(uint16_t a, uint8_t b) -{ - if (a < b) - return a - b; - else - return 0; -} - -static inline uint8_t add8_usat(uint8_t a, uint8_t b) -{ - uint8_t res; - res = a + b; - if (res < a) - res = 0xff; - return res; -} - -static inline uint8_t sub8_usat(uint8_t a, uint8_t b) -{ - if (a < b) - return a - b; - else - return 0; -} - -#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); -#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16); -#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8); -#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8); -#define PFX uq - -#include "op_addsub.h" - -/* Signed modulo arithmetic. */ -#define SARITH16(a, b, n, op) do { \ - int32_t sum; \ - sum = (int16_t)((uint16_t)(a) op (uint16_t)(b)); \ - RESULT(sum, n, 16); \ - if (sum >= 0) \ - ge |= 3 << (n * 2); \ - } while(0) - -#define SARITH8(a, b, n, op) do { \ - int32_t sum; \ - sum = (int8_t)((uint8_t)(a) op (uint8_t)(b)); \ - RESULT(sum, n, 8); \ - if (sum >= 0) \ - ge |= 1 << n; \ - } while(0) - - -#define ADD16(a, b, n) SARITH16(a, b, n, +) -#define SUB16(a, b, n) SARITH16(a, b, n, -) -#define ADD8(a, b, n) SARITH8(a, b, n, +) -#define SUB8(a, b, n) SARITH8(a, b, n, -) -#define PFX s -#define ARITH_GE - -#include "op_addsub.h" - -/* Unsigned modulo arithmetic. */ -#define ADD16(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \ - RESULT(sum, n, 16); \ - if ((sum >> 16) == 0) \ - ge |= 3 << (n * 2); \ - } while(0) - -#define ADD8(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \ - RESULT(sum, n, 8); \ - if ((sum >> 8) == 0) \ - ge |= 3 << (n * 2); \ - } while(0) - -#define SUB16(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \ - RESULT(sum, n, 16); \ - if ((sum >> 16) == 0) \ - ge |= 3 << (n * 2); \ - } while(0) - -#define SUB8(a, b, n) do { \ - uint32_t sum; \ - sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \ - RESULT(sum, n, 8); \ - if ((sum >> 8) == 0) \ - ge |= 3 << (n * 2); \ - } while(0) - -#define PFX u -#define ARITH_GE - -#include "op_addsub.h" - -/* Halved signed arithmetic. */ -#define ADD16(a, b, n) \ - RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16) -#define SUB16(a, b, n) \ - RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16) -#define ADD8(a, b, n) \ - RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8) -#define SUB8(a, b, n) \ - RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8) -#define PFX sh - -#include "op_addsub.h" - -/* Halved unsigned arithmetic. */ -#define ADD16(a, b, n) \ - RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16) -#define SUB16(a, b, n) \ - RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16) -#define ADD8(a, b, n) \ - RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8) -#define SUB8(a, b, n) \ - RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8) -#define PFX uh - -#include "op_addsub.h" - -void OPPROTO op_sel_T0_T1(void) -{ - uint32_t mask; - uint32_t flags; - - flags = env->GE; - mask = 0; - if (flags & 1) - mask |= 0xff; - if (flags & 2) - mask |= 0xff00; - if (flags & 4) - mask |= 0xff0000; - if (flags & 8) - mask |= 0xff000000; - T0 = (T0 & mask) | (T1 & ~mask); - FORCE_RET(); -} - -/* Signed saturation. */ -static inline uint32_t do_ssat(int32_t val, int shift) -{ - int32_t top; - uint32_t mask; - - shift = PARAM1; - top = val >> shift; - mask = (1u << shift) - 1; - if (top > 0) { - env->QF = 1; - return mask; - } else if (top < -1) { - env->QF = 1; - return ~mask; - } - return val; -} - -/* Unsigned saturation. */ -static inline uint32_t do_usat(int32_t val, int shift) -{ - uint32_t max; - - shift = PARAM1; - max = (1u << shift) - 1; - if (val < 0) { - env->QF = 1; - return 0; - } else if (val > max) { - env->QF = 1; - return max; - } - return val; -} - -/* Signed saturate. */ -void OPPROTO op_ssat_T1(void) -{ - T0 = do_ssat(T0, PARAM1); - FORCE_RET(); -} - -/* Dual halfword signed saturate. */ -void OPPROTO op_ssat16_T1(void) -{ - uint32_t res; - - res = (uint16_t)do_ssat((int16_t)T0, PARAM1); - res |= do_ssat(((int32_t)T0) >> 16, PARAM1) << 16; - T0 = res; - FORCE_RET(); -} - -/* Unsigned saturate. */ -void OPPROTO op_usat_T1(void) -{ - T0 = do_usat(T0, PARAM1); - FORCE_RET(); -} - -/* Dual halfword unsigned saturate. */ -void OPPROTO op_usat16_T1(void) -{ - uint32_t res; - - res = (uint16_t)do_usat((int16_t)T0, PARAM1); - res |= do_usat(((int32_t)T0) >> 16, PARAM1) << 16; - T0 = res; - FORCE_RET(); -} - -/* Dual 16-bit add. */ -static inline uint8_t do_usad(uint8_t a, uint8_t b) -{ - if (a > b) - return a - b; - else - return b - a; -} - -/* Unsigned sum of absolute byte differences. */ -void OPPROTO op_usad8_T0_T1(void) -{ - uint32_t sum; - sum = do_usad(T0, T1); - sum += do_usad(T0 >> 8, T1 >> 8); - sum += do_usad(T0 >> 16, T1 >>16); - sum += do_usad(T0 >> 24, T1 >> 24); - T0 = sum; -} - void OPPROTO op_movl_T1_r13_banked(void) { T1 = helper_get_r13_banked(env, PARAM1); diff --git a/target-arm/op_addsub.h b/target-arm/op_addsub.h index d15360d..376ee27 100644 --- a/target-arm/op_addsub.h +++ b/target-arm/op_addsub.h @@ -8,9 +8,11 @@ */ #ifdef ARITH_GE +#define GE_ARG , uint32_t *gep #define DECLARE_GE uint32_t ge = 0 -#define SET_GE env->GE = ge +#define SET_GE *gep = ge #else +#define GE_ARG #define DECLARE_GE do{}while(0) #define SET_GE do{}while(0) #endif @@ -18,82 +20,77 @@ #define RESULT(val, n, width) \ res |= ((uint32_t)(glue(glue(uint,width),_t))(val)) << (n * width) -void OPPROTO glue(glue(op_,PFX),add16_T0_T1)(void) +uint32_t HELPER(glue(PFX,add16))(uint32_t a, uint32_t b GE_ARG) { uint32_t res = 0; DECLARE_GE; - ADD16(T0, T1, 0); - ADD16(T0 >> 16, T1 >> 16, 1); + ADD16(a, b, 0); + ADD16(a >> 16, b >> 16, 1); SET_GE; - T0 = res; - FORCE_RET(); + return res; } -void OPPROTO glue(glue(op_,PFX),add8_T0_T1)(void) +uint32_t HELPER(glue(PFX,add8))(uint32_t a, uint32_t b GE_ARG) { uint32_t res = 0; DECLARE_GE; - ADD8(T0, T1, 0); - ADD8(T0 >> 8, T1 >> 8, 1); - ADD8(T0 >> 16, T1 >> 16, 2); - ADD8(T0 >> 24, T1 >> 24, 3); + ADD8(a, b, 0); + ADD8(a >> 8, b >> 8, 1); + ADD8(a >> 16, b >> 16, 2); + ADD8(a >> 24, b >> 24, 3); SET_GE; - T0 = res; - FORCE_RET(); + return res; } -void OPPROTO glue(glue(op_,PFX),sub16_T0_T1)(void) +uint32_t HELPER(glue(PFX,sub16))(uint32_t a, uint32_t b GE_ARG) { uint32_t res = 0; DECLARE_GE; - SUB16(T0, T1, 0); - SUB16(T0 >> 16, T1 >> 16, 1); + SUB16(a, b, 0); + SUB16(a >> 16, b >> 16, 1); SET_GE; - T0 = res; - FORCE_RET(); + return res; } -void OPPROTO glue(glue(op_,PFX),sub8_T0_T1)(void) +uint32_t HELPER(glue(PFX,sub8))(uint32_t a, uint32_t b GE_ARG) { uint32_t res = 0; DECLARE_GE; - SUB8(T0, T1, 0); - SUB8(T0 >> 8, T1 >> 8, 1); - SUB8(T0 >> 16, T1 >> 16, 2); - SUB8(T0 >> 24, T1 >> 24, 3); + SUB8(a, b, 0); + SUB8(a >> 8, b >> 8, 1); + SUB8(a >> 16, b >> 16, 2); + SUB8(a >> 24, b >> 24, 3); SET_GE; - T0 = res; - FORCE_RET(); + return res; } -void OPPROTO glue(glue(op_,PFX),subaddx_T0_T1)(void) +uint32_t HELPER(glue(PFX,subaddx))(uint32_t a, uint32_t b GE_ARG) { uint32_t res = 0; DECLARE_GE; - ADD16(T0, T1, 0); - SUB16(T0 >> 16, T1 >> 16, 1); + ADD16(a, b, 0); + SUB16(a >> 16, b >> 16, 1); SET_GE; - T0 = res; - FORCE_RET(); + return res; } -void OPPROTO glue(glue(op_,PFX),addsubx_T0_T1)(void) +uint32_t HELPER(glue(PFX,addsubx))(uint32_t a, uint32_t b GE_ARG) { uint32_t res = 0; DECLARE_GE; - SUB16(T0, T1, 0); - ADD16(T0 >> 16, T1 >> 16, 1); + SUB16(a, b, 0); + ADD16(a >> 16, b >> 16, 1); SET_GE; - T0 = res; - FORCE_RET(); + return res; } +#undef GE_ARG #undef DECLARE_GE #undef SET_GE #undef RESULT diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 1b90f58..2d3abfd 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -369,3 +369,70 @@ uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b) return res; } +/* Signed saturation. */ +static inline uint32_t do_ssat(int32_t val, int shift) +{ + int32_t top; + uint32_t mask; + + shift = PARAM1; + top = val >> shift; + mask = (1u << shift) - 1; + if (top > 0) { + env->QF = 1; + return mask; + } else if (top < -1) { + env->QF = 1; + return ~mask; + } + return val; +} + +/* Unsigned saturation. */ +static inline uint32_t do_usat(int32_t val, int shift) +{ + uint32_t max; + + shift = PARAM1; + max = (1u << shift) - 1; + if (val < 0) { + env->QF = 1; + return 0; + } else if (val > max) { + env->QF = 1; + return max; + } + return val; +} + +/* Signed saturate. */ +uint32_t HELPER(ssat)(uint32_t x, uint32_t shift) +{ + return do_ssat(x, shift); +} + +/* Dual halfword signed saturate. */ +uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift) +{ + uint32_t res; + + res = (uint16_t)do_ssat((int16_t)x, shift); + res |= do_ssat(((int32_t)x) >> 16, shift) << 16; + return res; +} + +/* Unsigned saturate. */ +uint32_t HELPER(usat)(uint32_t x, uint32_t shift) +{ + return do_usat(x, shift); +} + +/* Dual halfword unsigned saturate. */ +uint32_t HELPER(usat16)(uint32_t x, uint32_t shift) +{ + uint32_t res; + + res = (uint16_t)do_usat((int16_t)x, shift); + res |= do_usat(((int32_t)x) >> 16, shift) << 16; + return res; +} diff --git a/target-arm/translate.c b/target-arm/translate.c index e46cfb9..f732f91 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -226,7 +226,6 @@ static void gen_smul_dual(TCGv a, TCGv b) { TCGv tmp1 = new_tmp(); TCGv tmp2 = new_tmp(); - TCGv res; tcg_gen_ext8s_i32(tmp1, a); tcg_gen_ext8s_i32(tmp2, b); tcg_gen_mul_i32(tmp1, tmp1, tmp2); @@ -495,49 +494,93 @@ static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags) } }; -#define PAS_OP(pfx) { \ - gen_op_ ## pfx ## add16_T0_T1, \ - gen_op_ ## pfx ## addsubx_T0_T1, \ - gen_op_ ## pfx ## subaddx_T0_T1, \ - gen_op_ ## pfx ## sub16_T0_T1, \ - gen_op_ ## pfx ## add8_T0_T1, \ - NULL, \ - NULL, \ - gen_op_ ## pfx ## sub8_T0_T1 } - -static GenOpFunc *gen_arm_parallel_addsub[8][8] = { - {}, - PAS_OP(s), - PAS_OP(q), - PAS_OP(sh), - {}, - PAS_OP(u), - PAS_OP(uq), - PAS_OP(uh), -}; +#define PAS_OP(pfx) \ + switch (op2) { \ + case 0: gen_pas_helper(glue(pfx,add16)); break; \ + case 1: gen_pas_helper(glue(pfx,addsubx)); break; \ + case 2: gen_pas_helper(glue(pfx,subaddx)); break; \ + case 3: gen_pas_helper(glue(pfx,sub16)); break; \ + case 4: gen_pas_helper(glue(pfx,add8)); break; \ + case 7: gen_pas_helper(glue(pfx,sub8)); break; \ + } +void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b) +{ + TCGv tmp; + + switch (op1) { +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp) + case 1: + tmp = tcg_temp_new(TCG_TYPE_PTR); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); + PAS_OP(s) + break; + case 5: + tmp = tcg_temp_new(TCG_TYPE_PTR); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); + PAS_OP(u) + break; +#undef gen_pas_helper +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b) + case 2: + PAS_OP(q); + break; + case 3: + PAS_OP(sh); + break; + case 6: + PAS_OP(uq); + break; + case 7: + PAS_OP(uh); + break; +#undef gen_pas_helper + } +} #undef PAS_OP -/* For unknown reasons Arm and Thumb-2 use arbitrarily diffenet encodings. */ -#define PAS_OP(pfx) { \ - gen_op_ ## pfx ## add8_T0_T1, \ - gen_op_ ## pfx ## add16_T0_T1, \ - gen_op_ ## pfx ## addsubx_T0_T1, \ - NULL, \ - gen_op_ ## pfx ## sub8_T0_T1, \ - gen_op_ ## pfx ## sub16_T0_T1, \ - gen_op_ ## pfx ## subaddx_T0_T1, \ - NULL } - -static GenOpFunc *gen_thumb2_parallel_addsub[8][8] = { - PAS_OP(s), - PAS_OP(q), - PAS_OP(sh), - {}, - PAS_OP(u), - PAS_OP(uq), - PAS_OP(uh), - {} -}; +/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */ +#define PAS_OP(pfx) \ + switch (op2) { \ + case 0: gen_pas_helper(glue(pfx,add8)); break; \ + case 1: gen_pas_helper(glue(pfx,add16)); break; \ + case 2: gen_pas_helper(glue(pfx,addsubx)); break; \ + case 4: gen_pas_helper(glue(pfx,sub8)); break; \ + case 5: gen_pas_helper(glue(pfx,sub16)); break; \ + case 6: gen_pas_helper(glue(pfx,subaddx)); break; \ + } +void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b) +{ + TCGv tmp; + + switch (op1) { +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp) + case 0: + tmp = tcg_temp_new(TCG_TYPE_PTR); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); + PAS_OP(s) + break; + case 4: + tmp = tcg_temp_new(TCG_TYPE_PTR); + tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE)); + PAS_OP(u) + break; +#undef gen_pas_helper +#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b) + case 1: + PAS_OP(q); + break; + case 2: + PAS_OP(sh); + break; + case 5: + PAS_OP(uq); + break; + case 6: + PAS_OP(uh); + break; +#undef gen_pas_helper + } +} #undef PAS_OP static GenOpFunc1 *gen_test_cc[14] = { @@ -4906,6 +4949,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh; TCGv tmp; TCGv tmp2; + TCGv tmp3; insn = ldl_code(s->pc); s->pc += 4; @@ -5591,13 +5635,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) switch ((insn >> 23) & 3) { case 0: /* Parallel add/subtract. */ op1 = (insn >> 20) & 7; - gen_movl_T0_reg(s, rn); - gen_movl_T1_reg(s, rm); + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); sh = (insn >> 5) & 7; if ((op1 & 3) == 0 || sh == 5 || sh == 6) goto illegal_op; - gen_arm_parallel_addsub[op1][sh](); - gen_movl_reg_T0(s, rd); + gen_arm_parallel_addsub(op1, sh, tmp, tmp2); + dead_tmp(tmp2); + store_reg(s, rd, tmp); break; case 1: if ((insn & 0x00700020) == 0) { @@ -5620,40 +5665,44 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) store_reg(s, rd, tmp); } else if ((insn & 0x00200020) == 0x00200000) { /* [us]sat */ - gen_movl_T1_reg(s, rm); + tmp = load_reg(s, rm); shift = (insn >> 7) & 0x1f; if (insn & (1 << 6)) { if (shift == 0) shift = 31; - gen_op_sarl_T1_im(shift); + tcg_gen_sari_i32(tmp, tmp, shift); } else { - gen_op_shll_T1_im(shift); + tcg_gen_shli_i32(tmp, tmp, shift); } sh = (insn >> 16) & 0x1f; if (sh != 0) { if (insn & (1 << 22)) - gen_op_usat_T1(sh); + gen_helper_usat(tmp, tmp, tcg_const_i32(sh)); else - gen_op_ssat_T1(sh); + gen_helper_ssat(tmp, tmp, tcg_const_i32(sh)); } - gen_movl_T1_reg(s, rd); + store_reg(s, rd, tmp); } else if ((insn & 0x00300fe0) == 0x00200f20) { /* [us]sat16 */ - gen_movl_T1_reg(s, rm); + tmp = load_reg(s, rm); sh = (insn >> 16) & 0x1f; if (sh != 0) { if (insn & (1 << 22)) - gen_op_usat16_T1(sh); + gen_helper_usat16(tmp, tmp, tcg_const_i32(sh)); else - gen_op_ssat16_T1(sh); + gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh)); } - gen_movl_T1_reg(s, rd); + store_reg(s, rd, tmp); } else if ((insn & 0x00700fe0) == 0x00000fa0) { /* Select bytes. */ - gen_movl_T0_reg(s, rn); - gen_movl_T1_reg(s, rm); - gen_op_sel_T0_T1(); - gen_movl_reg_T0(s, rd); + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + tmp3 = new_tmp(); + tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE)); + gen_helper_sel_flags(tmp, tmp3, tmp, tmp2); + dead_tmp(tmp3); + dead_tmp(tmp2); + store_reg(s, rd, tmp); } else if ((insn & 0x000003e0) == 0x00000060) { gen_movl_T1_reg(s, rm); shift = (insn >> 10) & 3; @@ -5755,15 +5804,17 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7); switch (op1) { case 0: /* Unsigned sum of absolute differences. */ - goto illegal_op; - gen_movl_T0_reg(s, rm); - gen_movl_T1_reg(s, rs); - gen_op_usad8_T0_T1(); + ARCH(6); + tmp = load_reg(s, rm); + tmp2 = load_reg(s, rs); + gen_helper_usad8(tmp, tmp, tmp2); + dead_tmp(tmp2); if (rn != 15) { - gen_movl_T1_reg(s, rn); - gen_op_addl_T0_T1(); + tmp2 = load_reg(s, rn); + tcg_gen_add_i32(tmp, tmp, tmp2); + dead_tmp(tmp2); } - gen_movl_reg_T0(s, rd); + store_reg(s, rd, tmp); break; case 0x20: case 0x24: case 0x28: case 0x2c: /* Bitfield insert/clear. */ @@ -6120,6 +6171,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) uint32_t insn, imm, shift, offset, addr; uint32_t rd, rn, rm, rs; TCGv tmp; + TCGv tmp2; + TCGv tmp3; int op; int shiftop; int conds; @@ -6464,10 +6517,11 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) shift = (insn >> 4) & 7; if ((op & 3) == 3 || (shift & 3) == 3) goto illegal_op; - gen_movl_T0_reg(s, rn); - gen_movl_T1_reg(s, rm); - gen_thumb2_parallel_addsub[op][shift](); - gen_movl_reg_T0(s, rd); + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); + gen_thumb2_parallel_addsub(op, shift, tmp, tmp2); + dead_tmp(tmp2); + store_reg(s, rd, tmp); break; case 3: /* Other data processing. */ op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7); @@ -6498,7 +6552,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) break; case 0x10: /* sel */ gen_movl_T1_reg(s, rm); - gen_op_sel_T0_T1(); + tmp3 = new_tmp(); + tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE)); + gen_helper_sel_flags(cpu_T[0], tmp3, cpu_T[0], cpu_T[1]); + dead_tmp(tmp3); break; case 0x18: /* clz */ gen_helper_clz(cpu_T[0], cpu_T[0]); @@ -6581,7 +6638,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_movl_reg_T0(s, rd); break; case 7: /* Unsigned sum of absolute differences. */ - gen_op_usad8_T0_T1(); + gen_helper_usad8(cpu_T[0], cpu_T[0], cpu_T[1]); if (rs != 15) { gen_movl_T1_reg(s, rs); gen_op_addl_T0_T1(); @@ -6821,63 +6878,64 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) op = (insn >> 21) & 7; imm = insn & 0x1f; shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c); - if (rn == 15) - gen_op_movl_T1_im(0); - else - gen_movl_T1_reg(s, rn); + if (rn == 15) { + tmp = new_tmp(); + tcg_gen_movi_i32(tmp, 0); + } else { + tmp = load_reg(s, rn); + } switch (op) { case 2: /* Signed bitfield extract. */ imm++; if (shift + imm > 32) goto illegal_op; if (imm < 32) - gen_sbfx(cpu_T[1], shift, imm); + gen_sbfx(tmp, shift, imm); break; case 6: /* Unsigned bitfield extract. */ imm++; if (shift + imm > 32) goto illegal_op; if (imm < 32) - gen_ubfx(cpu_T[1], shift, (1u << imm) - 1); + gen_ubfx(tmp, shift, (1u << imm) - 1); break; case 3: /* Bitfield insert/clear. */ if (imm < shift) goto illegal_op; imm = imm + 1 - shift; if (imm != 32) { - gen_movl_T0_reg(s, rd); - gen_bfi(cpu_T[1], cpu_T[0], cpu_T[1], + tmp2 = load_reg(s, rd); + gen_bfi(tmp, tmp2, tmp, shift, ((1u << imm) - 1) << shift); + dead_tmp(tmp2); } break; case 7: goto illegal_op; default: /* Saturate. */ - gen_movl_T1_reg(s, rn); if (shift) { if (op & 1) - gen_op_sarl_T1_im(shift); + tcg_gen_sari_i32(tmp, tmp, shift); else - gen_op_shll_T1_im(shift); + tcg_gen_shli_i32(tmp, tmp, shift); } + tmp2 = tcg_const_i32(imm); if (op & 4) { /* Unsigned. */ - gen_op_ssat_T1(imm); if ((op & 1) && shift == 0) - gen_op_usat16_T1(imm); + gen_helper_usat16(tmp, tmp, tmp2); else - gen_op_usat_T1(imm); + gen_helper_usat(tmp, tmp, tmp2); } else { /* Signed. */ - gen_op_ssat_T1(imm); if ((op & 1) && shift == 0) - gen_op_ssat16_T1(imm); + gen_helper_ssat16(tmp, tmp, tmp2); else - gen_op_ssat_T1(imm); + gen_helper_ssat(tmp, tmp, tmp2); } break; } - gen_movl_reg_T1(s, rd); + store_reg(s, rd, tmp); } else { imm = ((insn & 0x04000000) >> 15) | ((insn & 0x7000) >> 4) | (insn & 0xff); diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index f05c135..27f83b5 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -237,6 +237,18 @@ static inline void tcg_gen_helper_1_2(void *func, TCGv ret, 1, &ret, 2, args); } +static inline void tcg_gen_helper_1_3(void *func, TCGv ret, + TCGv arg1, TCGv arg2, TCGv arg3) +{ + TCGv args[3]; + args[0] = arg1; + args[1] = arg2; + args[2] = arg3; + tcg_gen_call(&tcg_ctx, + tcg_const_ptr((tcg_target_long)func), TCG_HELPER_CALL_FLAGS, + 1, &ret, 3, args); +} + static inline void tcg_gen_helper_1_4(void *func, TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, TCGv arg4) @@ -1416,3 +1428,10 @@ static inline void tcg_gen_qemu_st64(TCGv arg, TCGv addr, int mem_index) #define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64 #define tcg_const_tl tcg_const_i32 #endif + +#if TCG_TARGET_REG_BITS == 32 +#define tcg_gen_addi_ptr tcg_gen_addi_i32 +#else /* TCG_TARGET_REG_BITS == 32 */ +#define tcg_gen_addi_ptr tcg_gen_addi_i64 +#endif /* TCG_TARGET_REG_BITS != 32 */ + |