diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-05-02 16:23:05 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-05-02 16:23:05 +0100 |
commit | 15106f7dc3290ff3254611f265849a314a93eb0e (patch) | |
tree | 85d40a24a95c1b70bb20f277b77896971ae47254 | |
parent | 53c5433e84e8935abed8e91d4a2eb813168a0ecf (diff) | |
parent | e628c0156be74dd14a261bbd18674bacd1afcc7d (diff) | |
download | qemu-15106f7dc3290ff3254611f265849a314a93eb0e.zip qemu-15106f7dc3290ff3254611f265849a314a93eb0e.tar.gz qemu-15106f7dc3290ff3254611f265849a314a93eb0e.tar.bz2 |
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-hex-20210502' into staging
Minor cleanups.
Finish the rest of the hexagon integer instructions.
# gpg: Signature made Sun 02 May 2021 15:38:17 BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F
* remotes/rth-gitlab/tags/pull-hex-20210502: (31 commits)
Hexagon (target/hexagon) CABAC decode bin
Hexagon (target/hexagon) load into shifted register instructions
Hexagon (target/hexagon) load and unpack bytes instructions
Hexagon (target/hexagon) bit reverse (brev) addressing
Hexagon (target/hexagon) circular addressing
Hexagon (target/hexagon) add A4_addp_c/A4_subp_c
Hexagon (target/hexagon) add A6_vminub_RdP
Hexagon (target/hexagon) add A5_ACS (vacsh)
Hexagon (target/hexagon) add F2_sfinvsqrta
Hexagon (target/hexagon) add F2_sfrecipa instruction
Hexagon (target/hexagon) compile all debug code
Hexagon (target/hexagon) move QEMU_GENERATE to only be on during macros.h
Hexagon (target/hexagon) cleanup reg_field_info definition
Hexagon (target/hexagon) cleanup ternary operators in semantics
Hexagon (target/hexagon) use softfloat for float-to-int conversions
Hexagon (target/hexagon) replace float32_mul_pow2 with float32_scalbn
Hexagon (target/hexagon) use softfloat default NaN and tininess
Hexagon (target/hexagon) change type of softfloat_roundingmodes
Hexagon (target/hexagon) remove unused carry_from_add64 function
Hexagon (target/hexagon) change variables from int to bool when appropriate
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
40 files changed, 3754 insertions, 636 deletions
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index c2f87ad..9ea318f 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -145,6 +145,9 @@ static FloatParts parts_default_nan(float_status *status) #elif defined(TARGET_HPPA) /* snan_bit_is_one, set msb-1. */ frac = 1ULL << (DECOMPOSED_BINARY_POINT - 2); +#elif defined(TARGET_HEXAGON) + sign = 1; + frac = ~0ULL; #else /* This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V, * S390, SH4, TriCore, and Xtensa. I cannot find documentation diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c index 9a68ca0..bc34f5d 100644 --- a/linux-user/hexagon/cpu_loop.c +++ b/linux-user/hexagon/cpu_loop.c @@ -25,7 +25,7 @@ void cpu_loop(CPUHexagonState *env) { - CPUState *cs = CPU(hexagon_env_get_cpu(env)); + CPUState *cs = env_cpu(env); int trapnr, signum, sigcode; target_ulong sigaddr; target_ulong syscallnum; diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 09de124..68a55b3 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -27,6 +27,97 @@ #define SF_MANTBITS 23 #define float32_nan make_float32(0xffffffff) +/* + * These three tables are used by the cabacdecbin instruction + */ +const uint8_t rLPS_table_64x4[64][4] = { + {128, 176, 208, 240}, + {128, 167, 197, 227}, + {128, 158, 187, 216}, + {123, 150, 178, 205}, + {116, 142, 169, 195}, + {111, 135, 160, 185}, + {105, 128, 152, 175}, + {100, 122, 144, 166}, + {95, 116, 137, 158}, + {90, 110, 130, 150}, + {85, 104, 123, 142}, + {81, 99, 117, 135}, + {77, 94, 111, 128}, + {73, 89, 105, 122}, + {69, 85, 100, 116}, + {66, 80, 95, 110}, + {62, 76, 90, 104}, + {59, 72, 86, 99}, + {56, 69, 81, 94}, + {53, 65, 77, 89}, + {51, 62, 73, 85}, + {48, 59, 69, 80}, + {46, 56, 66, 76}, + {43, 53, 63, 72}, + {41, 50, 59, 69}, + {39, 48, 56, 65}, + {37, 45, 54, 62}, + {35, 43, 51, 59}, + {33, 41, 48, 56}, + {32, 39, 46, 53}, + {30, 37, 43, 50}, + {29, 35, 41, 48}, + {27, 33, 39, 
45}, + {26, 31, 37, 43}, + {24, 30, 35, 41}, + {23, 28, 33, 39}, + {22, 27, 32, 37}, + {21, 26, 30, 35}, + {20, 24, 29, 33}, + {19, 23, 27, 31}, + {18, 22, 26, 30}, + {17, 21, 25, 28}, + {16, 20, 23, 27}, + {15, 19, 22, 25}, + {14, 18, 21, 24}, + {14, 17, 20, 23}, + {13, 16, 19, 22}, + {12, 15, 18, 21}, + {12, 14, 17, 20}, + {11, 14, 16, 19}, + {11, 13, 15, 18}, + {10, 12, 15, 17}, + {10, 12, 14, 16}, + {9, 11, 13, 15}, + {9, 11, 12, 14}, + {8, 10, 12, 14}, + {8, 9, 11, 13}, + {7, 9, 11, 12}, + {7, 9, 10, 12}, + {7, 8, 10, 11}, + {6, 8, 9, 11}, + {6, 7, 9, 10}, + {6, 7, 8, 9}, + {2, 2, 2, 2} +}; + +const uint8_t AC_next_state_MPS_64[64] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 62, 63 +}; + + +const uint8_t AC_next_state_LPS_64[64] = { + 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, + 8, 9, 9, 11, 11, 12, 13, 13, 15, 15, + 16, 16, 18, 18, 19, 19, 21, 21, 22, 22, + 23, 24, 24, 25, 26, 26, 27, 27, 28, 29, + 29, 30, 30, 30, 31, 32, 32, 33, 33, 33, + 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, + 37, 38, 38, 63 +}; + #define BITS_MASK_8 0x5555555555555555ULL #define PAIR_MASK_8 0x3333333333333333ULL #define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL @@ -76,19 +167,6 @@ uint64_t deinterleave(uint64_t src) return myeven | (myodd << 32); } -uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c) -{ - uint64_t tmpa, tmpb, tmpc; - tmpa = fGETUWORD(0, a); - tmpb = fGETUWORD(0, b); - tmpc = tmpa + tmpb + c; - tmpa = fGETUWORD(1, a); - tmpb = fGETUWORD(1, b); - tmpc = tmpa + tmpb + fGETUWORD(1, tmpc); - tmpc = fGETUWORD(1, tmpc); - return tmpc; -} - int32_t conv_round(int32_t a, int n) { int64_t val; @@ -108,7 +186,7 @@ int32_t conv_round(int32_t a, int n) /* Floating Point Stuff */ -static const int softfloat_roundingmodes[] = { +static const FloatRoundMode softfloat_roundingmodes[] = { 
float_round_nearest_even, float_round_to_zero, float_round_down, @@ -156,12 +234,6 @@ void arch_fpop_end(CPUHexagonState *env) } } -static float32 float32_mul_pow2(float32 a, uint32_t p, float_status *fp_status) -{ - float32 b = make_float32((SF_BIAS + p) << SF_MANTBITS); - return float32_mul(a, b, fp_status); -} - int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, float_status *fp_status) { @@ -200,12 +272,13 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, /* or put Inf in num fixup? */ uint8_t RsV_sign = float32_is_neg(RsV); uint8_t RtV_sign = float32_is_neg(RtV); + /* Check that RsV is NOT infinite before we overwrite it */ + if (!float32_is_infinity(RsV)) { + float_raise(float_flag_divbyzero, fp_status); + } RsV = infinite_float32(RsV_sign ^ RtV_sign); RtV = float32_one; RdV = float32_one; - if (float32_is_infinity(RsV)) { - float_raise(float_flag_divbyzero, fp_status); - } } else if (float32_is_infinity(RtV)) { RsV = make_float32(0x80000000 & (RsV ^ RtV)); RtV = float32_one; @@ -230,22 +303,22 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { /* Near quotient underflow / inexact Q */ PeV = 0x80; - RtV = float32_mul_pow2(RtV, -64, fp_status); - RsV = float32_mul_pow2(RsV, 64, fp_status); + RtV = float32_scalbn(RtV, -64, fp_status); + RsV = float32_scalbn(RsV, 64, fp_status); } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) { /* Near quotient overflow */ PeV = 0x40; - RtV = float32_mul_pow2(RtV, 32, fp_status); - RsV = float32_mul_pow2(RsV, -32, fp_status); + RtV = float32_scalbn(RtV, 32, fp_status); + RsV = float32_scalbn(RsV, -32, fp_status); } else if (n_exp <= SF_MANTBITS + 2) { - RtV = float32_mul_pow2(RtV, 64, fp_status); - RsV = float32_mul_pow2(RsV, 64, fp_status); + RtV = float32_scalbn(RtV, 64, fp_status); + RsV = float32_scalbn(RsV, 64, fp_status); } else if (d_exp <= 1) { - RtV = float32_mul_pow2(RtV, 32, 
fp_status); - RsV = float32_mul_pow2(RsV, 32, fp_status); + RtV = float32_scalbn(RtV, 32, fp_status); + RsV = float32_scalbn(RsV, 32, fp_status); } else if (d_exp > 252) { - RtV = float32_mul_pow2(RtV, -32, fp_status); - RsV = float32_mul_pow2(RsV, -32, fp_status); + RtV = float32_scalbn(RtV, -32, fp_status); + RsV = float32_scalbn(RsV, -32, fp_status); } RdV = 0; ret = 1; @@ -265,7 +338,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, int r_exp; int ret = 0; RsV = *Rs; - if (float32_is_infinity(RsV)) { + if (float32_is_any_nan(RsV)) { if (extract32(RsV, 22, 1) == 0) { float_raise(float_flag_invalid, fp_status); } @@ -287,7 +360,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, /* Basic checks passed */ r_exp = float32_getexp(RsV); if (r_exp <= 24) { - RsV = float32_mul_pow2(RsV, 64, fp_status); + RsV = float32_scalbn(RsV, 64, fp_status); PeV = 0xe0; } RdV = 0; @@ -298,3 +371,41 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, *adjust = PeV; return ret; } + +const uint8_t recip_lookup_table[128] = { + 0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4, + 0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9, + 0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1, + 0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b, + 0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087, + 0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075, + 0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065, + 0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056, + 0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049, + 0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c, + 0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030, + 0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025, + 0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b, + 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012, + 0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, + 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 
0x000, +}; + +const uint8_t invsqrt_lookup_table[128] = { + 0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057, + 0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045, + 0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036, + 0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028, + 0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d, + 0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012, + 0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, + 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001, + 0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4, + 0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb, + 0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6, + 0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3, + 0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093, + 0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084, + 0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077, + 0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b, +}; diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h index 1f7f036..7091806 100644 --- a/target/hexagon/arch.h +++ b/target/hexagon/arch.h @@ -20,9 +20,12 @@ #include "qemu/int128.h" +extern const uint8_t rLPS_table_64x4[64][4]; +extern const uint8_t AC_next_state_MPS_64[64]; +extern const uint8_t AC_next_state_LPS_64[64]; + uint64_t interleave(uint32_t odd, uint32_t even); uint64_t deinterleave(uint64_t src); -uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c); int32_t conv_round(int32_t a, int n); void arch_fpop_start(CPUHexagonState *env); void arch_fpop_end(CPUHexagonState *env); @@ -31,4 +34,8 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, float_status *fp_status); +extern const uint8_t recip_lookup_table[128]; + +extern const uint8_t invsqrt_lookup_table[128]; + #endif diff --git a/target/hexagon/conv_emu.c b/target/hexagon/conv_emu.c deleted file mode 100644 index 3985b10..0000000 --- 
a/target/hexagon/conv_emu.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "qemu/host-utils.h" -#include "fpu/softfloat.h" -#include "macros.h" -#include "conv_emu.h" - -#define LL_MAX_POS 0x7fffffffffffffffULL -#define MAX_POS 0x7fffffffU - -static uint64_t conv_f64_to_8u_n(float64 in, int will_negate, - float_status *fp_status) -{ - uint8_t sign = float64_is_neg(in); - if (float64_is_infinity(in)) { - float_raise(float_flag_invalid, fp_status); - if (float64_is_neg(in)) { - return 0ULL; - } else { - return ~0ULL; - } - } - if (float64_is_any_nan(in)) { - float_raise(float_flag_invalid, fp_status); - return ~0ULL; - } - if (float64_is_zero(in)) { - return 0; - } - if (sign) { - float_raise(float_flag_invalid, fp_status); - return 0; - } - if (float64_lt(in, float64_half, fp_status)) { - /* Near zero, captures large fracshifts, denorms, etc */ - float_raise(float_flag_inexact, fp_status); - switch (get_float_rounding_mode(fp_status)) { - case float_round_down: - if (will_negate) { - return 1; - } else { - return 0; - } - case float_round_up: - if (!will_negate) { - return 1; - } else { - return 0; - } - default: - return 0; /* nearest or towards zero */ - } - } - return float64_to_uint64(in, fp_status); -} - 
-static void clr_float_exception_flags(uint8_t flag, float_status *fp_status) -{ - uint8_t flags = fp_status->float_exception_flags; - flags &= ~flag; - set_float_exception_flags(flags, fp_status); -} - -static uint32_t conv_df_to_4u_n(float64 fp64, int will_negate, - float_status *fp_status) -{ - uint64_t tmp; - tmp = conv_f64_to_8u_n(fp64, will_negate, fp_status); - if (tmp > 0x00000000ffffffffULL) { - clr_float_exception_flags(float_flag_inexact, fp_status); - float_raise(float_flag_invalid, fp_status); - return ~0U; - } - return (uint32_t)tmp; -} - -uint64_t conv_df_to_8u(float64 in, float_status *fp_status) -{ - return conv_f64_to_8u_n(in, 0, fp_status); -} - -uint32_t conv_df_to_4u(float64 in, float_status *fp_status) -{ - return conv_df_to_4u_n(in, 0, fp_status); -} - -int64_t conv_df_to_8s(float64 in, float_status *fp_status) -{ - uint8_t sign = float64_is_neg(in); - uint64_t tmp; - if (float64_is_any_nan(in)) { - float_raise(float_flag_invalid, fp_status); - return -1; - } - if (sign) { - float64 minus_fp64 = float64_abs(in); - tmp = conv_f64_to_8u_n(minus_fp64, 1, fp_status); - } else { - tmp = conv_f64_to_8u_n(in, 0, fp_status); - } - if (tmp > (LL_MAX_POS + sign)) { - clr_float_exception_flags(float_flag_inexact, fp_status); - float_raise(float_flag_invalid, fp_status); - tmp = (LL_MAX_POS + sign); - } - if (sign) { - return -tmp; - } else { - return tmp; - } -} - -int32_t conv_df_to_4s(float64 in, float_status *fp_status) -{ - uint8_t sign = float64_is_neg(in); - uint64_t tmp; - if (float64_is_any_nan(in)) { - float_raise(float_flag_invalid, fp_status); - return -1; - } - if (sign) { - float64 minus_fp64 = float64_abs(in); - tmp = conv_f64_to_8u_n(minus_fp64, 1, fp_status); - } else { - tmp = conv_f64_to_8u_n(in, 0, fp_status); - } - if (tmp > (MAX_POS + sign)) { - clr_float_exception_flags(float_flag_inexact, fp_status); - float_raise(float_flag_invalid, fp_status); - tmp = (MAX_POS + sign); - } - if (sign) { - return -tmp; - } else { - return tmp; - 
} -} - -uint64_t conv_sf_to_8u(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_8u(fp64, fp_status); -} - -uint32_t conv_sf_to_4u(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_4u(fp64, fp_status); -} - -int64_t conv_sf_to_8s(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_8s(fp64, fp_status); -} - -int32_t conv_sf_to_4s(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_4s(fp64, fp_status); -} diff --git a/target/hexagon/conv_emu.h b/target/hexagon/conv_emu.h deleted file mode 100644 index cade9de..0000000 --- a/target/hexagon/conv_emu.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef HEXAGON_CONV_EMU_H -#define HEXAGON_CONV_EMU_H - -uint64_t conv_sf_to_8u(float32 in, float_status *fp_status); -uint32_t conv_sf_to_4u(float32 in, float_status *fp_status); -int64_t conv_sf_to_8s(float32 in, float_status *fp_status); -int32_t conv_sf_to_4s(float32 in, float_status *fp_status); - -uint64_t conv_df_to_8u(float64 in, float_status *fp_status); -uint32_t conv_df_to_4u(float64 in, float_status *fp_status); -int64_t conv_df_to_8s(float64 in, float_status *fp_status); -int32_t conv_df_to_4s(float64 in, float_status *fp_status); - -#endif diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index b0b3040..ff44fd6 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "qapi/error.h" #include "hw/qdev-properties.h" +#include "fpu/softfloat-helpers.h" static void hexagon_v67_cpu_init(Object *obj) { @@ -69,10 +70,9 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { * stacks at different locations. This is used to compensate so the diff is * cleaner. 
*/ -static inline target_ulong adjust_stack_ptrs(CPUHexagonState *env, - target_ulong addr) +static target_ulong adjust_stack_ptrs(CPUHexagonState *env, target_ulong addr) { - HexagonCPU *cpu = container_of(env, HexagonCPU, env); + HexagonCPU *cpu = env_archcpu(env); target_ulong stack_adjust = cpu->lldb_stack_adjust; target_ulong stack_start = env->stack_start; target_ulong stack_size = 0x10000; @@ -88,7 +88,7 @@ static inline target_ulong adjust_stack_ptrs(CPUHexagonState *env, } /* HEX_REG_P3_0 (aka C4) is an alias for the predicate registers */ -static inline target_ulong read_p3_0(CPUHexagonState *env) +static target_ulong read_p3_0(CPUHexagonState *env) { int32_t control_reg = 0; int i; @@ -116,7 +116,7 @@ static void print_reg(FILE *f, CPUHexagonState *env, int regnum) static void hexagon_dump(CPUHexagonState *env, FILE *f) { - HexagonCPU *cpu = container_of(env, HexagonCPU, env); + HexagonCPU *cpu = env_archcpu(env); if (cpu->lldb_compat) { /* @@ -206,8 +206,12 @@ static void hexagon_cpu_reset(DeviceState *dev) CPUState *cs = CPU(dev); HexagonCPU *cpu = HEXAGON_CPU(cs); HexagonCPUClass *mcc = HEXAGON_CPU_GET_CLASS(cpu); + CPUHexagonState *env = &cpu->env; mcc->parent_reset(dev); + + set_default_nan_mode(1, &env->fp_status); + set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); } static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index e04eac5..2855dd3 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -127,11 +127,6 @@ typedef struct HexagonCPU { target_ulong lldb_stack_adjust; } HexagonCPU; -static inline HexagonCPU *hexagon_env_get_cpu(CPUHexagonState *env) -{ - return container_of(env, HexagonCPU, env); -} - #include "cpu_bits.h" #define cpu_signal_handler cpu_hexagon_signal_handler diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index 96af834..96fef71 100644 --- a/target/hexagon/cpu_bits.h +++ 
b/target/hexagon/cpu_bits.h @@ -47,7 +47,7 @@ static inline uint32_t iclass_bits(uint32_t encoding) return iclass; } -static inline int is_packet_end(uint32_t endocing) +static inline bool is_packet_end(uint32_t endocing) { uint32_t bits = parse_bits(endocing); return ((bits == 0x3) || (bits == 0x0)); diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index c9bacaa..dffe1d1 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -48,8 +48,8 @@ enum { DEF_REGMAP(R_16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23) DEF_REGMAP(R__8, 8, 0, 2, 4, 6, 16, 18, 20, 22) -#define DECODE_MAPPED_REG(REGNO, NAME) \ - insn->regno[REGNO] = DECODE_REGISTER_##NAME[insn->regno[REGNO]]; +#define DECODE_MAPPED_REG(OPNUM, NAME) \ + insn->regno[OPNUM] = DECODE_REGISTER_##NAME[insn->regno[OPNUM]]; typedef struct { const struct DectreeTable *table_link; @@ -340,8 +340,8 @@ static void decode_split_cmpjump(Packet *pkt) if (GET_ATTRIB(pkt->insn[i].opcode, A_NEWCMPJUMP)) { last = pkt->num_insns; pkt->insn[last] = pkt->insn[i]; /* copy the instruction */ - pkt->insn[last].part1 = 1; /* last instruction does the CMP */ - pkt->insn[i].part1 = 0; /* existing instruction does the JUMP */ + pkt->insn[last].part1 = true; /* last insn does the CMP */ + pkt->insn[i].part1 = false; /* existing insn does the JUMP */ pkt->num_insns++; } } @@ -354,7 +354,7 @@ static void decode_split_cmpjump(Packet *pkt) } } -static inline int decode_opcode_can_jump(int opcode) +static bool decode_opcode_can_jump(int opcode) { if ((GET_ATTRIB(opcode, A_JUMP)) || (GET_ATTRIB(opcode, A_CALL)) || @@ -362,15 +362,15 @@ static inline int decode_opcode_can_jump(int opcode) (opcode == J2_pause)) { /* Exception to A_JUMP attribute */ if (opcode == J4_hintjumpr) { - return 0; + return false; } - return 1; + return true; } - return 0; + return false; } -static inline int decode_opcode_ends_loop(int opcode) +static bool decode_opcode_ends_loop(int opcode) { return GET_ATTRIB(opcode, 
A_HWLOOP0_END) || GET_ATTRIB(opcode, A_HWLOOP1_END); @@ -383,9 +383,9 @@ static void decode_set_insn_attr_fields(Packet *pkt) int numinsns = pkt->num_insns; uint16_t opcode; - pkt->pkt_has_cof = 0; - pkt->pkt_has_endloop = 0; - pkt->pkt_has_dczeroa = 0; + pkt->pkt_has_cof = false; + pkt->pkt_has_endloop = false; + pkt->pkt_has_dczeroa = false; for (i = 0; i < numinsns; i++) { opcode = pkt->insn[i].opcode; @@ -394,14 +394,14 @@ static void decode_set_insn_attr_fields(Packet *pkt) } if (GET_ATTRIB(opcode, A_DCZEROA)) { - pkt->pkt_has_dczeroa = 1; + pkt->pkt_has_dczeroa = true; } if (GET_ATTRIB(opcode, A_STORE)) { if (pkt->insn[i].slot == 0) { - pkt->pkt_has_store_s0 = 1; + pkt->pkt_has_store_s0 = true; } else { - pkt->pkt_has_store_s1 = 1; + pkt->pkt_has_store_s1 = true; } } @@ -422,9 +422,9 @@ static void decode_set_insn_attr_fields(Packet *pkt) */ static void decode_shuffle_for_execution(Packet *packet) { - int changed = 0; + bool changed = false; int i; - int flag; /* flag means we've seen a non-memory instruction */ + bool flag; /* flag means we've seen a non-memory instruction */ int n_mems; int last_insn = packet->num_insns - 1; @@ -437,7 +437,7 @@ static void decode_shuffle_for_execution(Packet *packet) } do { - changed = 0; + changed = false; /* * Stores go last, must not reorder. * Cannot shuffle stores past loads, either. @@ -445,13 +445,13 @@ static void decode_shuffle_for_execution(Packet *packet) * then a store, shuffle the store to the front. Don't shuffle * stores wrt each other or a load. 
*/ - for (flag = n_mems = 0, i = last_insn; i >= 0; i--) { + for (flag = false, n_mems = 0, i = last_insn; i >= 0; i--) { int opcode = packet->insn[i].opcode; if (flag && GET_ATTRIB(opcode, A_STORE)) { decode_send_insn_to(packet, i, last_insn - n_mems); n_mems++; - changed = 1; + changed = true; } else if (GET_ATTRIB(opcode, A_STORE)) { n_mems++; } else if (GET_ATTRIB(opcode, A_LOAD)) { @@ -466,7 +466,7 @@ static void decode_shuffle_for_execution(Packet *packet) * a .new value */ } else { - flag = 1; + flag = true; } } @@ -474,7 +474,7 @@ static void decode_shuffle_for_execution(Packet *packet) continue; } /* Compares go first, may be reordered wrt each other */ - for (flag = 0, i = 0; i < last_insn + 1; i++) { + for (flag = false, i = 0; i < last_insn + 1; i++) { int opcode = packet->insn[i].opcode; if ((strstr(opcode_wregs[opcode], "Pd4") || @@ -483,7 +483,7 @@ static void decode_shuffle_for_execution(Packet *packet) /* This should be a compare (not a store conditional) */ if (flag) { decode_send_insn_to(packet, i, 0); - changed = 1; + changed = true; continue; } } else if (GET_ATTRIB(opcode, A_IMPLICIT_WRITES_P3) && @@ -495,18 +495,18 @@ static void decode_shuffle_for_execution(Packet *packet) */ if (flag) { decode_send_insn_to(packet, i, 0); - changed = 1; + changed = true; continue; } } else if (GET_ATTRIB(opcode, A_IMPLICIT_WRITES_P0) && !GET_ATTRIB(opcode, A_NEWCMPJUMP)) { if (flag) { decode_send_insn_to(packet, i, 0); - changed = 1; + changed = true; continue; } } else { - flag = 1; + flag = true; } } if (changed) { @@ -543,7 +543,7 @@ static void decode_apply_extenders(Packet *packet) int i; for (i = 0; i < packet->num_insns; i++) { if (GET_ATTRIB(packet->insn[i].opcode, A_IT_EXTENDER)) { - packet->insn[i + 1].extension_valid = 1; + packet->insn[i + 1].extension_valid = true; apply_extender(packet, i + 1, packet->insn[i].immed[0]); } } @@ -764,7 +764,7 @@ static void decode_add_endloop_insn(Insn *insn, int loopnum) } } -static inline int 
decode_parsebits_is_loopend(uint32_t encoding32) +static bool decode_parsebits_is_loopend(uint32_t encoding32) { uint32_t bits = parse_bits(encoding32); return bits == 0x2; @@ -775,8 +775,11 @@ decode_set_slot_number(Packet *pkt) { int slot; int i; - int hit_mem_insn = 0; - int hit_duplex = 0; + bool hit_mem_insn = false; + bool hit_duplex = false; + bool slot0_found = false; + bool slot1_found = false; + int slot1_iidx = 0; /* * The slots are encoded in reverse order @@ -801,7 +804,7 @@ decode_set_slot_number(Packet *pkt) if ((GET_ATTRIB(pkt->insn[i].opcode, A_MEMLIKE) || GET_ATTRIB(pkt->insn[i].opcode, A_MEMLIKE_PACKET_RULES)) && !hit_mem_insn) { - hit_mem_insn = 1; + hit_mem_insn = true; pkt->insn[i].slot = 0; continue; } @@ -818,7 +821,7 @@ decode_set_slot_number(Packet *pkt) for (i = pkt->num_insns - 1; i >= 0; i--) { /* First subinsn always goes to slot 0 */ if (GET_ATTRIB(pkt->insn[i].opcode, A_SUBINSN) && !hit_duplex) { - hit_duplex = 1; + hit_duplex = true; pkt->insn[i].slot = 0; continue; } @@ -830,13 +833,10 @@ decode_set_slot_number(Packet *pkt) } /* Fix the exceptions - slot 1 is never empty, always aligns to slot 0 */ - int slot0_found = 0; - int slot1_found = 0; - int slot1_iidx = 0; for (i = pkt->num_insns - 1; i >= 0; i--) { /* Is slot0 used? */ if (pkt->insn[i].slot == 0) { - int is_endloop = (pkt->insn[i].opcode == J2_endloop01); + bool is_endloop = (pkt->insn[i].opcode == J2_endloop01); is_endloop |= (pkt->insn[i].opcode == J2_endloop0); is_endloop |= (pkt->insn[i].opcode == J2_endloop1); @@ -845,17 +845,17 @@ decode_set_slot_number(Packet *pkt) * slot0 for endloop */ if (!is_endloop) { - slot0_found = 1; + slot0_found = true; } } /* Is slot1 used? */ if (pkt->insn[i].slot == 1) { - slot1_found = 1; + slot1_found = true; slot1_iidx = i; } } /* Is slot0 empty and slot1 used? 
*/ - if ((slot0_found == 0) && (slot1_found == 1)) { + if ((!slot0_found) && slot1_found) { /* Then push it to slot0 */ pkt->insn[slot1_iidx].slot = 0; } @@ -873,7 +873,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, { int num_insns = 0; int words_read = 0; - int end_of_packet = 0; + bool end_of_packet = false; int new_insns = 0; uint32_t encoding32; @@ -890,7 +890,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, * decode works */ if (pkt->insn[num_insns].opcode == A4_ext) { - pkt->insn[num_insns + 1].extension_valid = 1; + pkt->insn[num_insns + 1].extension_valid = true; } num_insns += new_insns; words_read++; @@ -913,7 +913,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, decode_add_endloop_insn(&pkt->insn[pkt->num_insns++], 0); } if (words_read >= 3) { - uint32_t has_loop0, has_loop1; + bool has_loop0, has_loop1; has_loop0 = decode_parsebits_is_loopend(words[0]); has_loop1 = decode_parsebits_is_loopend(words[1]); if (has_loop0 && has_loop1) { diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c index 842d903..d3b45d4 100644 --- a/target/hexagon/fma_emu.c +++ b/target/hexagon/fma_emu.c @@ -19,7 +19,6 @@ #include "qemu/int128.h" #include "fpu/softfloat.h" #include "macros.h" -#include "conv_emu.h" #include "fma_emu.h" #define DF_INF_EXP 0x7ff @@ -64,7 +63,7 @@ typedef union { }; } Float; -static inline uint64_t float64_getmant(float64 f64) +static uint64_t float64_getmant(float64 f64) { Double a = { .i = f64 }; if (float64_is_normal(f64)) { @@ -91,7 +90,7 @@ int32_t float64_getexp(float64 f64) return -1; } -static inline uint64_t float32_getmant(float32 f32) +static uint64_t float32_getmant(float32 f32) { Float a = { .i = f32 }; if (float32_is_normal(f32)) { @@ -118,17 +117,17 @@ int32_t float32_getexp(float32 f32) return -1; } -static inline uint32_t int128_getw0(Int128 x) +static uint32_t int128_getw0(Int128 x) { return int128_getlo(x); } -static inline uint32_t 
int128_getw1(Int128 x) +static uint32_t int128_getw1(Int128 x) { return int128_getlo(x) >> 32; } -static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi) +static Int128 int128_mul_6464(uint64_t ai, uint64_t bi) { Int128 a, b; uint64_t pp0, pp1a, pp1b, pp1s, pp2; @@ -152,7 +151,7 @@ static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi) return int128_make128(ret_low, pp2 + (pp1s >> 32)); } -static inline Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow) +static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow) { Int128 ret = int128_sub(a, b); if (borrow != 0) { @@ -170,7 +169,7 @@ typedef struct { uint8_t sticky; } Accum; -static inline void accum_init(Accum *p) +static void accum_init(Accum *p) { p->mant = int128_zero(); p->exp = 0; @@ -180,7 +179,7 @@ static inline void accum_init(Accum *p) p->sticky = 0; } -static inline Accum accum_norm_left(Accum a) +static Accum accum_norm_left(Accum a) { a.exp--; a.mant = int128_lshift(a.mant, 1); @@ -190,6 +189,7 @@ static inline Accum accum_norm_left(Accum a) return a; } +/* This function is marked inline for performance reasons */ static inline Accum accum_norm_right(Accum a, int amt) { if (amt > 130) { @@ -226,7 +226,7 @@ static inline Accum accum_norm_right(Accum a, int amt) */ static Accum accum_add(Accum a, Accum b); -static inline Accum accum_sub(Accum a, Accum b, int negate) +static Accum accum_sub(Accum a, Accum b, int negate) { Accum ret; accum_init(&ret); @@ -329,7 +329,7 @@ static Accum accum_add(Accum a, Accum b) } /* Return an infinity with requested sign */ -static inline float64 infinite_float64(uint8_t sign) +static float64 infinite_float64(uint8_t sign) { if (sign) { return make_float64(DF_MINUS_INF); @@ -339,7 +339,7 @@ static inline float64 infinite_float64(uint8_t sign) } /* Return a maximum finite value with requested sign */ -static inline float64 maxfinite_float64(uint8_t sign) +static float64 maxfinite_float64(uint8_t sign) { if (sign) { return 
make_float64(DF_MINUS_MAXF); @@ -349,7 +349,7 @@ static inline float64 maxfinite_float64(uint8_t sign) } /* Return a zero value with requested sign */ -static inline float64 zero_float64(uint8_t sign) +static float64 zero_float64(uint8_t sign) { if (sign) { return make_float64(0x8000000000000000); @@ -369,7 +369,7 @@ float32 infinite_float32(uint8_t sign) } /* Return a maximum finite value with the requested sign */ -static inline float32 maxfinite_float32(uint8_t sign) +static float32 maxfinite_float32(uint8_t sign) { if (sign) { return make_float32(SF_MINUS_MAXF); @@ -379,7 +379,7 @@ static inline float32 maxfinite_float32(uint8_t sign) } /* Return a zero value with requested sign */ -static inline float32 zero_float32(uint8_t sign) +static float32 zero_float32(uint8_t sign) { if (sign) { return make_float32(0x80000000); @@ -389,7 +389,7 @@ static inline float32 zero_float32(uint8_t sign) } #define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \ -static inline SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \ +static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \ { \ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \ && ((a.guard | a.round | a.sticky) == 0)) { \ @@ -526,8 +526,8 @@ static bool is_inf_prod(float64 a, float64 b) (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a)))); } -static inline float64 special_fma(float64 a, float64 b, float64 c, - float_status *fp_status) +static float64 special_fma(float64 a, float64 b, float64 c, + float_status *fp_status) { float64 ret = make_float64(0); @@ -586,8 +586,8 @@ static inline float64 special_fma(float64 a, float64 b, float64 c, g_assert_not_reached(); } -static inline float32 special_fmaf(float32 a, float32 b, float32 c, - float_status *fp_status) +static float32 special_fmaf(float32 a, float32 b, float32 c, + float_status *fp_status) { float64 aa, bb, cc; aa = float32_to_float64(a, fp_status); diff --git a/target/hexagon/gen_tcg.h 
b/target/hexagon/gen_tcg.h index e044dea..18fcdbc 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -37,7 +37,10 @@ * _sp stack pointer relative r0 = memw(r29+#12) * _ap absolute set r0 = memw(r1=##variable) * _pr post increment register r0 = memw(r1++m1) + * _pbr post increment bit reverse r0 = memw(r1++m1:brev) * _pi post increment immediate r0 = memb(r1++#1) + * _pci post increment circular immediate r0 = memw(r1++#4:circ(m0)) + * _pcr post increment circular register r0 = memw(r1++I:circ(m0)) */ /* Macros for complex addressing modes */ @@ -51,12 +54,32 @@ fEA_REG(RxV); \ fPM_M(RxV, MuV); \ } while (0) +#define GET_EA_pbr \ + do { \ + gen_helper_fbrev(EA, RxV); \ + tcg_gen_add_tl(RxV, RxV, MuV); \ + } while (0) #define GET_EA_pi \ do { \ fEA_REG(RxV); \ fPM_I(RxV, siV); \ } while (0) - +#define GET_EA_pci \ + do { \ + TCGv tcgv_siV = tcg_const_tl(siV); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_helper_fcircadd(RxV, RxV, tcgv_siV, MuV, \ + hex_gpr[HEX_REG_CS0 + MuN]); \ + tcg_temp_free(tcgv_siV); \ + } while (0) +#define GET_EA_pcr(SHIFT) \ + do { \ + TCGv ireg = tcg_temp_new(); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_read_ireg(ireg, MuV, (SHIFT)); \ + gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ + tcg_temp_free(ireg); \ + } while (0) /* Instructions with multiple definitions */ #define fGEN_TCG_LOAD_AP(RES, SIZE, SIGN) \ @@ -80,20 +103,230 @@ #define fGEN_TCG_L4_loadrd_ap(SHORTCODE) \ fGEN_TCG_LOAD_AP(RddV, 8, u) +#define fGEN_TCG_L2_loadrub_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrb_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadruh_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrh_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadri_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrd_pci(SHORTCODE) SHORTCODE + +#define fGEN_TCG_LOAD_pcr(SHIFT, LOAD) \ + do { \ + TCGv ireg = tcg_temp_new(); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_read_ireg(ireg, MuV, SHIFT); \ + gen_helper_fcircadd(RxV, RxV, ireg, MuV, 
hex_gpr[HEX_REG_CS0 + MuN]); \ + LOAD; \ + tcg_temp_free(ireg); \ + } while (0) + +#define fGEN_TCG_L2_loadrub_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, u, EA, RdV)) +#define fGEN_TCG_L2_loadrb_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, s, EA, RdV)) +#define fGEN_TCG_L2_loadruh_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, u, EA, RdV)) +#define fGEN_TCG_L2_loadrh_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, s, EA, RdV)) +#define fGEN_TCG_L2_loadri_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(2, fLOAD(1, 4, u, EA, RdV)) +#define fGEN_TCG_L2_loadrd_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(3, fLOAD(1, 8, u, EA, RddV)) + #define fGEN_TCG_L2_loadrub_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrub_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrub_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrb_pr(SHORTCODE) SHORTCODE -#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE; +#define fGEN_TCG_L2_loadrb_pbr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadruh_pr(SHORTCODE) SHORTCODE -#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE; +#define fGEN_TCG_L2_loadruh_pbr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrh_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrh_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrh_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadri_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadri_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadri_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrd_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrd_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrd_pi(SHORTCODE) SHORTCODE /* + * These instructions load 2 bytes and places them in + * two halves of the destination register. + * The GET_EA macro determines the addressing mode. + * The SIGN argument determines whether to zero-extend or + * sign-extend. 
+ */ +#define fGEN_TCG_loadbXw2(GET_EA, SIGN) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv byte = tcg_temp_new(); \ + GET_EA; \ + fLOAD(1, 2, u, EA, tmp); \ + tcg_gen_movi_tl(RdV, 0); \ + for (int i = 0; i < 2; i++) { \ + gen_set_half(i, RdV, gen_get_byte(byte, i, tmp, (SIGN))); \ + } \ + tcg_temp_free(tmp); \ + tcg_temp_free(byte); \ + } while (0) + +#define fGEN_TCG_L2_loadbzw2_io(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), false) +#define fGEN_TCG_L4_loadbzw2_ur(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), false) +#define fGEN_TCG_L2_loadbsw2_io(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), true) +#define fGEN_TCG_L4_loadbsw2_ur(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), true) +#define fGEN_TCG_L4_loadbzw2_ap(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_ap, false) +#define fGEN_TCG_L2_loadbzw2_pr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pr, false) +#define fGEN_TCG_L2_loadbzw2_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pbr, false) +#define fGEN_TCG_L2_loadbzw2_pi(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pi, false) +#define fGEN_TCG_L4_loadbsw2_ap(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_ap, true) +#define fGEN_TCG_L2_loadbsw2_pr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pr, true) +#define fGEN_TCG_L2_loadbsw2_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pbr, true) +#define fGEN_TCG_L2_loadbsw2_pi(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pi, true) +#define fGEN_TCG_L2_loadbzw2_pci(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pci, false) +#define fGEN_TCG_L2_loadbsw2_pci(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pci, true) +#define fGEN_TCG_L2_loadbzw2_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pcr(1), false) +#define fGEN_TCG_L2_loadbsw2_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pcr(1), true) + +/* + * These instructions load 4 bytes and places them in + * four halves of the destination register pair. + * The GET_EA macro determines the addressing mode. + * The SIGN argument determines whether to zero-extend or + * sign-extend. 
+ */ +#define fGEN_TCG_loadbXw4(GET_EA, SIGN) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv byte = tcg_temp_new(); \ + GET_EA; \ + fLOAD(1, 4, u, EA, tmp); \ + tcg_gen_movi_i64(RddV, 0); \ + for (int i = 0; i < 4; i++) { \ + gen_set_half_i64(i, RddV, gen_get_byte(byte, i, tmp, (SIGN))); \ + } \ + tcg_temp_free(tmp); \ + tcg_temp_free(byte); \ + } while (0) + +#define fGEN_TCG_L2_loadbzw4_io(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), false) +#define fGEN_TCG_L4_loadbzw4_ur(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), false) +#define fGEN_TCG_L2_loadbsw4_io(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), true) +#define fGEN_TCG_L4_loadbsw4_ur(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), true) +#define fGEN_TCG_L2_loadbzw4_pci(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pci, false) +#define fGEN_TCG_L2_loadbsw4_pci(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pci, true) +#define fGEN_TCG_L2_loadbzw4_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pcr(2), false) +#define fGEN_TCG_L2_loadbsw4_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pcr(2), true) +#define fGEN_TCG_L4_loadbzw4_ap(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_ap, false) +#define fGEN_TCG_L2_loadbzw4_pr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pr, false) +#define fGEN_TCG_L2_loadbzw4_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pbr, false) +#define fGEN_TCG_L2_loadbzw4_pi(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pi, false) +#define fGEN_TCG_L4_loadbsw4_ap(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_ap, true) +#define fGEN_TCG_L2_loadbsw4_pr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pr, true) +#define fGEN_TCG_L2_loadbsw4_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pbr, true) +#define fGEN_TCG_L2_loadbsw4_pi(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pi, true) + +/* + * These instructions load a half word, shift the destination right by 16 bits + * and place the loaded value in the high half word of the destination pair. + * The GET_EA macro determines the addressing mode. 
+ */ +#define fGEN_TCG_loadalignh(GET_EA) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv_i64 tmp_i64 = tcg_temp_new_i64(); \ + GET_EA; \ + fLOAD(1, 2, u, EA, tmp); \ + tcg_gen_extu_i32_i64(tmp_i64, tmp); \ + tcg_gen_shri_i64(RyyV, RyyV, 16); \ + tcg_gen_deposit_i64(RyyV, RyyV, tmp_i64, 48, 16); \ + tcg_temp_free(tmp); \ + tcg_temp_free_i64(tmp_i64); \ + } while (0) + +#define fGEN_TCG_L4_loadalignh_ur(SHORTCODE) \ + fGEN_TCG_loadalignh(fEA_IRs(UiV, RtV, uiV)) +#define fGEN_TCG_L2_loadalignh_io(SHORTCODE) \ + fGEN_TCG_loadalignh(fEA_RI(RsV, siV)) +#define fGEN_TCG_L2_loadalignh_pci(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pci) +#define fGEN_TCG_L2_loadalignh_pcr(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pcr(1)) +#define fGEN_TCG_L4_loadalignh_ap(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_ap) +#define fGEN_TCG_L2_loadalignh_pr(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pr) +#define fGEN_TCG_L2_loadalignh_pbr(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pbr) +#define fGEN_TCG_L2_loadalignh_pi(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pi) + +/* Same as above, but loads a byte instead of half word */ +#define fGEN_TCG_loadalignb(GET_EA) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv_i64 tmp_i64 = tcg_temp_new_i64(); \ + GET_EA; \ + fLOAD(1, 1, u, EA, tmp); \ + tcg_gen_extu_i32_i64(tmp_i64, tmp); \ + tcg_gen_shri_i64(RyyV, RyyV, 8); \ + tcg_gen_deposit_i64(RyyV, RyyV, tmp_i64, 56, 8); \ + tcg_temp_free(tmp); \ + tcg_temp_free_i64(tmp_i64); \ + } while (0) + +#define fGEN_TCG_L2_loadalignb_io(SHORTCODE) \ + fGEN_TCG_loadalignb(fEA_RI(RsV, siV)) +#define fGEN_TCG_L4_loadalignb_ur(SHORTCODE) \ + fGEN_TCG_loadalignb(fEA_IRs(UiV, RtV, uiV)) +#define fGEN_TCG_L2_loadalignb_pci(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pci) +#define fGEN_TCG_L2_loadalignb_pcr(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pcr(0)) +#define fGEN_TCG_L4_loadalignb_ap(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_ap) +#define fGEN_TCG_L2_loadalignb_pr(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pr) +#define 
fGEN_TCG_L2_loadalignb_pbr(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pbr) +#define fGEN_TCG_L2_loadalignb_pi(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pi) + +/* * Predicated loads * Here is a primer to understand the tag names * @@ -195,6 +428,191 @@ #define fGEN_TCG_S4_stored_locked(SHORTCODE) \ do { SHORTCODE; READ_PREG(PdV, PdN); } while (0) +#define fGEN_TCG_STORE(SHORTCODE) \ + do { \ + TCGv HALF = tcg_temp_new(); \ + TCGv BYTE = tcg_temp_new(); \ + SHORTCODE; \ + tcg_temp_free(HALF); \ + tcg_temp_free(BYTE); \ + } while (0) + +#define fGEN_TCG_STORE_pcr(SHIFT, STORE) \ + do { \ + TCGv ireg = tcg_temp_new(); \ + TCGv HALF = tcg_temp_new(); \ + TCGv BYTE = tcg_temp_new(); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_read_ireg(ireg, MuV, SHIFT); \ + gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ + STORE; \ + tcg_temp_free(ireg); \ + tcg_temp_free(HALF); \ + tcg_temp_free(BYTE); \ + } while (0) + +#define fGEN_TCG_S2_storerb_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerb_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerb_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, RtV))) + +#define fGEN_TCG_S2_storerh_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerh_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerh_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, RtV))) + +#define fGEN_TCG_S2_storerf_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerf_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerf_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(1, RtV))) + +#define fGEN_TCG_S2_storeri_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storeri_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storeri_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, RtV)) + +#define fGEN_TCG_S2_storerd_pbr(SHORTCODE) \ + 
fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerd_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerd_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(3, fSTORE(1, 8, EA, RttV)) + +#define fGEN_TCG_S2_storerbnew_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerbnew_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerbnew_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, NtN))) + +#define fGEN_TCG_S2_storerhnew_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerhnew_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerhnew_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, NtN))) + +#define fGEN_TCG_S2_storerinew_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerinew_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN)) + +/* + * Mathematical operations with more than one definition require + * special handling + */ +#define fGEN_TCG_A5_ACS(SHORTCODE) \ + do { \ + gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ + } while (0) + +/* + * Approximate reciprocal + * r3,p1 = sfrecipa(r0, r1) + * + * The helper packs the 2 32-bit results into a 64-bit value, + * so unpack them into the proper results. + */ +#define fGEN_TCG_F2_sfrecipa(SHORTCODE) \ + do { \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + gen_helper_sfrecipa(tmp, cpu_env, RsV, RtV); \ + tcg_gen_extrh_i64_i32(RdV, tmp); \ + tcg_gen_extrl_i64_i32(PeV, tmp); \ + tcg_temp_free_i64(tmp); \ + } while (0) + +/* + * Approximation of the reciprocal square root + * r1,p0 = sfinvsqrta(r0) + * + * The helper packs the 2 32-bit results into a 64-bit value, + * so unpack them into the proper results. 
+ */ +#define fGEN_TCG_F2_sfinvsqrta(SHORTCODE) \ + do { \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + gen_helper_sfinvsqrta(tmp, cpu_env, RsV); \ + tcg_gen_extrh_i64_i32(RdV, tmp); \ + tcg_gen_extrl_i64_i32(PeV, tmp); \ + tcg_temp_free_i64(tmp); \ + } while (0) + +/* + * Add or subtract with carry. + * Predicate register is used as an extra input and output. + * r5:4 = add(r1:0, r3:2, p1):carry + */ +#define fGEN_TCG_A4_addp_c(SHORTCODE) \ + do { \ + TCGv_i64 carry = tcg_temp_new_i64(); \ + TCGv_i64 zero = tcg_const_i64(0); \ + tcg_gen_extu_i32_i64(carry, PxV); \ + tcg_gen_andi_i64(carry, carry, 1); \ + tcg_gen_add2_i64(RddV, carry, RssV, zero, carry, zero); \ + tcg_gen_add2_i64(RddV, carry, RddV, carry, RttV, zero); \ + tcg_gen_extrl_i64_i32(PxV, carry); \ + gen_8bitsof(PxV, PxV); \ + tcg_temp_free_i64(carry); \ + tcg_temp_free_i64(zero); \ + } while (0) + +/* r5:4 = sub(r1:0, r3:2, p1):carry */ +#define fGEN_TCG_A4_subp_c(SHORTCODE) \ + do { \ + TCGv_i64 carry = tcg_temp_new_i64(); \ + TCGv_i64 zero = tcg_const_i64(0); \ + TCGv_i64 not_RttV = tcg_temp_new_i64(); \ + tcg_gen_extu_i32_i64(carry, PxV); \ + tcg_gen_andi_i64(carry, carry, 1); \ + tcg_gen_not_i64(not_RttV, RttV); \ + tcg_gen_add2_i64(RddV, carry, RssV, zero, carry, zero); \ + tcg_gen_add2_i64(RddV, carry, RddV, carry, not_RttV, zero); \ + tcg_gen_extrl_i64_i32(PxV, carry); \ + gen_8bitsof(PxV, PxV); \ + tcg_temp_free_i64(carry); \ + tcg_temp_free_i64(zero); \ + tcg_temp_free_i64(not_RttV); \ + } while (0) + +/* + * Compare each of the 8 unsigned bytes + * The minimum is placed in each byte of the destination. + * Each bit of the predicate is set true if the bit from the first operand + * is greater than the bit from the second operand. 
+ * r5:4,p1 = vminub(r1:0, r3:2) + */ +#define fGEN_TCG_A6_vminub_RdP(SHORTCODE) \ + do { \ + TCGv left = tcg_temp_new(); \ + TCGv right = tcg_temp_new(); \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_movi_tl(PeV, 0); \ + tcg_gen_movi_i64(RddV, 0); \ + for (int i = 0; i < 8; i++) { \ + gen_get_byte_i64(left, i, RttV, false); \ + gen_get_byte_i64(right, i, RssV, false); \ + tcg_gen_setcond_tl(TCG_COND_GT, tmp, left, right); \ + tcg_gen_deposit_tl(PeV, PeV, tmp, i, 1); \ + tcg_gen_umin_tl(tmp, left, right); \ + gen_set_byte_i64(i, RddV, tmp); \ + } \ + tcg_temp_free(left); \ + tcg_temp_free(right); \ + tcg_temp_free(tmp); \ + } while (0) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index db9f663..7ceb25b 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -316,7 +316,7 @@ def genptr_dst_write(f, tag, regtype, regid): print("Bad register parse: ", regtype, regid) elif (regtype == "P"): if (regid in {"d", "e", "x"}): - f.write(" gen_log_pred_write(%s%sN, %s%sV);\n" % \ + f.write(" gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \ (regtype, regid, regtype, regid)) f.write(" ctx_log_pred_write(ctx, %s%sN);\n" % \ (regtype, regid)) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 7481f4c..f93f895 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -15,7 +15,6 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ -#define QEMU_GENERATE #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" @@ -24,7 +23,9 @@ #include "insn.h" #include "opcodes.h" #include "translate.h" +#define QEMU_GENERATE /* Used internally by macros.h */ #include "macros.h" +#undef QEMU_GENERATE #include "gen_tcg.h" static inline TCGv gen_read_preg(TCGv pred, uint8_t num) @@ -35,20 +36,24 @@ static inline TCGv gen_read_preg(TCGv pred, uint8_t num) static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot) { - TCGv one = tcg_const_tl(1); TCGv zero = tcg_const_tl(0); TCGv slot_mask = tcg_temp_new(); tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, val, hex_new_value[rnum]); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum], slot_mask, zero, - one, hex_reg_written[rnum]); -#endif + if (HEX_DEBUG) { + /* + * Do this so HELPER(debug_commit_end) will know + * + * Note that slot_mask indicates the value is not written + * (i.e., slot was cancelled), so we create a true/false value before + * or'ing with hex_reg_written[rnum]. 
+ */ + tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); + tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); + } - tcg_temp_free(one); tcg_temp_free(zero); tcg_temp_free(slot_mask); } @@ -56,45 +61,44 @@ static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot) static inline void gen_log_reg_write(int rnum, TCGv val) { tcg_gen_mov_tl(hex_new_value[rnum], val); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_movi_tl(hex_reg_written[rnum], 1); + } } static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, int slot) { TCGv val32 = tcg_temp_new(); - TCGv one = tcg_const_tl(1); TCGv zero = tcg_const_tl(0); TCGv slot_mask = tcg_temp_new(); tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); /* Low word */ tcg_gen_extrl_i64_i32(val32, val); - tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, - val32, hex_new_value[rnum]); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum], + tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, - one, hex_reg_written[rnum]); -#endif - + val32, hex_new_value[rnum]); /* High word */ tcg_gen_extrh_i64_i32(val32, val); tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1], slot_mask, zero, val32, hex_new_value[rnum + 1]); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum + 1], - slot_mask, zero, - one, hex_reg_written[rnum + 1]); -#endif + if (HEX_DEBUG) { + /* + * Do this so HELPER(debug_commit_end) will know + * + * Note that slot_mask indicates the value is not written + * (i.e., slot was cancelled), so we create a true/false value before + * or'ing with hex_reg_written[rnum]. 
+ */ + tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); + tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); + tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1], + slot_mask); + } tcg_temp_free(val32); - tcg_temp_free(one); tcg_temp_free(zero); tcg_temp_free(slot_mask); } @@ -103,33 +107,41 @@ static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) { /* Low word */ tcg_gen_extrl_i64_i32(hex_new_value[rnum], val); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_movi_tl(hex_reg_written[rnum], 1); + } /* High word */ tcg_gen_extrh_i64_i32(hex_new_value[rnum + 1], val); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); + } } -static inline void gen_log_pred_write(int pnum, TCGv val) +static inline void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { TCGv zero = tcg_const_tl(0); TCGv base_val = tcg_temp_new(); TCGv and_val = tcg_temp_new(); TCGv pred_written = tcg_temp_new(); - /* Multiple writes to the same preg are and'ed together */ tcg_gen_andi_tl(base_val, val, 0xff); - tcg_gen_and_tl(and_val, base_val, hex_new_pred_value[pnum]); - tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pnum); - tcg_gen_movcond_tl(TCG_COND_NE, hex_new_pred_value[pnum], - pred_written, zero, - and_val, base_val); + + /* + * Section 6.1.3 of the Hexagon V67 Programmer's Reference Manual + * + * Multiple writes to the same preg are and'ed together + * If this is the first predicate write in the packet, do a + * straight assignment. Otherwise, do an and. 
+ */ + if (!test_bit(pnum, ctx->pregs_written)) { + tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val); + } else { + tcg_gen_and_tl(hex_new_pred_value[pnum], + hex_new_pred_value[pnum], base_val); + } tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); tcg_temp_free(zero); @@ -254,6 +266,61 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, } } +static TCGv gen_get_byte(TCGv result, int N, TCGv src, bool sign) +{ + if (sign) { + tcg_gen_sextract_tl(result, src, N * 8, 8); + } else { + tcg_gen_extract_tl(result, src, N * 8, 8); + } + return result; +} + +static TCGv gen_get_byte_i64(TCGv result, int N, TCGv_i64 src, bool sign) +{ + TCGv_i64 res64 = tcg_temp_new_i64(); + if (sign) { + tcg_gen_sextract_i64(res64, src, N * 8, 8); + } else { + tcg_gen_extract_i64(res64, src, N * 8, 8); + } + tcg_gen_extrl_i64_i32(result, res64); + tcg_temp_free_i64(res64); + + return result; +} + +static inline TCGv gen_get_half(TCGv result, int N, TCGv src, bool sign) +{ + if (sign) { + tcg_gen_sextract_tl(result, src, N * 16, 16); + } else { + tcg_gen_extract_tl(result, src, N * 16, 16); + } + return result; +} + +static inline void gen_set_half(int N, TCGv result, TCGv src) +{ + tcg_gen_deposit_tl(result, result, src, N * 16, 16); +} + +static inline void gen_set_half_i64(int N, TCGv_i64 result, TCGv src) +{ + TCGv_i64 src64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(src64, src); + tcg_gen_deposit_i64(result, result, src64, N * 16, 16); + tcg_temp_free_i64(src64); +} + +static void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src) +{ + TCGv_i64 src64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(src64, src); + tcg_gen_deposit_i64(result, result, src64, N * 8, 8); + tcg_temp_free_i64(src64); +} + static inline void gen_load_locked4u(TCGv dest, TCGv vaddr, int mem_index) { tcg_gen_qemu_ld32u(dest, vaddr, mem_index); @@ -327,5 +394,85 @@ static inline void gen_store_conditional8(CPUHexagonState *env, tcg_gen_movi_tl(hex_llsc_addr, ~0); } +static 
inline void gen_store32(TCGv vaddr, TCGv src, int width, int slot) +{ + tcg_gen_mov_tl(hex_store_addr[slot], vaddr); + tcg_gen_movi_tl(hex_store_width[slot], width); + tcg_gen_mov_tl(hex_store_val32[slot], src); +} + +static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, + DisasContext *ctx, int slot) +{ + gen_store32(vaddr, src, 1, slot); + ctx->store_width[slot] = 1; +} + +static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src, + DisasContext *ctx, int slot) +{ + TCGv tmp = tcg_const_tl(src); + gen_store1(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free(tmp); +} + +static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src, + DisasContext *ctx, int slot) +{ + gen_store32(vaddr, src, 2, slot); + ctx->store_width[slot] = 2; +} + +static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src, + DisasContext *ctx, int slot) +{ + TCGv tmp = tcg_const_tl(src); + gen_store2(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free(tmp); +} + +static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src, + DisasContext *ctx, int slot) +{ + gen_store32(vaddr, src, 4, slot); + ctx->store_width[slot] = 4; +} + +static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, + DisasContext *ctx, int slot) +{ + TCGv tmp = tcg_const_tl(src); + gen_store4(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free(tmp); +} + +static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src, + DisasContext *ctx, int slot) +{ + tcg_gen_mov_tl(hex_store_addr[slot], vaddr); + tcg_gen_movi_tl(hex_store_width[slot], 8); + tcg_gen_mov_i64(hex_store_val64[slot], src); + ctx->store_width[slot] = 8; +} + +static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, + DisasContext *ctx, int slot) +{ + TCGv_i64 tmp = tcg_const_i64(src); + gen_store8(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free_i64(tmp); +} + +static TCGv gen_8bitsof(TCGv result, TCGv value) +{ + TCGv zero = tcg_const_tl(0); + TCGv ones = 
tcg_const_tl(0xff); + tcg_gen_movcond_tl(TCG_COND_NE, result, value, zero, ones, zero); + tcg_temp_free(zero); + tcg_temp_free(ones); + + return result; +} + #include "tcg_funcs_generated.c.inc" #include "tcg_func_table_generated.c.inc" diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index a5f340c..ca201fb 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -19,13 +19,16 @@ #include "helper_protos_generated.h.inc" DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) -#if HEX_DEBUG DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) -#endif DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) +DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_3(sfrecipa, i64, env, f32, f32) +DEF_HELPER_2(sfinvsqrta, i64, env, f32) +DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) +DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) @@ -38,21 +41,21 @@ DEF_HELPER_2(conv_ud2sf, f32, env, s64) DEF_HELPER_2(conv_ud2df, f64, env, s64) DEF_HELPER_2(conv_d2sf, f32, env, s64) DEF_HELPER_2(conv_d2df, f64, env, s64) -DEF_HELPER_2(conv_sf2uw, s32, env, f32) +DEF_HELPER_2(conv_sf2uw, i32, env, f32) DEF_HELPER_2(conv_sf2w, s32, env, f32) -DEF_HELPER_2(conv_sf2ud, s64, env, f32) +DEF_HELPER_2(conv_sf2ud, i64, env, f32) DEF_HELPER_2(conv_sf2d, s64, env, f32) -DEF_HELPER_2(conv_df2uw, s32, env, f64) +DEF_HELPER_2(conv_df2uw, i32, env, f64) DEF_HELPER_2(conv_df2w, s32, env, f64) -DEF_HELPER_2(conv_df2ud, s64, env, f64) +DEF_HELPER_2(conv_df2ud, i64, env, f64) DEF_HELPER_2(conv_df2d, s64, env, f64) -DEF_HELPER_2(conv_sf2uw_chop, s32, env, f32) +DEF_HELPER_2(conv_sf2uw_chop, i32, env, f32) DEF_HELPER_2(conv_sf2w_chop, s32, env, f32) 
-DEF_HELPER_2(conv_sf2ud_chop, s64, env, f32) +DEF_HELPER_2(conv_sf2ud_chop, i64, env, f32) DEF_HELPER_2(conv_sf2d_chop, s64, env, f32) -DEF_HELPER_2(conv_df2uw_chop, s32, env, f64) +DEF_HELPER_2(conv_df2uw_chop, i32, env, f64) DEF_HELPER_2(conv_df2w_chop, s32, env, f64) -DEF_HELPER_2(conv_df2ud_chop, s64, env, f64) +DEF_HELPER_2(conv_df2ud_chop, i64, env, f64) DEF_HELPER_2(conv_df2d_chop, s64, env, f64) DEF_HELPER_3(sfadd, f32, env, f32, f32) DEF_HELPER_3(sfsub, f32, env, f32, f32) diff --git a/target/hexagon/iclass.c b/target/hexagon/iclass.c index 378d8a6..6091286 100644 --- a/target/hexagon/iclass.c +++ b/target/hexagon/iclass.c @@ -53,10 +53,6 @@ SlotMask find_iclass_slots(Opcode opcode, int itype) (opcode == Y2_isync) || (opcode == J2_pause) || (opcode == J4_hintjumpr)) { return SLOTS_2; - } else if ((itype == ICLASS_V2LDST) && (GET_ATTRIB(opcode, A_STORE))) { - return SLOTS_01; - } else if ((itype == ICLASS_V2LDST) && (!GET_ATTRIB(opcode, A_STORE))) { - return SLOTS_01; } else if (GET_ATTRIB(opcode, A_CRSLOT23)) { return SLOTS_23; } else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) { diff --git a/target/hexagon/imported/alu.idef b/target/hexagon/imported/alu.idef index 45cc529..58477ae 100644 --- a/target/hexagon/imported/alu.idef +++ b/target/hexagon/imported/alu.idef @@ -153,6 +153,21 @@ Q6INSN(A2_subp,"Rdd32=sub(Rtt32,Rss32)",ATTRIBS(), "Sub", { RddV=RttV-RssV;}) +/* 64-bit with carry */ + +Q6INSN(A4_addp_c,"Rdd32=add(Rss32,Rtt32,Px4):carry",ATTRIBS(),"Add with Carry", +{ + RddV = RssV + RttV + fLSBOLD(PxV); + PxV = f8BITSOF(fCARRY_FROM_ADD(RssV,RttV,fLSBOLD(PxV))); +}) + +Q6INSN(A4_subp_c,"Rdd32=sub(Rss32,Rtt32,Px4):carry",ATTRIBS(),"Sub with Carry", +{ + RddV = RssV + ~RttV + fLSBOLD(PxV); + PxV = f8BITSOF(fCARRY_FROM_ADD(RssV,~RttV,fLSBOLD(PxV))); +}) + + /* NEG and ABS */ Q6INSN(A2_negsat,"Rd32=neg(Rs32):sat",ATTRIBS(), @@ -1240,6 +1255,35 @@ MINMAX(uw,WORD,UWORD,2) #undef VMINORMAX3 +Q6INSN(A5_ACS,"Rxx32,Pe4=vacsh(Rss32,Rtt32)",ATTRIBS(), +"Add 
Compare and Select elements of two vectors, record the maximums and the decisions ", +{ + fHIDE(int i;) + fHIDE(int xv;) + fHIDE(int sv;) + fHIDE(int tv;) + for (i = 0; i < 4; i++) { + xv = (int) fGETHALF(i,RxxV); + sv = (int) fGETHALF(i,RssV); + tv = (int) fGETHALF(i,RttV); + xv = xv + tv; //assumes 17bit datapath + sv = sv - tv; //assumes 17bit datapath + fSETBIT(i*2, PeV, (xv > sv)); + fSETBIT(i*2+1,PeV, (xv > sv)); + fSETHALF(i, RxxV, fSATH(fMAX(xv,sv))); + } +}) + +Q6INSN(A6_vminub_RdP,"Rdd32,Pe4=vminub(Rtt32,Rss32)",ATTRIBS(), +"Vector minimum of bytes, records minimum and decision vector", +{ + fHIDE(int i;) + for (i = 0; i < 8; i++) { + fSETBIT(i, PeV, (fGETUBYTE(i,RttV) > fGETUBYTE(i,RssV))); + fSETBYTE(i,RddV,fMIN(fGETUBYTE(i,RttV),fGETUBYTE(i,RssV))); + } +}) + /**********************************************/ /* Vector Min/Max */ /**********************************************/ diff --git a/target/hexagon/imported/compare.idef b/target/hexagon/imported/compare.idef index 3551467..abd016f 100644 --- a/target/hexagon/imported/compare.idef +++ b/target/hexagon/imported/compare.idef @@ -198,11 +198,11 @@ Q6INSN(C4_or_orn,"Pd4=or(Ps4,or(Pt4,!Pu4))",ATTRIBS(A_CRSLOT23), Q6INSN(C2_any8,"Pd4=any8(Ps4)",ATTRIBS(A_CRSLOT23), "Logical ANY of low 8 predicate bits", -{ PsV ? (PdV=0xff) : (PdV=0x00); }) +{ PdV = (PsV ? 0xff : 0x00); }) Q6INSN(C2_all8,"Pd4=all8(Ps4)",ATTRIBS(A_CRSLOT23), "Logical ALL of low 8 predicate bits", -{ (PsV==0xff) ? (PdV=0xff) : (PdV=0x00); }) +{ PdV = (PsV == 0xff ? 0xff : 0x00); }) Q6INSN(C2_vitpack,"Rd32=vitpack(Ps4,Pt4)",ATTRIBS(), "Pack the odd and even bits of two predicate registers", @@ -212,7 +212,7 @@ Q6INSN(C2_vitpack,"Rd32=vitpack(Ps4,Pt4)",ATTRIBS(), Q6INSN(C2_mux,"Rd32=mux(Pu4,Rs32,Rt32)",ATTRIBS(), "Scalar MUX", -{ (fLSBOLD(PuV)) ? (RdV=RsV):(RdV=RtV); }) +{ RdV = (fLSBOLD(PuV) ? 
RsV : RtV); }) Q6INSN(C2_cmovenewit,"if (Pu4.new) Rd32=#s12",ATTRIBS(A_ARCHV2), @@ -269,18 +269,18 @@ Q6INSN(C2_ccombinewf,"if (!Pu4) Rdd32=combine(Rs32,Rt32)",ATTRIBS(A_ARCHV2), Q6INSN(C2_muxii,"Rd32=mux(Pu4,#s8,#S8)",ATTRIBS(A_ARCHV2), "Scalar MUX immediates", -{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=siV):(RdV=SiV); }) +{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? siV : SiV); }) Q6INSN(C2_muxir,"Rd32=mux(Pu4,Rs32,#s8)",ATTRIBS(A_ARCHV2), "Scalar MUX register immediate", -{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=RsV):(RdV=siV); }) +{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? RsV : siV); }) Q6INSN(C2_muxri,"Rd32=mux(Pu4,#s8,Rs32)",ATTRIBS(A_ARCHV2), "Scalar MUX register immediate", -{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=siV):(RdV=RsV); }) +{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? siV : RsV); }) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index c21cb73..35ae3d2 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -294,12 +294,14 @@ DEF_CLASS32(ICLASS_LD" ---- -------- PP------ --------",LD) DEF_CLASS32(ICLASS_LD" 0--- -------- PP------ --------",LD_ADDR_ROFFSET) +DEF_CLASS32(ICLASS_LD" 100- -------- PP----0- --------",LD_ADDR_POST_CIRC_IMMED) DEF_CLASS32(ICLASS_LD" 101- -------- PP00---- --------",LD_ADDR_POST_IMMED) DEF_CLASS32(ICLASS_LD" 101- -------- PP01---- --------",LD_ADDR_ABS_UPDATE_V4) DEF_CLASS32(ICLASS_LD" 101- -------- PP1----- --------",LD_ADDR_POST_IMMED_PRED_V2) DEF_CLASS32(ICLASS_LD" 110- -------- PP-0---- 0-------",LD_ADDR_POST_REG) DEF_CLASS32(ICLASS_LD" 110- -------- PP-1---- --------",LD_ADDR_ABS_PLUS_REG_V4) DEF_CLASS32(ICLASS_LD" 100- -------- PP----1- --------",LD_ADDR_POST_CREG_V2) +DEF_CLASS32(ICLASS_LD" 111- -------- PP------ 0-------",LD_ADDR_POST_BREV_REG) DEF_CLASS32(ICLASS_LD" 111- -------- PP------ 1-------",LD_ADDR_PRED_ABS_V4) DEF_FIELD32(ICLASS_LD" !!!- -------- PP------ --------",LD_Amode,"Amode") @@ -308,18 +310,24 @@ DEF_FIELD32(ICLASS_LD" ---- --!----- 
PP------ --------",LD_UN,"Unsigned") #define STD_LD_ENC(TAG,OPC) \ DEF_ENC32(L2_load##TAG##_io, ICLASS_LD" 0 ii "OPC" sssss PPiiiiii iiiddddd")\ +DEF_ENC32(L2_load##TAG##_pci, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--0i iiiddddd")\ DEF_ENC32(L2_load##TAG##_pi, ICLASS_LD" 1 01 "OPC" xxxxx PP00---i iiiddddd")\ DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIddddd")\ DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--ddddd")\ DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIddddd")\ +DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--ddddd")\ +DEF_ENC32(L2_load##TAG##_pbr, ICLASS_LD" 1 11 "OPC" xxxxx PPu0---- 0--ddddd") #define STD_LDX_ENC(TAG,OPC) \ DEF_ENC32(L2_load##TAG##_io, ICLASS_LD" 0 ii "OPC" sssss PPiiiiii iiiyyyyy")\ +DEF_ENC32(L2_load##TAG##_pci, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--0i iiiyyyyy")\ DEF_ENC32(L2_load##TAG##_pi, ICLASS_LD" 1 01 "OPC" xxxxx PP00---i iiiyyyyy")\ DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIyyyyy")\ DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--yyyyy")\ DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIyyyyy")\ +DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--yyyyy")\ +DEF_ENC32(L2_load##TAG##_pbr, ICLASS_LD" 1 11 "OPC" xxxxx PPu0---- 0--yyyyy") #define STD_PLD_ENC(TAG,OPC) \ @@ -334,6 +342,15 @@ DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP111tti 1--ddd /* 0 000 misc: dealloc,loadw_locked,dcfetch */ +STD_LD_ENC(bzw4,"0 101") +STD_LD_ENC(bzw2,"0 011") + +STD_LD_ENC(bsw4,"0 111") +STD_LD_ENC(bsw2,"0 001") + +STD_LDX_ENC(alignh,"0 010") +STD_LDX_ENC(alignb,"0 100") + STD_LD_ENC(rb, "1 000") STD_LD_ENC(rub, "1 001") STD_LD_ENC(rh, "1 010") @@ -351,6 +368,7 @@ STD_PLD_ENC(rd, "1 110") /* note dest reg field LSB=0, 1 is reserved */ DEF_CLASS32( ICLASS_LD" 0--0 000----- PP------ --------",LD_MISC) DEF_ANTICLASS32(ICLASS_LD" 0--0 000----- PP------ 
--------",LD_ADDR_ROFFSET) +DEF_ANTICLASS32(ICLASS_LD" 1000 000----- PP------ --------",LD_ADDR_POST_CIRC_IMMED) DEF_ANTICLASS32(ICLASS_LD" 1010 000----- PP------ --------",LD_ADDR_POST_IMMED) DEF_ANTICLASS32(ICLASS_LD" 1100 000----- PP------ --------",LD_ADDR_POST_REG) DEF_ANTICLASS32(ICLASS_LD" 1110 000----- PP------ --------",LD_ADDR_POST_REG) @@ -397,6 +415,7 @@ DEF_FIELD32(ICLASS_ST" ---! !!------ PP------ --------",ST_Type,"Type") DEF_FIELD32(ICLASS_ST" ---- --!----- PP------ --------",ST_UN,"Unsigned") DEF_CLASS32(ICLASS_ST" 0--1 -------- PP------ --------",ST_ADDR_ROFFSET) +DEF_CLASS32(ICLASS_ST" 1001 -------- PP------ ------0-",ST_ADDR_POST_CIRC_IMMED) DEF_CLASS32(ICLASS_ST" 1011 -------- PP0----- 0-----0-",ST_ADDR_POST_IMMED) DEF_CLASS32(ICLASS_ST" 1011 -------- PP0----- 1-------",ST_ADDR_ABS_UPDATE_V4) DEF_CLASS32(ICLASS_ST" 1011 -------- PP1----- --------",ST_ADDR_POST_IMMED_PRED_V2) @@ -404,6 +423,7 @@ DEF_CLASS32(ICLASS_ST" 1111 -------- PP------ 1-------",ST_ADDR_PRED_ABS_V4) DEF_CLASS32(ICLASS_ST" 1101 -------- PP------ 0-------",ST_ADDR_POST_REG) DEF_CLASS32(ICLASS_ST" 1101 -------- PP------ 1-------",ST_ADDR_ABS_PLUS_REG_V4) DEF_CLASS32(ICLASS_ST" 1001 -------- PP------ ------1-",ST_ADDR_POST_CREG_V2) +DEF_CLASS32(ICLASS_ST" 1111 -------- PP------ 0-------",ST_ADDR_POST_BREV_REG) DEF_CLASS32(ICLASS_ST" 0--0 1------- PP------ --------",ST_MISC_STORELIKE) DEF_CLASS32(ICLASS_ST" 1--0 0------- PP------ --------",ST_MISC_BUSOP) DEF_CLASS32(ICLASS_ST" 0--0 0------- PP------ --------",ST_MISC_CACHEOP) @@ -411,10 +431,13 @@ DEF_CLASS32(ICLASS_ST" 0--0 0------- PP------ --------",ST_MISC_CACHEOP) #define STD_ST_ENC(TAG,OPC,SRC) \ DEF_ENC32(S2_store##TAG##_io, ICLASS_ST" 0 ii "OPC" sssss PPi"SRC" iiiiiiii")\ +DEF_ENC32(S2_store##TAG##_pci, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0iiii-0-")\ DEF_ENC32(S2_store##TAG##_pi, ICLASS_ST" 1 01 "OPC" xxxxx PP0"SRC" 0iiii-0-")\ DEF_ENC32(S4_store##TAG##_ap, ICLASS_ST" 1 01 "OPC" eeeee PP0"SRC" 1-IIIIII")\ 
DEF_ENC32(S2_store##TAG##_pr, ICLASS_ST" 1 10 "OPC" xxxxx PPu"SRC" 0-------")\ DEF_ENC32(S4_store##TAG##_ur, ICLASS_ST" 1 10 "OPC" uuuuu PPi"SRC" 1iIIIIII")\ +DEF_ENC32(S2_store##TAG##_pcr, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0-----1-")\ +DEF_ENC32(S2_store##TAG##_pbr, ICLASS_ST" 1 11 "OPC" xxxxx PPu"SRC" 0-------") #define STD_PST_ENC(TAG,OPC,SRC) \ @@ -1017,6 +1040,8 @@ MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") +MPY_ENC(A5_ACS, "1010","xxxxx","0","1","0","1","ee") +MPY_ENC(A6_vminub_RdP, "1010","ddddd","0","1","1","1","ee") /* */ @@ -1028,6 +1053,7 @@ MPY_ENC(F2_sfmin, "1011","ddddd","0","0","0","1","01") MPY_ENC(F2_sfmpy, "1011","ddddd","0","0","1","0","00") MPY_ENC(F2_sffixupn, "1011","ddddd","0","0","1","1","00") MPY_ENC(F2_sffixupd, "1011","ddddd","0","0","1","1","01") +MPY_ENC(F2_sfrecipa, "1011","ddddd","1","1","1","1","ee") DEF_FIELDROW_DESC32(ICLASS_M" 1100 -------- PP------ --------","[#12] Rd=(Rs,Rt)") DEF_FIELD32(ICLASS_M" 1100 -------- PP------ --!-----",Mc_tH,"Rt is High") /*Rt high */ @@ -1641,6 +1667,7 @@ SH2_RR_ENC(F2_conv_sf2w, "1011","100","-","000","ddddd") SH2_RR_ENC(F2_conv_sf2uw_chop, "1011","011","-","001","ddddd") SH2_RR_ENC(F2_conv_sf2w_chop, "1011","100","-","001","ddddd") SH2_RR_ENC(F2_sffixupr, "1011","101","-","000","ddddd") +SH2_RR_ENC(F2_sfinvsqrta, "1011","111","-","0ee","ddddd") DEF_FIELDROW_DESC32(ICLASS_S2op" 1100 -------- PP------ --------","[#12] Rd=(Rs,#u6)") @@ -1740,11 +1767,14 @@ SH_RRR_ENC(S4_vxsubaddh, "0001","01-","-","110","ddddd") SH_RRR_ENC(S4_vxaddsubhr, "0001","11-","-","00-","ddddd") SH_RRR_ENC(S4_vxsubaddhr, "0001","11-","-","01-","ddddd") SH_RRR_ENC(S4_extractp_rp, "0001","11-","-","10-","ddddd") +SH_RRR_ENC(S2_cabacdecbin, "0001","11-","-","11-","ddddd") /* implicit P0 write */ DEF_FIELDROW_DESC32(ICLASS_S3op" 0010 -------- PP------ --------","[#2] Rdd=(Rss,Rtt,Pu)") SH_RRR_ENC(S2_valignrb, "0010","0--","-","-uu","ddddd") SH_RRR_ENC(S2_vsplicerb, "0010","100","-","-uu","ddddd") 
+SH_RRR_ENC(A4_addp_c, "0010","110","-","-xx","ddddd") +SH_RRR_ENC(A4_subp_c, "0010","111","-","-xx","ddddd") DEF_FIELDROW_DESC32(ICLASS_S3op" 0011 -------- PP------ --------","[#3] Rdd=(Rss,Rt)") diff --git a/target/hexagon/imported/float.idef b/target/hexagon/imported/float.idef index 76cecfe..3e75bc4 100644 --- a/target/hexagon/imported/float.idef +++ b/target/hexagon/imported/float.idef @@ -146,6 +146,22 @@ Q6INSN(F2_sfimm_n,"Rd32=sfmake(#u10):neg",ATTRIBS(), }) +Q6INSN(F2_sfrecipa,"Rd32,Pe4=sfrecipa(Rs32,Rt32)",ATTRIBS(), +"Reciprocal Approximation for Division", +{ + fHIDE(int idx;) + fHIDE(int adjust;) + fHIDE(int mant;) + fHIDE(int exp;) + if (fSF_RECIP_COMMON(RsV,RtV,RdV,adjust)) { + PeV = adjust; + idx = (RtV >> 16) & 0x7f; + mant = (fSF_RECIP_LOOKUP(idx) << 15) | 1; + exp = fSF_BIAS() - (fSF_GETEXP(RtV) - fSF_BIAS()) - 1; + RdV = fMAKESF(fGETBIT(31,RtV),exp,mant); + } +}) + Q6INSN(F2_sffixupn,"Rd32=sffixupn(Rs32,Rt32)",ATTRIBS(), "Fix Up Numerator", { @@ -162,6 +178,22 @@ Q6INSN(F2_sffixupd,"Rd32=sffixupd(Rs32,Rt32)",ATTRIBS(), RdV = RtV; }) +Q6INSN(F2_sfinvsqrta,"Rd32,Pe4=sfinvsqrta(Rs32)",ATTRIBS(), +"Reciprocal Square Root Approximation", +{ + fHIDE(int idx;) + fHIDE(int adjust;) + fHIDE(int mant;) + fHIDE(int exp;) + if (fSF_INVSQRT_COMMON(RsV,RdV,adjust)) { + PeV = adjust; + idx = (RsV >> 17) & 0x7f; + mant = (fSF_INVSQRT_LOOKUP(idx) << 15); + exp = fSF_BIAS() - ((fSF_GETEXP(RsV) - fSF_BIAS()) >> 1) - 1; + RdV = fMAKESF(fGETBIT(31,RsV),exp,mant); + } +}) + Q6INSN(F2_sffixupr,"Rd32=sffixupr(Rs32)",ATTRIBS(), "Fix Up Radicand", { diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 78a2ea4..359d3b7 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -25,7 +25,10 @@ Q6INSN(L2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")", ATTRIB,DESCR,{fIMMEXT( Q6INSN(L4_##TAG##_ur, OPER"(Rt32<<#u2+#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IRs(UiV,RtV,uiV); SEMANTICS;})\ Q6INSN(L4_##TAG##_ap, 
OPER"(Re32=#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IMM(UiV); SEMANTICS; ReV=UiV; })\ Q6INSN(L2_##TAG##_pr, OPER"(Rx32++Mu2)", ATTRIB,DESCR,{fEA_REG(RxV); fPM_M(RxV,MuV); SEMANTICS;})\ +Q6INSN(L2_##TAG##_pbr, OPER"(Rx32++Mu2:brev)", ATTRIB,DESCR,{fEA_BREVR(RxV); fPM_M(RxV,MuV); SEMANTICS;})\ Q6INSN(L2_##TAG##_pi, OPER"(Rx32++#s4:"SHFT")", ATTRIB,DESCR,{fEA_REG(RxV); fPM_I(RxV,siV); SEMANTICS;})\ +Q6INSN(L2_##TAG##_pci, OPER"(Rx32++#s4:"SHFT":circ(Mu2))",ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRI(RxV,siV,MuV); SEMANTICS;})\ +Q6INSN(L2_##TAG##_pcr, OPER"(Rx32++I:circ(Mu2))", ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRR(RxV,fREAD_IREG(MuV)<<SCALE,MuV); SEMANTICS;}) /* The set of 32-bit load instructions */ STD_LD_AMODES(loadrub,"Rd32=memub","Load Unsigned Byte",ATTRIBS(A_LOAD),"0",fLOAD(1,1,u,EA,RdV),0) @@ -35,6 +38,68 @@ STD_LD_AMODES(loadrh, "Rd32=memh", "Load signed Half integer",ATTRIBS(A_LOAD),"1 STD_LD_AMODES(loadri, "Rd32=memw", "Load Word",ATTRIBS(A_LOAD),"2",fLOAD(1,4,u,EA,RdV),2) STD_LD_AMODES(loadrd, "Rdd32=memd","Load Double integer",ATTRIBS(A_LOAD),"3",fLOAD(1,8,u,EA,RddV),3) +/* These instructions do a load an unpack */ +STD_LD_AMODES(loadbzw2, "Rd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)", +ATTRIBS(A_LOAD),"1", +{fHIDE(size2u_t tmpV; int i;) + fLOAD(1,2,u,EA,tmpV); + for (i=0;i<2;i++) { + fSETHALF(i,RdV,fGETUBYTE(i,tmpV)); + } +},1) + +STD_LD_AMODES(loadbzw4, "Rdd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)", +ATTRIBS(A_LOAD),"2", +{fHIDE(size4u_t tmpV; int i;) + fLOAD(1,4,u,EA,tmpV); + for (i=0;i<4;i++) { + fSETHALF(i,RddV,fGETUBYTE(i,tmpV)); + } +},2) + + + +/* These instructions do a load an unpack */ +STD_LD_AMODES(loadbsw2, "Rd32=membh", "Load Bytes and Vector Sign-Extend (unpack)", +ATTRIBS(A_LOAD),"1", +{fHIDE(size2u_t tmpV; int i;) + fLOAD(1,2,u,EA,tmpV); + for (i=0;i<2;i++) { + fSETHALF(i,RdV,fGETBYTE(i,tmpV)); + } +},1) + +STD_LD_AMODES(loadbsw4, "Rdd32=membh", "Load Bytes and Vector Sign-Extend (unpack)", +ATTRIBS(A_LOAD),"2", 
+{fHIDE(size4u_t tmpV; int i;) + fLOAD(1,4,u,EA,tmpV); + for (i=0;i<4;i++) { + fSETHALF(i,RddV,fGETBYTE(i,tmpV)); + } +},2) + + + +STD_LD_AMODES(loadalignh, "Ryy32=memh_fifo", "Load Half-word into shifted vector", +ATTRIBS(A_LOAD),"1", +{ + fHIDE(size8u_t tmpV;) + fLOAD(1,2,u,EA,tmpV); + RyyV = (((size8u_t)RyyV)>>16)|(tmpV<<48); +},1) + + +STD_LD_AMODES(loadalignb, "Ryy32=memb_fifo", "Load byte into shifted vector", +ATTRIBS(A_LOAD),"0", +{ + fHIDE(size8u_t tmpV;) + fLOAD(1,1,u,EA,tmpV); + RyyV = (((size8u_t)RyyV)>>8)|(tmpV<<56); +},0) + + + + /* The set of addressing modes standard to all Store instructions */ #define STD_ST_AMODES(TAG,DEST,OPER,DESCR,ATTRIB,SHFT,SEMANTICS,SCALE)\ Q6INSN(S2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")="DEST, ATTRIB,DESCR,{fIMMEXT(siV); fEA_RI(RsV,siV); SEMANTICS; })\ @@ -42,6 +107,9 @@ Q6INSN(S2_##TAG##_pi, OPER"(Rx32++#s4:"SHFT")="DEST, ATTRIB,DESCR,{fEA_REG( Q6INSN(S4_##TAG##_ap, OPER"(Re32=#U6)="DEST, ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IMM(UiV); SEMANTICS; ReV=UiV; })\ Q6INSN(S2_##TAG##_pr, OPER"(Rx32++Mu2)="DEST, ATTRIB,DESCR,{fEA_REG(RxV); fPM_M(RxV,MuV); SEMANTICS; })\ Q6INSN(S4_##TAG##_ur, OPER"(Ru32<<#u2+#U6)="DEST, ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IRs(UiV,RuV,uiV); SEMANTICS;})\ +Q6INSN(S2_##TAG##_pbr, OPER"(Rx32++Mu2:brev)="DEST, ATTRIB,DESCR,{fEA_BREVR(RxV); fPM_M(RxV,MuV); SEMANTICS; })\ +Q6INSN(S2_##TAG##_pci, OPER"(Rx32++#s4:"SHFT":circ(Mu2))="DEST, ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRI(RxV,siV,MuV); SEMANTICS;})\ +Q6INSN(S2_##TAG##_pcr, OPER"(Rx32++I:circ(Mu2))="DEST, ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRR(RxV,fREAD_IREG(MuV)<<SCALE,MuV); SEMANTICS;}) /* The set of 32-bit store instructions */ diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def index 65292c7..32ed3bf 100755 --- a/target/hexagon/imported/macros.def +++ b/target/hexagon/imported/macros.def @@ -92,6 +92,21 @@ DEF_MACRO( /* attribs */ ) + +DEF_MACRO( + fINSERT_RANGE, + { + int offset=LOWBIT; + int width=HIBIT-LOWBIT+1; + /* 
clear bits where new bits go */ + INREG &= ~(((fCONSTLL(1)<<width)-1)<<offset); + /* OR in new bits */ + INREG |= ((INVAL & ((fCONSTLL(1)<<width)-1)) << offset); + }, + /* attribs */ +) + + DEF_MACRO( f8BITSOF, ( (VAL) ? 0xff : 0x00), @@ -277,6 +292,12 @@ DEF_MACRO( /*************************************/ DEF_MACRO( + fREAD_IREG, /* read modifier register */ + (fSXTN(11,64,(((VAL) & 0xf0000000)>>21) | ((VAL>>17)&0x7f) )), /* behavior */ + () +) + +DEF_MACRO( fREAD_LR, /* read link register */ (READ_RREG(REG_LR)), /* behavior */ () @@ -307,6 +328,12 @@ DEF_MACRO( ) DEF_MACRO( + fREAD_CSREG, /* read CS register */ + (READ_RREG(REG_CSA+N)), /* behavior */ + () +) + +DEF_MACRO( fREAD_LC0, /* read loop count */ (READ_RREG(REG_LC0)), /* behavior */ () @@ -807,6 +834,12 @@ DEF_MACRO( ) DEF_MACRO( + fEA_BREVR, /* Calculate EA with bit reversed bottom of REGISTER */ + EA=fbrev(REG), + () +) + +DEF_MACRO( fEA_GPI, /* Calculate EA with Global Poitner + Immediate */ do { EA=fREAD_GP()+IMM; fGP_DOCHKPAGECROSS(fREAD_GP(),EA); } while (0), () @@ -825,6 +858,20 @@ DEF_MACRO( ) DEF_MACRO( + fPM_CIRI, /* Post Modify Register using Circular arithmetic by Immediate */ + do { fcirc_add(REG,siV,MuV); } while (0), + () +) + +DEF_MACRO( + fPM_CIRR, /* Post Modify Register using Circular arithmetic by register */ + do { fcirc_add(REG,VAL,MuV); } while (0), + () +) + + + +DEF_MACRO( fSCALE, /* scale by N */ (((size8s_t)(A))<<N), /* optional attributes */ diff --git a/target/hexagon/imported/shift.idef b/target/hexagon/imported/shift.idef index e328ab7..b32c4e0 100644 --- a/target/hexagon/imported/shift.idef +++ b/target/hexagon/imported/shift.idef @@ -1029,6 +1029,53 @@ Q6INSN(S4_clbpaddi,"Rd32=add(clb(Rss32),#s6)",ATTRIBS(A_ARCHV2), { RdV = (fMAX(fCL1_8(RssV),fCL1_8(~RssV)))+siV;}) + +Q6INSN(S2_cabacdecbin,"Rdd32=decbin(Rss32,Rtt32)",ATTRIBS(A_ARCHV3),"CABAC decode bin", +{ + fHIDE(size4u_t state;) + fHIDE(size4u_t valMPS;) + fHIDE(size4u_t bitpos;) + fHIDE(size4u_t range;) + 
fHIDE(size4u_t offset;) + fHIDE(size4u_t rLPS;) + fHIDE(size4u_t rMPS;) + + state = fEXTRACTU_RANGE( fGETWORD(1,RttV) ,5,0); + valMPS = fEXTRACTU_RANGE( fGETWORD(1,RttV) ,8,8); + bitpos = fEXTRACTU_RANGE( fGETWORD(0,RttV) ,4,0); + range = fGETWORD(0,RssV); + offset = fGETWORD(1,RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][ (range >>29)&3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS= (range&0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + RddV = AC_next_state_MPS_64[state]; + fINSERT_RANGE(RddV,8,8,valMPS); + fINSERT_RANGE(RddV,31,23,(rMPS>>23)); + fSETWORD(1,RddV,offset); + fWRITE_P0(valMPS); + + + } + /* least probable region */ + else { + RddV = AC_next_state_LPS_64[state]; + fINSERT_RANGE(RddV,8,8,((!state)?(1-valMPS):(valMPS))); + fINSERT_RANGE(RddV,31,23,(rLPS>>23)); + fSETWORD(1,RddV,(offset-rMPS)); + fWRITE_P0((valMPS^1)); + } +}) + + Q6INSN(S2_clb,"Rd32=clb(Rs32)",ATTRIBS(), "Count leading bits", {RdV = fMAX(fCL1_4(RsV),fCL1_4(~RsV));}) diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h index 5756a1d..2e34591 100644 --- a/target/hexagon/insn.h +++ b/target/hexagon/insn.h @@ -40,14 +40,15 @@ struct Instruction { uint32_t iclass:6; uint32_t slot:3; - uint32_t part1:1; /* + uint32_t which_extended:1; /* If has an extender, which immediate */ + uint32_t new_value_producer_slot:4; + + bool part1; /* * cmp-jumps are split into two insns. 
* set for the compare and clear for the jump */ - uint32_t extension_valid:1; /* Has a constant extender attached */ - uint32_t which_extended:1; /* If has an extender, which immediate */ - uint32_t is_endloop:1; /* This is an end of loop */ - uint32_t new_value_producer_slot:4; + bool extension_valid; /* Has a constant extender attached */ + bool is_endloop; /* This is an end of loop */ int32_t immed[IMMEDS_MAX]; /* immediate field */ }; @@ -58,13 +59,13 @@ struct Packet { uint16_t encod_pkt_size_in_bytes; /* Pre-decodes about COF */ - uint32_t pkt_has_cof:1; /* Has any change-of-flow */ - uint32_t pkt_has_endloop:1; + bool pkt_has_cof; /* Has any change-of-flow */ + bool pkt_has_endloop; - uint32_t pkt_has_dczeroa:1; + bool pkt_has_dczeroa; - uint32_t pkt_has_store_s0:1; - uint32_t pkt_has_store_s1:1; + bool pkt_has_store_s0; + bool pkt_has_store_s1; Insn insn[INSTRUCTIONS_MAX]; }; diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 2da85c8..6b20aff 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -22,11 +22,12 @@ * Change HEX_DEBUG to 1 to turn on debugging output */ #define HEX_DEBUG 0 -#if HEX_DEBUG -#define HEX_DEBUG_LOG(...) qemu_log(__VA_ARGS__) -#else -#define HEX_DEBUG_LOG(...) do { } while (0) -#endif +#define HEX_DEBUG_LOG(...) 
\ + do { \ + if (HEX_DEBUG) { \ + qemu_log(__VA_ARGS__); \ + } \ + } while (0) int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index cfcb817..b726c3b 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -133,6 +133,38 @@ CHECK_NOSHUF; \ tcg_gen_qemu_ld64(DST, VA, ctx->mem_idx); \ } while (0) + +#define MEM_STORE1_FUNC(X) \ + __builtin_choose_expr(TYPE_INT(X), \ + gen_store1i, \ + __builtin_choose_expr(TYPE_TCGV(X), \ + gen_store1, (void)0)) +#define MEM_STORE1(VA, DATA, SLOT) \ + MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT) + +#define MEM_STORE2_FUNC(X) \ + __builtin_choose_expr(TYPE_INT(X), \ + gen_store2i, \ + __builtin_choose_expr(TYPE_TCGV(X), \ + gen_store2, (void)0)) +#define MEM_STORE2(VA, DATA, SLOT) \ + MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT) + +#define MEM_STORE4_FUNC(X) \ + __builtin_choose_expr(TYPE_INT(X), \ + gen_store4i, \ + __builtin_choose_expr(TYPE_TCGV(X), \ + gen_store4, (void)0)) +#define MEM_STORE4(VA, DATA, SLOT) \ + MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT) + +#define MEM_STORE8_FUNC(X) \ + __builtin_choose_expr(TYPE_INT(X), \ + gen_store8i, \ + __builtin_choose_expr(TYPE_TCGV_I64(X), \ + gen_store8, (void)0)) +#define MEM_STORE8(VA, DATA, SLOT) \ + MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT) #else #define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA)) #define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA)) @@ -190,6 +222,13 @@ static inline void gen_pred_cancel(TCGv pred, int slot_num) (((HIBIT) - (LOWBIT) + 1) ? \ extract64((INREG), (LOWBIT), ((HIBIT) - (LOWBIT) + 1)) : \ 0LL) +#define fINSERT_RANGE(INREG, HIBIT, LOWBIT, INVAL) \ + do { \ + int width = ((HIBIT) - (LOWBIT) + 1); \ + INREG = (width >= 0 ? \ + deposit64((INREG), (LOWBIT), width, (INVAL)) : \ + INREG); \ + } while (0) #define f8BITSOF(VAL) ((VAL) ? 
0xff : 0x00) @@ -285,6 +324,39 @@ static inline void gen_logical_not(TCGv dest, TCGv src) #define fPCALIGN(IMM) IMM = (IMM & ~PCALIGN_MASK) +#ifdef QEMU_GENERATE +static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) +{ + /* + * Section 2.2.4 of the Hexagon V67 Programmer's Reference Manual + * + * The "I" value from a modifier register is divided into two pieces + * LSB bits 23:17 + * MSB bits 31:28 + * The value is signed + * + * At the end we shift the result according to the shift argument + */ + TCGv msb = tcg_temp_new(); + TCGv lsb = tcg_temp_new(); + + tcg_gen_extract_tl(lsb, val, 17, 7); + tcg_gen_sari_tl(msb, val, 21); + tcg_gen_deposit_tl(result, msb, lsb, 0, 7); + + tcg_gen_shli_tl(result, result, shift); + + tcg_temp_free(msb); + tcg_temp_free(lsb); + + return result; +} +#define fREAD_IREG(VAL, SHIFT) gen_read_ireg(ireg, (VAL), (SHIFT)) +#else +#define fREAD_IREG(VAL) \ + (fSXTN(11, 64, (((VAL) & 0xf0000000) >> 21) | ((VAL >> 17) & 0x7f))) +#endif + #define fREAD_LR() (READ_REG(HEX_REG_LR)) #define fWRITE_LR(A) WRITE_RREG(HEX_REG_LR, A) @@ -341,8 +413,6 @@ static inline void gen_logical_not(TCGv dest, TCGv src) #define fWRITE_LC0(VAL) WRITE_RREG(HEX_REG_LC0, VAL) #define fWRITE_LC1(VAL) WRITE_RREG(HEX_REG_LC1, VAL) -#define fCARRY_FROM_ADD(A, B, C) carry_from_add64(A, B, C) - #define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1) #define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL)) #define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG)) @@ -402,6 +472,21 @@ static inline void gen_logical_not(TCGv dest, TCGv src) #define fCAST8S_16S(A) (int128_exts64(A)) #define fCAST16S_8S(A) (int128_getlo(A)) +#ifdef QEMU_GENERATE +#define fEA_RI(REG, IMM) tcg_gen_addi_tl(EA, REG, IMM) +#define fEA_RRs(REG, REG2, SCALE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_shli_tl(tmp, REG2, SCALE); \ + tcg_gen_add_tl(EA, REG, tmp); \ + tcg_temp_free(tmp); \ + } while (0) +#define fEA_IRs(IMM, REG, SCALE) \ + do { \ + tcg_gen_shli_tl(EA, REG, SCALE); \ + 
tcg_gen_addi_tl(EA, EA, IMM); \ + } while (0) +#else #define fEA_RI(REG, IMM) \ do { \ EA = REG + IMM; \ @@ -414,12 +499,21 @@ static inline void gen_logical_not(TCGv dest, TCGv src) do { \ EA = IMM + (REG << SCALE); \ } while (0) +#endif #ifdef QEMU_GENERATE #define fEA_IMM(IMM) tcg_gen_movi_tl(EA, IMM) #define fEA_REG(REG) tcg_gen_mov_tl(EA, REG) +#define fEA_BREVR(REG) gen_helper_fbrev(EA, REG) #define fPM_I(REG, IMM) tcg_gen_addi_tl(REG, REG, IMM) #define fPM_M(REG, MVAL) tcg_gen_add_tl(REG, REG, MVAL) +#define fPM_CIRI(REG, IMM, MVAL) \ + do { \ + TCGv tcgv_siV = tcg_const_tl(siV); \ + gen_helper_fcircadd(REG, REG, tcgv_siV, MuV, \ + hex_gpr[HEX_REG_CS0 + MuN]); \ + tcg_temp_free(tcgv_siV); \ + } while (0) #else #define fEA_IMM(IMM) do { EA = (IMM); } while (0) #define fEA_REG(REG) do { EA = (REG); } while (0) @@ -496,23 +590,43 @@ static inline void gen_logical_not(TCGv dest, TCGv src) gen_load_locked##SIZE##SIGN(DST, EA, ctx->mem_idx); #endif +#ifdef QEMU_GENERATE +#define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, insn->slot) +#else #define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, slot) +#endif #ifdef QEMU_GENERATE #define fSTORE_LOCKED(NUM, SIZE, EA, SRC, PRED) \ gen_store_conditional##SIZE(env, ctx, PdN, PRED, EA, SRC); #endif +#ifdef QEMU_GENERATE +#define GETBYTE_FUNC(X) \ + __builtin_choose_expr(TYPE_TCGV(X), \ + gen_get_byte, \ + __builtin_choose_expr(TYPE_TCGV_I64(X), \ + gen_get_byte_i64, (void)0)) +#define fGETBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, true) +#define fGETUBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, false) +#else #define fGETBYTE(N, SRC) ((int8_t)((SRC >> ((N) * 8)) & 0xff)) #define fGETUBYTE(N, SRC) ((uint8_t)((SRC >> ((N) * 8)) & 0xff)) +#endif #define fSETBYTE(N, DST, VAL) \ do { \ DST = (DST & ~(0x0ffLL << ((N) * 8))) | \ (((uint64_t)((VAL) & 0x0ffLL)) << ((N) * 8)); \ } while (0) + +#ifdef QEMU_GENERATE +#define fGETHALF(N, SRC) gen_get_half(HALF, N, SRC, true) +#define fGETUHALF(N, SRC) 
gen_get_half(HALF, N, SRC, false) +#else #define fGETHALF(N, SRC) ((int16_t)((SRC >> ((N) * 16)) & 0xffff)) #define fGETUHALF(N, SRC) ((uint16_t)((SRC >> ((N) * 16)) & 0xffff)) +#endif #define fSETHALF(N, DST, VAL) \ do { \ DST = (DST & ~(0x0ffffLL << ((N) * 16))) | \ diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index bb0b4fb..6fd9360 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -173,7 +173,6 @@ hexagon_ss.add(files( 'printinsn.c', 'arch.c', 'fma_emu.c', - 'conv_emu.c', )) target_arch += {'hexagon': hexagon_ss} diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 2c6d718..63dd685 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -25,7 +25,6 @@ #include "arch.h" #include "hex_arch_types.h" #include "fma_emu.h" -#include "conv_emu.h" #define SF_BIAS 127 #define SF_MANTBITS 23 @@ -35,7 +34,7 @@ static void QEMU_NORETURN do_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc) { - CPUState *cs = CPU(hexagon_env_get_cpu(env)); + CPUState *cs = env_cpu(env); qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); cs->exception_index = exception; cpu_loop_exit_restore(cs, pc); @@ -46,8 +45,8 @@ void QEMU_NORETURN HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) do_raise_exception_err(env, excp, 0); } -static inline void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val, uint32_t slot) +static void log_reg_write(CPUHexagonState *env, int rnum, + target_ulong val, uint32_t slot) { HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")", rnum, val, val); @@ -57,14 +56,13 @@ static inline void log_reg_write(CPUHexagonState *env, int rnum, HEX_DEBUG_LOG("\n"); env->new_value[rnum] = val; -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - env->reg_written[rnum] = 1; -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + env->reg_written[rnum] = 1; + } } 
-static inline void log_pred_write(CPUHexagonState *env, int pnum, - target_ulong val) +static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) { HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n", @@ -79,8 +77,8 @@ static inline void log_pred_write(CPUHexagonState *env, int pnum, } } -static inline void log_store32(CPUHexagonState *env, target_ulong addr, - target_ulong val, int width, int slot) +static void log_store32(CPUHexagonState *env, target_ulong addr, + target_ulong val, int width, int slot) { HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx ", %" PRId32 " [0x08%" PRIx32 "])\n", @@ -90,8 +88,8 @@ static inline void log_store32(CPUHexagonState *env, target_ulong addr, env->mem_log_stores[slot].data32 = val; } -static inline void log_store64(CPUHexagonState *env, target_ulong addr, - int64_t val, int width, int slot) +static void log_store64(CPUHexagonState *env, target_ulong addr, + int64_t val, int width, int slot) { HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx ", %" PRId64 " [0x016%" PRIx64 "])\n", @@ -101,7 +99,7 @@ static inline void log_store64(CPUHexagonState *env, target_ulong addr, env->mem_log_stores[slot].data64 = val; } -static inline void write_new_pc(CPUHexagonState *env, target_ulong addr) +static void write_new_pc(CPUHexagonState *env, target_ulong addr) { HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr); @@ -119,7 +117,6 @@ static inline void write_new_pc(CPUHexagonState *env, target_ulong addr) } } -#if HEX_DEBUG /* Handy place to set a breakpoint */ void HELPER(debug_start_packet)(CPUHexagonState *env) { @@ -130,14 +127,12 @@ void HELPER(debug_start_packet)(CPUHexagonState *env) env->reg_written[i] = 0; } } -#endif -static inline int32_t new_pred_value(CPUHexagonState *env, int pnum) +static int32_t new_pred_value(CPUHexagonState *env, int pnum) { return env->new_pred_value[pnum]; } -#if HEX_DEBUG /* Checks for bookkeeping errors between disassembly context and runtime */ void 
HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check) { @@ -147,7 +142,6 @@ void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check) g_assert_not_reached(); } } -#endif void HELPER(commit_store)(CPUHexagonState *env, int slot_num) { @@ -173,7 +167,6 @@ void HELPER(commit_store)(CPUHexagonState *env, int slot_num) } } -#if HEX_DEBUG static void print_store(CPUHexagonState *env, int slot) { if (!(env->slot_cancelled & (1 << slot))) { @@ -257,35 +250,26 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) env->gpr[HEX_REG_QEMU_INSN_CNT]); } -#endif - -static int32_t fcircadd_v4(int32_t RxV, int32_t offset, int32_t M, int32_t CS) -{ - int32_t length = M & 0x0001ffff; - uint32_t new_ptr = RxV + offset; - uint32_t start_addr = CS; - uint32_t end_addr = start_addr + length; - - if (new_ptr >= end_addr) { - new_ptr -= length; - } else if (new_ptr < start_addr) { - new_ptr += length; - } - - return new_ptr; -} int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) { - int32_t K_const = (M >> 24) & 0xf; - int32_t length = M & 0x1ffff; - int32_t mask = (1 << (K_const + 2)) - 1; + int32_t K_const = sextract32(M, 24, 4); + int32_t length = sextract32(M, 0, 17); uint32_t new_ptr = RxV + offset; - uint32_t start_addr = RxV & (~mask); - uint32_t end_addr = start_addr | length; + uint32_t start_addr; + uint32_t end_addr; if (K_const == 0 && length >= 4) { - return fcircadd_v4(RxV, offset, M, CS); + start_addr = CS; + end_addr = start_addr + length; + } else { + /* + * Versions v3 and earlier used the K value to specify a power-of-2 size + * 2^(K+2) that is greater than the buffer length + */ + int32_t mask = (1 << (K_const + 2)) - 1; + start_addr = RxV & (~mask); + end_addr = start_addr | length; } if (new_ptr >= end_addr) { @@ -297,24 +281,103 @@ int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) return new_ptr; } +uint32_t HELPER(fbrev)(uint32_t addr) +{ + /* + 
* Bit reverse the low 16 bits of the address + */ + return deposit32(addr, 0, 16, revbit16(addr)); +} + +static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant) +{ + return make_float32( + ((sign & 1) << 31) | + ((exp & 0xff) << SF_MANTBITS) | + (mant & ((1 << SF_MANTBITS) - 1))); +} + /* - * Hexagon FP operations return ~0 insteat of NaN - * The hex_check_sfnan/hex_check_dfnan functions perform this check + * sfrecipa, sfinvsqrta have two 32-bit results + * r0,p0=sfrecipa(r1,r2) + * r0,p0=sfinvsqrta(r1) + * + * Since helpers can only return a single value, we pack the two results + * into a 64-bit value. */ -static float32 hex_check_sfnan(float32 x) +uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV) { - if (float32_is_any_nan(x)) { - return make_float32(0xFFFFFFFFU); + int32_t PeV = 0; + float32 RdV; + int idx; + int adjust; + int mant; + int exp; + + arch_fpop_start(env); + if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) { + PeV = adjust; + idx = (RtV >> 16) & 0x7f; + mant = (recip_lookup_table[idx] << 15) | 1; + exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1; + RdV = build_float32(extract32(RtV, 31, 1), exp, mant); + } + arch_fpop_end(env); + return ((uint64_t)RdV << 32) | PeV; +} + +uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) +{ + int PeV = 0; + float32 RdV; + int idx; + int adjust; + int mant; + int exp; + + arch_fpop_start(env); + if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) { + PeV = adjust; + idx = (RsV >> 17) & 0x7f; + mant = (invsqrt_lookup_table[idx] << 15); + exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1; + RdV = build_float32(extract32(RsV, 31, 1), exp, mant); + } + arch_fpop_end(env); + return ((uint64_t)RdV << 32) | PeV; +} + +int64_t HELPER(vacsh_val)(CPUHexagonState *env, + int64_t RxxV, int64_t RssV, int64_t RttV) +{ + for (int i = 0; i < 4; i++) { + int xv = sextract64(RxxV, i * 16, 16); + int sv = sextract64(RssV, i * 16, 16); 
+ int tv = sextract64(RttV, i * 16, 16); + int max; + xv = xv + tv; + sv = sv - tv; + max = xv > sv ? xv : sv; + /* Note that fSATH can set the OVF bit in usr */ + RxxV = deposit64(RxxV, i * 16, 16, fSATH(max)); } - return x; + return RxxV; } -static float64 hex_check_dfnan(float64 x) +int32_t HELPER(vacsh_pred)(CPUHexagonState *env, + int64_t RxxV, int64_t RssV, int64_t RttV) { - if (float64_is_any_nan(x)) { - return make_float64(0xFFFFFFFFFFFFFFFFULL); + int32_t PeV = 0; + for (int i = 0; i < 4; i++) { + int xv = sextract64(RxxV, i * 16, 16); + int sv = sextract64(RssV, i * 16, 16); + int tv = sextract64(RttV, i * 16, 16); + xv = xv + tv; + sv = sv - tv; + PeV = deposit32(PeV, i * 2, 1, (xv > sv)); + PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv)); } - return x; + return PeV; } /* @@ -332,8 +395,8 @@ static void check_noshuf(CPUHexagonState *env, uint32_t slot) } } -static inline uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint8_t retval; check_noshuf(env, slot); @@ -341,8 +404,8 @@ static inline uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, return retval; } -static inline uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint16_t retval; check_noshuf(env, slot); @@ -350,8 +413,8 @@ static inline uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, return retval; } -static inline uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint32_t retval; check_noshuf(env, slot); @@ -359,8 +422,8 @@ static inline uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, return retval; } -static inline uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint64_t mem_load8(CPUHexagonState 
*env, uint32_t slot, + target_ulong vaddr) { uint64_t retval; check_noshuf(env, slot); @@ -374,7 +437,6 @@ float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV) float64 out_f64; arch_fpop_start(env); out_f64 = float32_to_float64(RsV, &env->fp_status); - out_f64 = hex_check_dfnan(out_f64); arch_fpop_end(env); return out_f64; } @@ -384,7 +446,6 @@ float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV) float32 out_f32; arch_fpop_start(env); out_f32 = float64_to_float32(RssV, &env->fp_status); - out_f32 = hex_check_sfnan(out_f32); arch_fpop_end(env); return out_f32; } @@ -394,7 +455,6 @@ float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV) float32 RdV; arch_fpop_start(env); RdV = uint32_to_float32(RsV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -404,7 +464,6 @@ float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV) float64 RddV; arch_fpop_start(env); RddV = uint32_to_float64(RsV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -414,7 +473,6 @@ float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV) float32 RdV; arch_fpop_start(env); RdV = int32_to_float32(RsV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -424,7 +482,6 @@ float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV) float64 RddV; arch_fpop_start(env); RddV = int32_to_float64(RsV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -434,7 +491,6 @@ float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV) float32 RdV; arch_fpop_start(env); RdV = uint64_to_float32(RssV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -444,7 +500,6 @@ float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV) float64 RddV; arch_fpop_start(env); RddV = uint64_to_float64(RssV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -454,7 +509,6 @@ float32 
HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV) float32 RdV; arch_fpop_start(env); RdV = int64_to_float32(RssV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -464,16 +518,21 @@ float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV) float64 RddV; arch_fpop_start(env); RddV = int64_to_float64(RssV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV) +uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - RdV = conv_sf_to_4u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float32_to_uint32(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -482,16 +541,28 @@ int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV) { int32_t RdV; arch_fpop_start(env); - RdV = conv_sf_to_4s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float32_to_int32(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV) +uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - RddV = conv_sf_to_8u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float32_to_uint64(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -500,16 +571,28 @@ int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV) { int64_t RddV; arch_fpop_start(env); - RddV = conv_sf_to_8s(RsV, &env->fp_status); + /* 
Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float32_to_int64(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV) +uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - RdV = conv_df_to_4u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float64_to_uint32(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -518,16 +601,28 @@ int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV) { int32_t RdV; arch_fpop_start(env); - RdV = conv_df_to_4s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float64_to_int32(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV) +uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - RddV = conv_df_to_8u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float64_to_uint64(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -536,17 +631,28 @@ int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV) { int64_t RddV; arch_fpop_start(env); - RddV = conv_df_to_8s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float64_to_int64(RssV, &env->fp_status); + } 
arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV) +uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_sf_to_4u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -555,18 +661,28 @@ int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV) { int32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_sf_to_4s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV) +uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_sf_to_8u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -575,18 +691,28 @@ int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV) { int64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_sf_to_8s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + 
float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV) +uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_df_to_4u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float32_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -595,18 +721,28 @@ int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV) { int32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_df_to_4s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV) +uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_df_to_8u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -615,8 +751,13 @@ int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV) { int64_t RddV; arch_fpop_start(env); - 
set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_df_to_8s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -626,7 +767,6 @@ float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_add(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -636,7 +776,6 @@ float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_sub(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -688,7 +827,6 @@ float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_maxnum(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -698,7 +836,6 @@ float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_minnum(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -765,7 +902,6 @@ float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV) float64 RddV; arch_fpop_start(env); RddV = float64_add(RssV, RttV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -775,7 +911,6 @@ float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV) float64 RddV; arch_fpop_start(env); RddV = float64_sub(RssV, RttV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -788,7 +923,6 @@ float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV) if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { float_raise(float_flag_invalid, &env->fp_status); } - 
RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -801,7 +935,6 @@ float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV) if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { float_raise(float_flag_invalid, &env->fp_status); } - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -877,7 +1010,6 @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = internal_mpyf(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -887,7 +1019,6 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV, { arch_fpop_start(env); RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status); - RxV = hex_check_sfnan(RxV); arch_fpop_end(env); return RxV; } @@ -919,7 +1050,6 @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV, RxV = check_nan(RxV, RsV, &env->fp_status); RxV = check_nan(RxV, RtV, &env->fp_status); tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status); - tmp = hex_check_sfnan(tmp); if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) { RxV = tmp; } @@ -934,12 +1064,11 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV, arch_fpop_start(env); neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status); RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status); - RxV = hex_check_sfnan(RxV); arch_fpop_end(env); return RxV; } -static inline bool is_inf_prod(int32_t a, int32_t b) +static bool is_inf_prod(int32_t a, int32_t b) { return (float32_is_infinity(a) && float32_is_infinity(b)) || (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) || @@ -949,8 +1078,8 @@ static inline bool is_inf_prod(int32_t a, int32_t b) float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV, float32 RsV, float32 RtV) { - int infinp; - int infminusinf; + bool infinp; + bool infminusinf; float32 tmp; arch_fpop_start(env); @@ -965,7 +1094,6 @@ float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 
RxV, RxV = check_nan(RxV, RsV, &env->fp_status); RxV = check_nan(RxV, RtV, &env->fp_status); tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status); - tmp = hex_check_sfnan(tmp); if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) { RxV = tmp; } @@ -983,8 +1111,8 @@ float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV, float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV, float32 RsV, float32 RtV) { - int infinp; - int infminusinf; + bool infinp; + bool infminusinf; float32 tmp; arch_fpop_start(env); @@ -1000,7 +1128,6 @@ float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV, RxV = check_nan(RxV, RtV, &env->fp_status); float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status); tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status); - tmp = hex_check_sfnan(tmp); if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) { RxV = tmp; } @@ -1024,13 +1151,11 @@ float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV) float64_is_normal(RttV)) { RddV = float64_mul(RssV, make_float64(0x4330000000000000), &env->fp_status); - RddV = hex_check_dfnan(RddV); } else if (float64_is_denormal(RttV) && (float64_getexp(RssV) >= 512) && float64_is_normal(RssV)) { RddV = float64_mul(RssV, make_float64(0x3cb0000000000000), &env->fp_status); - RddV = hex_check_dfnan(RddV); } else { RddV = RssV; } @@ -1043,7 +1168,6 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV, { arch_fpop_start(env); RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status); - RxxV = hex_check_dfnan(RxxV); arch_fpop_end(env); return RxxV; } diff --git a/target/hexagon/reg_fields.c b/target/hexagon/reg_fields.c index bdcab79..6713203 100644 --- a/target/hexagon/reg_fields.c +++ b/target/hexagon/reg_fields.c @@ -18,10 +18,9 @@ #include "qemu/osdep.h" #include "reg_fields.h" -const RegField reg_field_info[] = { +const RegField reg_field_info[NUM_REG_FIELDS] = { #define DEF_REG_FIELD(TAG, START, WIDTH) \ { START, WIDTH }, #include "reg_fields_def.h.inc" 
- { 0, 0 } #undef DEF_REG_FIELD }; diff --git a/target/hexagon/reg_fields.h b/target/hexagon/reg_fields.h index d3c86c94..9e2ad5d 100644 --- a/target/hexagon/reg_fields.h +++ b/target/hexagon/reg_fields.h @@ -23,8 +23,6 @@ typedef struct { int width; } RegField; -extern const RegField reg_field_info[]; - enum { #define DEF_REG_FIELD(TAG, START, WIDTH) \ TAG, @@ -33,4 +31,6 @@ enum { #undef DEF_REG_FIELD }; +extern const RegField reg_field_info[NUM_REG_FIELDS]; + #endif diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index eeaad5f..9a37644 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -35,9 +35,7 @@ TCGv hex_this_PC; TCGv hex_slot_cancelled; TCGv hex_branch_taken; TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; -#if HEX_DEBUG TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -#endif TCGv hex_new_pred_value[NUM_PREGS]; TCGv hex_pred_written; TCGv hex_store_addr[STORES_MAX]; @@ -54,19 +52,42 @@ static const char * const hexagon_prednames[] = { "p0", "p1", "p2", "p3" }; -void gen_exception(int excp) +static void gen_exception_raw(int excp) { TCGv_i32 helper_tmp = tcg_const_i32(excp); gen_helper_raise_exception(cpu_env, helper_tmp); tcg_temp_free_i32(helper_tmp); } -void gen_exception_debug(void) +static void gen_exec_counters(DisasContext *ctx) { - gen_exception(EXCP_DEBUG); + tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], + hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); + tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], + hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); +} + +static void gen_end_tb(DisasContext *ctx) +{ + gen_exec_counters(ctx); + tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); + if (ctx->base.singlestep_enabled) { + gen_exception_raw(EXCP_DEBUG); + } else { + tcg_gen_exit_tb(NULL, 0); + } + ctx->base.is_jmp = DISAS_NORETURN; +} + +static void gen_exception_end_tb(DisasContext *ctx, int excp) +{ + gen_exec_counters(ctx); + tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); + gen_exception_raw(excp); + 
ctx->base.is_jmp = DISAS_NORETURN; + } -#if HEX_DEBUG #define PACKET_BUFFER_LEN 1028 static void print_pkt(Packet *pkt) { @@ -75,10 +96,12 @@ static void print_pkt(Packet *pkt) HEX_DEBUG_LOG("%s", buf->str); g_string_free(buf, true); } -#define HEX_DEBUG_PRINT_PKT(pkt) print_pkt(pkt) -#else -#define HEX_DEBUG_PRINT_PKT(pkt) /* nothing */ -#endif +#define HEX_DEBUG_PRINT_PKT(pkt) \ + do { \ + if (HEX_DEBUG) { \ + print_pkt(pkt); \ + } \ + } while (0) static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, uint32_t words[]) @@ -88,8 +111,8 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { - words[nwords] = cpu_ldl_code(env, - ctx->base.pc_next + nwords * sizeof(uint32_t)); + words[nwords] = + translator_ldl(env, ctx->base.pc_next + nwords * sizeof(uint32_t)); found_end = is_packet_end(words[nwords]); } if (!found_end) { @@ -148,17 +171,18 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt) ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; + bitmap_zero(ctx->pregs_written, NUM_PREGS); for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; } tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); - ctx->s1_store_processed = 0; + ctx->s1_store_processed = false; -#if HEX_DEBUG - /* Handy place to set a breakpoint before the packet executes */ - gen_helper_debug_start_packet(cpu_env); - tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); -#endif + if (HEX_DEBUG) { + /* Handy place to set a breakpoint before the packet executes */ + gen_helper_debug_start_packet(cpu_env); + tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); + } /* Initialize the runtime state for packet semantics */ if (need_pc(pkt)) { @@ -185,7 +209,7 @@ static void mark_implicit_reg_write(DisasContext *ctx, Insn *insn, int attrib, int rnum) { if (GET_ATTRIB(insn->opcode, 
attrib)) { - int is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC); + bool is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC); if (is_predicated && !is_preloaded(ctx, rnum)) { tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]); } @@ -202,7 +226,7 @@ static void mark_implicit_pred_write(DisasContext *ctx, Insn *insn, } } -static void mark_implicit_writes(DisasContext *ctx, Insn *insn) +static void mark_implicit_reg_writes(DisasContext *ctx, Insn *insn) { mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_FP, HEX_REG_FP); mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SP, HEX_REG_SP); @@ -211,7 +235,10 @@ static void mark_implicit_writes(DisasContext *ctx, Insn *insn) mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); +} +static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn) +{ mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P0, 0); mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P1, 1); mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P2, 2); @@ -222,11 +249,11 @@ static void gen_insn(CPUHexagonState *env, DisasContext *ctx, Insn *insn, Packet *pkt) { if (insn->generate) { - mark_implicit_writes(ctx, insn); + mark_implicit_reg_writes(ctx, insn); insn->generate(env, ctx, insn, pkt); + mark_implicit_pred_writes(ctx, insn); } else { - gen_exception(HEX_EXCP_INVALID_OPCODE); - ctx->base.is_jmp = DISAS_NORETURN; + gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); } } @@ -280,10 +307,11 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt) for (i = 0; i < ctx->preg_log_idx; i++) { int pred_num = ctx->preg_log[i]; tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pred_num); -#endif + if (HEX_DEBUG) { + /* Do this so 
HELPER(debug_commit_end) will know */ + tcg_gen_ori_tl(hex_pred_written, hex_pred_written, + 1 << pred_num); + } } } @@ -292,20 +320,16 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt) tcg_temp_free(pval); } -#if HEX_DEBUG -static inline void gen_check_store_width(DisasContext *ctx, int slot_num) +static void gen_check_store_width(DisasContext *ctx, int slot_num) { - TCGv slot = tcg_const_tl(slot_num); - TCGv check = tcg_const_tl(ctx->store_width[slot_num]); - gen_helper_debug_check_store_width(cpu_env, slot, check); - tcg_temp_free(slot); - tcg_temp_free(check); + if (HEX_DEBUG) { + TCGv slot = tcg_const_tl(slot_num); + TCGv check = tcg_const_tl(ctx->store_width[slot_num]); + gen_helper_debug_check_store_width(cpu_env, slot, check); + tcg_temp_free(slot); + tcg_temp_free(check); + } } -#define HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num) \ - gen_check_store_width(ctx, slot_num) -#else -#define HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num) /* nothing */ -#endif static bool slot_is_predicated(Packet *pkt, int slot_num) { @@ -330,7 +354,7 @@ void process_store(DisasContext *ctx, Packet *pkt, int slot_num) if (slot_num == 1 && ctx->s1_store_processed) { return; } - ctx->s1_store_processed = 1; + ctx->s1_store_processed = true; if (is_predicated) { TCGv cancelled = tcg_temp_new(); @@ -355,25 +379,25 @@ void process_store(DisasContext *ctx, Packet *pkt, int slot_num) */ switch (ctx->store_width[slot_num]) { case 1: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st8(hex_store_val32[slot_num], hex_store_addr[slot_num], ctx->mem_idx); break; case 2: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st16(hex_store_val32[slot_num], hex_store_addr[slot_num], ctx->mem_idx); break; case 4: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st32(hex_store_val32[slot_num], hex_store_addr[slot_num], 
ctx->mem_idx); break; case 8: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st64(hex_store_val64[slot_num], hex_store_addr[slot_num], ctx->mem_idx); @@ -451,14 +475,6 @@ static void update_exec_counters(DisasContext *ctx, Packet *pkt) ctx->num_insns += num_real_insns; } -static void gen_exec_counters(DisasContext *ctx) -{ - tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], - hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); - tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], - hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); -} - static void gen_commit_packet(DisasContext *ctx, Packet *pkt) { gen_reg_writes(ctx); @@ -466,8 +482,7 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) process_store_log(ctx, pkt); process_dczeroa(ctx, pkt); update_exec_counters(ctx, pkt); -#if HEX_DEBUG - { + if (HEX_DEBUG) { TCGv has_st0 = tcg_const_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa); TCGv has_st1 = @@ -479,10 +494,9 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) tcg_temp_free(has_st0); tcg_temp_free(has_st1); } -#endif if (pkt->pkt_has_cof) { - ctx->base.is_jmp = DISAS_NORETURN; + gen_end_tb(ctx); } } @@ -495,8 +509,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) nwords = read_packet_words(env, ctx, words); if (!nwords) { - gen_exception(HEX_EXCP_INVALID_PACKET); - ctx->base.is_jmp = DISAS_NORETURN; + gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); return; } @@ -509,8 +522,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) gen_commit_packet(ctx, &pkt); ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; } else { - gen_exception(HEX_EXCP_INVALID_PACKET); - ctx->base.is_jmp = DISAS_NORETURN; + gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); } } @@ -540,9 +552,7 @@ static bool hexagon_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, { DisasContext *ctx = container_of(dcbase, DisasContext, base); - 
tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); - ctx->base.is_jmp = DISAS_NORETURN; - gen_exception_debug(); + gen_exception_end_tb(ctx, EXCP_DEBUG); /* * The address covered by the breakpoint must be included in * [tb->pc, tb->pc + tb->size) in order to for it to be @@ -589,14 +599,10 @@ static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) * The CPU log is used to compare against LLDB single stepping, * so end the TLB after every packet. */ - HexagonCPU *hex_cpu = container_of(env, HexagonCPU, env); + HexagonCPU *hex_cpu = env_archcpu(env); if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { ctx->base.is_jmp = DISAS_TOO_MANY; } -#if HEX_DEBUG - /* When debugging, only put one packet per TB */ - ctx->base.is_jmp = DISAS_TOO_MANY; -#endif } } @@ -609,19 +615,12 @@ static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_exec_counters(ctx); tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); if (ctx->base.singlestep_enabled) { - gen_exception_debug(); + gen_exception_raw(EXCP_DEBUG); } else { tcg_gen_exit_tb(NULL, 0); } break; case DISAS_NORETURN: - gen_exec_counters(ctx); - tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); - if (ctx->base.singlestep_enabled) { - gen_exception_debug(); - } else { - tcg_gen_exit_tb(NULL, 0); - } break; default: g_assert_not_reached(); @@ -654,9 +653,7 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) #define NAME_LEN 64 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -#if HEX_DEBUG static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -#endif static char new_pred_value_names[NUM_PREGS][NAME_LEN]; static char store_addr_names[STORES_MAX][NAME_LEN]; static char store_width_names[STORES_MAX][NAME_LEN]; @@ -669,11 +666,11 @@ void hexagon_translate_init(void) opcode_init(); -#if HEX_DEBUG - if (!qemu_logfile) { - qemu_set_log(qemu_loglevel); + if (HEX_DEBUG) { + if (!qemu_logfile) { + 
qemu_set_log(qemu_loglevel); + } } -#endif for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { hex_gpr[i] = tcg_global_mem_new(cpu_env, @@ -685,13 +682,13 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, new_value[i]), new_value_names[i]); -#if HEX_DEBUG - snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", - hexagon_regnames[i]); - hex_reg_written[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, reg_written[i]), - reg_written_names[i]); -#endif + if (HEX_DEBUG) { + snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", + hexagon_regnames[i]); + hex_reg_written[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, reg_written[i]), + reg_written_names[i]); + } } for (i = 0; i < NUM_PREGS; i++) { hex_pred[i] = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 938f7fb..703fd13 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -34,17 +34,16 @@ typedef struct DisasContext { DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; + DECLARE_BITMAP(pregs_written, NUM_PREGS); uint8_t store_width[STORES_MAX]; - uint8_t s1_store_processed; + bool s1_store_processed; } DisasContext; static inline void ctx_log_reg_write(DisasContext *ctx, int rnum) { -#if HEX_DEBUG if (test_bit(rnum, ctx->regs_written)) { HEX_DEBUG_LOG("WARNING: Multiple writes to r%d\n", rnum); } -#endif ctx->reg_log[ctx->reg_log_idx] = rnum; ctx->reg_log_idx++; set_bit(rnum, ctx->regs_written); @@ -60,6 +59,7 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) { ctx->preg_log[ctx->preg_log_idx] = pnum; ctx->preg_log_idx++; + set_bit(pnum, ctx->pregs_written); } static inline bool is_preloaded(DisasContext *ctx, int num) @@ -86,8 +86,5 @@ extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; -void gen_exception(int excp); -void gen_exception_debug(void); - void process_store(DisasContext *ctx, Packet 
*pkt, int slot_num); #endif diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 616af69..0992787 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -28,6 +28,7 @@ endif CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal +CFLAGS += -fno-unroll-loops HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon VPATH += $(HEX_SRC) @@ -39,7 +40,12 @@ HEX_TESTS = first HEX_TESTS += misc HEX_TESTS += preg_alias HEX_TESTS += dual_stores +HEX_TESTS += multi_result HEX_TESTS += mem_noshuf +HEX_TESTS += circ +HEX_TESTS += brev +HEX_TESTS += load_unpack +HEX_TESTS += load_align HEX_TESTS += atomics HEX_TESTS += fpstuff diff --git a/tests/tcg/hexagon/brev.c b/tests/tcg/hexagon/brev.c new file mode 100644 index 0000000..9736a24 --- /dev/null +++ b/tests/tcg/hexagon/brev.c @@ -0,0 +1,190 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <string.h> + +int err; + +#define NBITS 8 +#define SIZE (1 << NBITS) + +long long dbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; +int wbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; +short hbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; +unsigned char bbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; + +/* + * We use the C preprocessor to deal with the combinations of types + */ + +#define BREV_LOAD(SZ, RES, ADDR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++m0:brev)\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(INC) \ + : "m0") + +#define BREV_LOAD_b(RES, ADDR, INC) \ + BREV_LOAD(b, RES, ADDR, INC) +#define BREV_LOAD_ub(RES, ADDR, INC) \ + BREV_LOAD(ub, RES, ADDR, INC) +#define BREV_LOAD_h(RES, ADDR, INC) \ + BREV_LOAD(h, RES, ADDR, INC) +#define BREV_LOAD_uh(RES, ADDR, INC) \ + BREV_LOAD(uh, RES, ADDR, INC) +#define BREV_LOAD_w(RES, ADDR, INC) \ + BREV_LOAD(w, RES, ADDR, INC) +#define BREV_LOAD_d(RES, ADDR, INC) \ + BREV_LOAD(d, RES, ADDR, INC) + +#define BREV_STORE(SZ, PART, ADDR, VAL, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "mem" #SZ "(%0++m0:brev) = %1" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(VAL), "r"(INC) \ + : "m0", "memory") + +#define BREV_STORE_b(ADDR, VAL, INC) \ + BREV_STORE(b, "", ADDR, VAL, INC) +#define BREV_STORE_h(ADDR, VAL, INC) \ + BREV_STORE(h, "", ADDR, VAL, INC) +#define BREV_STORE_f(ADDR, VAL, INC) \ + BREV_STORE(h, ".H", ADDR, VAL, INC) +#define BREV_STORE_w(ADDR, VAL, INC) \ + BREV_STORE(w, "", ADDR, VAL, INC) +#define BREV_STORE_d(ADDR, VAL, INC) \ + BREV_STORE(d, "", ADDR, VAL, INC) + +#define BREV_STORE_NEW(SZ, ADDR, VAL, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "{\n\t" \ + " r5 = %1\n\t" \ + " mem" #SZ "(%0++m0:brev) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(VAL), "r"(INC) \ + : "r5", "m0", "memory") + +#define BREV_STORE_bnew(ADDR, VAL, INC) \ + BREV_STORE_NEW(b, ADDR, VAL, INC) +#define BREV_STORE_hnew(ADDR, VAL, INC) \ + BREV_STORE_NEW(h, ADDR, VAL, INC) 
+#define BREV_STORE_wnew(ADDR, VAL, INC) \ + BREV_STORE_NEW(w, ADDR, VAL, INC) + +int bitreverse(int x) +{ + int result = 0; + int i; + for (i = 0; i < NBITS; i++) { + result <<= 1; + result |= x & 1; + x >>= 1; + } + return result; +} + +int sext8(int x) +{ + return (x << 24) >> 24; +} + +void check(int i, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR(%d): 0x%04llx != 0x%04llx\n", i, result, expect); + err++; + } +} + +#define TEST_BREV_LOAD(SZ, TYPE, BUF, SHIFT, EXP) \ + do { \ + p = BUF; \ + for (i = 0; i < SIZE; i++) { \ + TYPE result; \ + BREV_LOAD_##SZ(result, p, 1 << (SHIFT - NBITS)); \ + check(i, result, EXP); \ + } \ + } while (0) + +#define TEST_BREV_STORE(SZ, TYPE, BUF, VAL, SHIFT) \ + do { \ + p = BUF; \ + memset(BUF, 0xff, sizeof(BUF)); \ + for (i = 0; i < SIZE; i++) { \ + BREV_STORE_##SZ(p, (TYPE)(VAL), 1 << (SHIFT - NBITS)); \ + } \ + for (i = 0; i < SIZE; i++) { \ + check(i, BUF[i], bitreverse(i)); \ + } \ + } while (0) + +#define TEST_BREV_STORE_NEW(SZ, BUF, SHIFT) \ + do { \ + p = BUF; \ + memset(BUF, 0xff, sizeof(BUF)); \ + for (i = 0; i < SIZE; i++) { \ + BREV_STORE_##SZ(p, i, 1 << (SHIFT - NBITS)); \ + } \ + for (i = 0; i < SIZE; i++) { \ + check(i, BUF[i], bitreverse(i)); \ + } \ + } while (0) + +/* + * We'll set high_half[i] = i << 16 for use in the .H form of store + * which stores from the high half of the word. 
+ */ +int high_half[SIZE]; + +int main() +{ + void *p; + int i; + + for (i = 0; i < SIZE; i++) { + bbuf[i] = bitreverse(i); + hbuf[i] = bitreverse(i); + wbuf[i] = bitreverse(i); + dbuf[i] = bitreverse(i); + high_half[i] = i << 16; + } + + TEST_BREV_LOAD(b, int, bbuf, 16, sext8(i)); + TEST_BREV_LOAD(ub, int, bbuf, 16, i); + TEST_BREV_LOAD(h, int, hbuf, 15, i); + TEST_BREV_LOAD(uh, int, hbuf, 15, i); + TEST_BREV_LOAD(w, int, wbuf, 14, i); + TEST_BREV_LOAD(d, long long, dbuf, 13, i); + + TEST_BREV_STORE(b, int, bbuf, i, 16); + TEST_BREV_STORE(h, int, hbuf, i, 15); + TEST_BREV_STORE(f, int, hbuf, high_half[i], 15); + TEST_BREV_STORE(w, int, wbuf, i, 14); + TEST_BREV_STORE(d, long long, dbuf, i, 13); + + TEST_BREV_STORE_NEW(bnew, bbuf, 16); + TEST_BREV_STORE_NEW(hnew, hbuf, 15); + TEST_BREV_STORE_NEW(wnew, wbuf, 14); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/circ.c b/tests/tcg/hexagon/circ.c new file mode 100644 index 0000000..67a1aa3 --- /dev/null +++ b/tests/tcg/hexagon/circ.c @@ -0,0 +1,486 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +#define DEBUG 0 +#define DEBUG_PRINTF(...) 
\ + do { \ + if (DEBUG) { \ + printf(__VA_ARGS__); \ + } \ + } while (0) + + +#define NBYTES (1 << 8) +#define NHALFS (NBYTES / sizeof(short)) +#define NWORDS (NBYTES / sizeof(int)) +#define NDOBLS (NBYTES / sizeof(long long)) + +long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0}; +int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0}; +short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0}; +unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0}; + +/* + * We use the C preprocessor to deal with the combinations of types + */ + +#define INIT(BUF, N) \ + void init_##BUF(void) \ + { \ + int i; \ + for (i = 0; i < N; i++) { \ + BUF[i] = i; \ + } \ + } \ + +INIT(bbuf, NBYTES) +INIT(hbuf, NHALFS) +INIT(wbuf, NWORDS) +INIT(dbuf, NDOBLS) + +/* + * Macros for performing circular load + * RES result + * ADDR address + * START start address of buffer + * LEN length of buffer (in bytes) + * INC address increment (in bytes for IMM, elements for REG) + */ +#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC) + +/* + * The mreg has the following pieces + * mreg[31:28] increment[10:7] + * mreg[27:24] K value (used Hexagon v3 and earlier) + * 
mreg[23:17] increment[6:0] + * mreg[16:0] circular buffer length + */ +static int build_mreg(int inc, int K, int len) +{ + return ((inc & 0x780) << 21) | + ((K & 0xf) << 24) | + ((inc & 0x7f) << 17) | + (len & 0x1ffff); +} + +#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC) + +/* + * Macros for performing circular store + * VAL value to store + * ADDR address + * START start address of buffer + * LEN length of buffer (in bytes) + * INC address increment (in bytes for IMM, elements for REG) + */ +#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %1\n\t" \ + "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(START), "r"(VAL), "r"(LEN) \ + : "r4", "m0", "cs0", "memory") +#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, 
INC) \ + CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %1\n\t" \ + "{\n\t" \ + " r5 = %2\n\t" \ + " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(START), "r"(VAL), "r"(LEN) \ + : "r4", "r5", "m0", "cs0", "memory") +#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %1\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %2\n\t" \ + "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START), \ + "r"(VAL) \ + : "r4", "m1", "cs1", "memory") +#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %1\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %2\n\t" \ + "{\n\t" \ + " r5 = %3\n\t" \ + " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START), \ + "r"(VAL) \ + : "r4", 
"r5", "m1", "cs1", "memory") +#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC) + + +int err; + +/* We'll test increments +1 and -1 */ +void check_load(int i, long long result, int inc, int size) +{ + int expect = (i * inc); + while (expect >= size) { + expect -= size; + } + while (expect < 0) { + expect += size; + } + if (result != expect) { + printf("ERROR(%d): %lld != %d\n", i, result, expect); + err++; + } +} + +#define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \ +void circ_test_load_imm_##SZ(void) \ +{ \ + TYPE *p = (TYPE *)BUF; \ + int size = 10; \ + int i; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \ + } \ + p = (TYPE *)BUF; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \ + } \ +} + +TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d) +TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d) +TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d) +TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d) +TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, d) +TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld) + +#define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \ +void circ_test_load_reg_##SZ(void) \ +{ \ + TYPE *p = (TYPE *)BUF; \ + int size = 13; \ + int i; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 
1); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, 1, size); \ + } \ + p = (TYPE *)BUF; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, -1, size); \ + } \ +} + +TEST_LOAD_REG(b, char, bbuf, NBYTES, d) +TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d) +TEST_LOAD_REG(h, short, hbuf, NHALFS, d) +TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d) +TEST_LOAD_REG(w, int, wbuf, NWORDS, d) +TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld) + +/* The circular stores will wrap around somewhere inside the buffer */ +#define CIRC_VAL(SZ, TYPE, BUFSIZE) \ +TYPE circ_val_##SZ(int i, int inc, int size) \ +{ \ + int mod = BUFSIZE % size; \ + int elem = i * inc; \ + if (elem < 0) { \ + if (-elem <= size - mod) { \ + return (elem + BUFSIZE - mod); \ + } else { \ + return (elem + BUFSIZE + size - mod); \ + } \ + } else if (elem < mod) {\ + return (elem + BUFSIZE - mod); \ + } else { \ + return (elem + BUFSIZE - size - mod); \ + } \ +} + +CIRC_VAL(b, unsigned char, NBYTES) +CIRC_VAL(h, short, NHALFS) +CIRC_VAL(w, int, NWORDS) +CIRC_VAL(d, long long, NDOBLS) + +/* + * Circular stores should only write to the first "size" elements of the buffer + * the remainder of the elements should have BUF[i] == i + */ +#define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \ +void check_store_##SZ(int inc, int size) \ +{ \ + int i; \ + for (i = 0; i < size; i++) { \ + DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \ + i, BUF[i], circ_val_##SZ(i, inc, size)); \ + if (BUF[i] != circ_val_##SZ(i, inc, size)) { \ + printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \ + i, BUF[i], circ_val_##SZ(i, inc, size)); \ + err++; \ + } \ + } \ + for (i = size; i < BUFSIZE; i++) { \ + if (BUF[i] != i) { \ + printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], 
i); \ + err++; \ + } \ + } \ +} + +CHECK_STORE(b, bbuf, NBYTES, x) +CHECK_STORE(h, hbuf, NHALFS, x) +CHECK_STORE(w, wbuf, NWORDS, x) +CHECK_STORE(d, dbuf, NDOBLS, llx) + +#define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \ +void circ_test_store_imm_##SZ(void) \ +{ \ + unsigned int size = 27; \ + TYPE *p = BUF; \ + TYPE val = 0; \ + int i; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \ + val++; \ + } \ + check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \ + p = BUF; \ + val = 0; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \ + -(INC)); \ + val++; \ + } \ + check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \ +} + +CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1) +CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2) +CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2) +CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4) +CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8) +CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1) +CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2) +CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4) + +#define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \ +void circ_test_store_reg_##SZ(void) \ +{ \ + TYPE *p = BUF; \ + unsigned int size = 19; \ + TYPE val = 0; \ + int i; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \ + val++; \ + } \ + check_store_##CHK(1, size); \ + p = BUF; \ + val = 0; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \ + val++; \ + } \ + check_store_##CHK(-1, size); \ +} + +CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0) +CIRC_TEST_STORE_REG(h, h, short, hbuf, NHALFS, 0) +CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16) 
+CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0) +CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0) +CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0) +CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0) +CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0) + +/* Test the old scheme used in Hexagon V3 */ +static void circ_test_v3(void) +{ + int *p = wbuf; + int size = 15; + int K = 4; /* 64 bytes */ + int element; + int i; + + init_wbuf(); + + for (i = 0; i < NWORDS; i++) { + __asm__( + "r4 = %2\n\t" + "m1 = r4\n\t" + "%0 = memw(%1++I:circ(M1))\n\t" + : "=r"(element), "+r"(p) + : "r"(build_mreg(1, K, size * sizeof(int))) + : "r4", "m1"); + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element); + check_load(i, element, 1, size); + } +} + +int main() +{ + init_bbuf(); + init_hbuf(); + init_wbuf(); + init_dbuf(); + + DEBUG_PRINTF("NBYTES = %d\n", NBYTES); + DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf); + DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf); + DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf); + DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf); + + circ_test_load_imm_b(); + circ_test_load_imm_ub(); + circ_test_load_imm_h(); + circ_test_load_imm_uh(); + circ_test_load_imm_w(); + circ_test_load_imm_d(); + + circ_test_load_reg_b(); + circ_test_load_reg_ub(); + circ_test_load_reg_h(); + circ_test_load_reg_uh(); + circ_test_load_reg_w(); + circ_test_load_reg_d(); + + circ_test_store_imm_b(); + circ_test_store_imm_h(); + circ_test_store_imm_f(); + circ_test_store_imm_w(); + circ_test_store_imm_d(); + circ_test_store_imm_bnew(); + circ_test_store_imm_hnew(); + circ_test_store_imm_wnew(); + + circ_test_store_reg_b(); + circ_test_store_reg_h(); + circ_test_store_reg_f(); + circ_test_store_reg_w(); + circ_test_store_reg_d(); + circ_test_store_reg_bnew(); + circ_test_store_reg_hnew(); + circ_test_store_reg_wnew(); + + circ_test_v3(); + + puts(err ? "FAIL" : "PASS"); + return err ? 
1 : 0; +} diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index e4f1a0e..0dff429 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -37,10 +37,12 @@ const int SF_NaN = 0x7fc00000; const int SF_NaN_special = 0x7f800001; const int SF_ANY = 0x3f800000; const int SF_HEX_NAN = 0xffffffff; +const int SF_small_neg = 0xab98fba8; const long long DF_NaN = 0x7ff8000000000000ULL; const long long DF_ANY = 0x3f80000000000000ULL; const long long DF_HEX_NAN = 0xffffffffffffffffULL; +const long long DF_small_neg = 0xbd731f7500000000ULL; int err; @@ -248,6 +250,87 @@ static void check_dfminmax(void) check_fpstatus(usr, FPINVF); } +static void check_recip_exception(void) +{ + int result; + int usr; + + /* + * Check that sfrecipa doesn't set status bits when + * a NaN with bit 22 non-zero is passed + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %2)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + /* + * Check that sfrecipa sets the invalid flag (FPINVF) when + * a NaN with bit 22 zero is passed + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN_special), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN_special) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 
FPINVF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %2)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN_special) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + /* + * Check that sfrecipa properly sets divide-by-zero + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(0x885dc960), "r"(0x80000000) + : "r2", "p0", "usr"); + check32(result, 0x3f800000); + check_fpstatus(usr, FPDBZF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(0x7f800000), "r"(SF_ZERO) + : "r2", "p0", "usr"); + check32(result, 0x3f800000); + check_fpstatus(usr, 0); +} + static void check_canonical_NaN(void) { int sf_result; @@ -358,12 +441,171 @@ static void check_canonical_NaN(void) check_fpstatus(usr, 0); } +static void check_invsqrta(void) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfinvsqrta(%2)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(0x7f800000) + : "p0"); + check32(result, 0xff800000); + check32(predval, 0x0); +} + +static void check_float2int_convs() +{ + int res32; + long long res64; + int usr; + + /* + * Check that the various forms of float-to-unsigned + * check sign before rounding + */ + asm(CLEAR_FPSTATUS + "%0 = convert_sf2uw(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2uw(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2ud(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2ud(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_small_neg) + : "r2", 
"usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2uw(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2uw(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2ud(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2ud(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + /* + * Check that the various forms of float-to-signed return -1 for NaN + */ + asm(CLEAR_FPSTATUS + "%0 = convert_sf2w(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2w(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2d(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2d(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2w(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2w(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, 
FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2d(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2d(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); +} + int main() { check_compare_exception(); check_sfminmax(); check_dfminmax(); + check_recip_exception(); check_canonical_NaN(); + check_invsqrta(); + check_float2int_convs(); puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; diff --git a/tests/tcg/hexagon/load_align.c b/tests/tcg/hexagon/load_align.c new file mode 100644 index 0000000..12fc9cb --- /dev/null +++ b/tests/tcg/hexagon/load_align.c @@ -0,0 +1,415 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test load align instructions + * + * Example + * r1:0 = memh_fifo(r1+#0) + * loads a half word from memory, shifts the destination register + * right by one half word and inserts the loaded value into the high + * half word of the destination. 
+ * + * There are 8 addressing modes and byte and half word variants, for a + * total of 16 instructions to test + */ + +#include <stdio.h> +#include <string.h> + +int err; + +char buf[16] __attribute__((aligned(1 << 16))); + +void init_buf(void) +{ + int i; + for (i = 0; i < 16; i++) { + buf[i] = i + 1; + } +} + +void __check(int line, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", + line, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +void __checkp(int line, void *p, void *expect) +{ + if (p != expect) { + printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect); + err++; + } +} + +#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP) + +/* + **************************************************************************** + * _io addressing mode (addr + offset) + */ +#define LOAD_io(SZ, RES, ADDR, OFF) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1+#" #OFF ")\n\t" \ + : "+r"(RES) \ + : "r"(ADDR)) +#define LOAD_io_b(RES, ADDR, OFF) \ + LOAD_io(b, RES, ADDR, OFF) +#define LOAD_io_h(RES, ADDR, OFF) \ + LOAD_io(h, RES, ADDR, OFF) + +#define TEST_io(NAME, SZ, SIZE, EXP1, EXP2, EXP3, EXP4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + LOAD_io_##SZ(result, buf, 0 * (SIZE)); \ + check(result, (EXP1)); \ + LOAD_io_##SZ(result, buf, 1 * (SIZE)); \ + check(result, (EXP2)); \ + LOAD_io_##SZ(result, buf, 2 * (SIZE)); \ + check(result, (EXP3)); \ + LOAD_io_##SZ(result, buf, 3 * (SIZE)); \ + check(result, (EXP4)); \ +} + +TEST_io(loadalignb_io, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_io(loadalignh_io, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _ur addressing mode (index << offset + base) + */ +#define LOAD_ur(SZ, RES, SHIFT, IDX) \ + __asm__( \ + 
"%0 = mem" #SZ "_fifo(%1<<#" #SHIFT " + ##buf)\n\t" \ + : "+r"(RES) \ + : "r"(IDX)) +#define LOAD_ur_b(RES, SHIFT, IDX) \ + LOAD_ur(b, RES, SHIFT, IDX) +#define LOAD_ur_h(RES, SHIFT, IDX) \ + LOAD_ur(h, RES, SHIFT, IDX) + +#define TEST_ur(NAME, SZ, SHIFT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + LOAD_ur_##SZ(result, (SHIFT), 0); \ + check(result, (RES1)); \ + LOAD_ur_##SZ(result, (SHIFT), 1); \ + check(result, (RES2)); \ + LOAD_ur_##SZ(result, (SHIFT), 2); \ + check(result, (RES3)); \ + LOAD_ur_##SZ(result, (SHIFT), 3); \ + check(result, (RES4)); \ +} + +TEST_ur(loadalignb_ur, b, 1, + 0x01ffffffffffffffLL, 0x0301ffffffffffffLL, + 0x050301ffffffffffLL, 0x07050301ffffffffLL) +TEST_ur(loadalignh_ur, h, 1, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _ap addressing mode (addr = base) + */ +#define LOAD_ap(SZ, RES, PTR, ADDR) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1 = ##" #ADDR ")\n\t" \ + : "+r"(RES), "=r"(PTR)) +#define LOAD_ap_b(RES, PTR, ADDR) \ + LOAD_ap(b, RES, PTR, ADDR) +#define LOAD_ap_h(RES, PTR, ADDR) \ + LOAD_ap(h, RES, PTR, ADDR) + +#define TEST_ap(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr; \ + LOAD_ap_##SZ(result, ptr, (buf + 0 * (SIZE))); \ + check(result, (RES1)); \ + checkp(ptr, &buf[0 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 1 * (SIZE))); \ + check(result, (RES2)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 2 * (SIZE))); \ + check(result, (RES3)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 3 * (SIZE))); \ + check(result, (RES4)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ +} + +TEST_ap(loadalignb_ap, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_ap(loadalignh_ap, h, 2, + 
0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pr addressing mode (addr ++ modifier-reg) + */ +#define LOAD_pr(SZ, RES, PTR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "_fifo(%1++m0)\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"(INC) \ + : "m0") +#define LOAD_pr_b(RES, PTR, INC) \ + LOAD_pr(b, RES, PTR, INC) +#define LOAD_pr_h(RES, PTR, INC) \ + LOAD_pr(h, RES, PTR, INC) + +#define TEST_pr(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[4 * (SIZE)]); \ +} + +TEST_pr(loadalignb_pr, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_pr(loadalignh_pr, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pbr addressing mode (addr ++ modifier-reg:brev) + */ +#define LOAD_pbr(SZ, RES, PTR) \ + __asm__( \ + "r4 = #(1 << (16 - 3))\n\t" \ + "m0 = r4\n\t" \ + "%0 = mem" #SZ "_fifo(%1++m0:brev)\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : \ + : "r4", "m0") +#define LOAD_pbr_b(RES, PTR) \ + LOAD_pbr(b, RES, PTR) +#define LOAD_pbr_h(RES, PTR) \ + LOAD_pbr(h, RES, PTR) + +#define TEST_pbr(NAME, SZ, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES1)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES2)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES3)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result,
(RES2)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES3)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES4)); \ +} + +TEST_pbr(loadalignb_pbr, b, + 0x01ffffffffffffffLL, 0x0501ffffffffffffLL, + 0x030501ffffffffffLL, 0x07030501ffffffffLL) +TEST_pbr(loadalignh_pbr, h, + 0x0201ffffffffffffLL, 0x06050201ffffffffLL, + 0x040306050201ffffLL, 0x0807040306050201LL) + +/* + **************************************************************************** + * _pi addressing mode (addr ++ inc) + */ +#define LOAD_pi(SZ, RES, PTR, INC) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1++#" #INC ")\n\t" \ + : "+r"(RES), "+r"(PTR)) +#define LOAD_pi_b(RES, PTR, INC) \ + LOAD_pi(b, RES, PTR, INC) +#define LOAD_pi_h(RES, PTR, INC) \ + LOAD_pi(h, RES, PTR, INC) + +#define TEST_pi(NAME, SZ, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[1 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[2 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[3 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[4 * (INC)]); \ +} + +TEST_pi(loadalignb_pi, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_pi(loadalignh_pi, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pci addressing mode (addr ++ inc:circ) + */ +#define LOAD_pci(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SZ "_fifo(%1++#" #INC ":circ(m0))\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define LOAD_pci_b(RES, PTR, START, LEN, INC) \ + LOAD_pci(b, RES, PTR, 
START, LEN, INC) +#define LOAD_pci_h(RES, PTR, START, LEN, INC) \ + LOAD_pci(h, RES, PTR, START, LEN, INC) + +#define TEST_pci(NAME, SZ, LEN, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \ +} + +TEST_pci(loadalignb_pci, b, 2, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x010201ffffffffffLL, 0x02010201ffffffffLL) +TEST_pci(loadalignh_pci, h, 4, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x020104030201ffffLL, 0x0403020104030201LL) + +/* + **************************************************************************** + * _pcr addressing mode (addr ++ I:circ(modifier-reg)) + */ +#define LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SZ "_fifo(%1++I:circ(m1))\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define LOAD_pcr_b(RES, PTR, START, LEN, INC) \ + LOAD_pcr(b, RES, PTR, START, LEN, INC) +#define LOAD_pcr_h(RES, PTR, START, LEN, INC) \ + LOAD_pcr(h, RES, PTR, START, LEN, INC) + +#define TEST_pcr(NAME, SZ, SIZE, LEN, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[(2 * 
(INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \ +} + +TEST_pcr(loadalignb_pcr, b, 1, 2, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x010201ffffffffffLL, 0x02010201ffffffffLL) +TEST_pcr(loadalignh_pcr, h, 2, 4, 1, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x020104030201ffffLL, 0x0403020104030201LL) + +int main() +{ + init_buf(); + + test_loadalignb_io(); + test_loadalignh_io(); + + test_loadalignb_ur(); + test_loadalignh_ur(); + + test_loadalignb_ap(); + test_loadalignh_ap(); + + test_loadalignb_pr(); + test_loadalignh_pr(); + + test_loadalignb_pbr(); + test_loadalignh_pbr(); + + test_loadalignb_pi(); + test_loadalignh_pi(); + + test_loadalignb_pci(); + test_loadalignh_pci(); + + test_loadalignb_pcr(); + test_loadalignh_pcr(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/load_unpack.c b/tests/tcg/hexagon/load_unpack.c new file mode 100644 index 0000000..3575a37 --- /dev/null +++ b/tests/tcg/hexagon/load_unpack.c @@ -0,0 +1,474 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Test load unpack instructions + * + * Example + * r0 = memubh(r1+#0) + * loads a half word from memory and zero-extends the 2 bytes to form a word + * + * For each addressing mode, there are 4 tests + * bzw2 unsigned 2 elements + * bsw2 signed 2 elements + * bzw4 unsigned 4 elements + * bsw4 signed 4 elements + * There are 8 addressing modes, for a total of 32 instructions to test + */ + +#include <stdio.h> +#include <string.h> + +int err; + +char buf[16] __attribute__((aligned(1 << 16))); + +void init_buf(void) +{ + int i; + for (i = 0; i < 16; i++) { + int sign = i % 2 == 0 ? 0x80 : 0; + buf[i] = sign | (i + 1); + } +} + +void __check(int line, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%08llx != 0x%08llx\n", + line, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +void __checkp(int line, void *p, void *expect) +{ + if (p != expect) { + printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect); + err++; + } +} + +#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP) + +/* + **************************************************************************** + * _io addressing mode (addr + offset) + */ +#define BxW_LOAD_io(SZ, RES, ADDR, OFF) \ + __asm__( \ + "%0 = mem" #SZ "(%1+#" #OFF ")\n\t" \ + : "=r"(RES) \ + : "r"(ADDR)) +#define BxW_LOAD_io_Z(RES, ADDR, OFF) \ + BxW_LOAD_io(ubh, RES, ADDR, OFF) +#define BxW_LOAD_io_S(RES, ADDR, OFF) \ + BxW_LOAD_io(bh, RES, ADDR, OFF) + +#define TEST_io(NAME, TYPE, SIGN, SIZE, EXT, EXP1, EXP2, EXP3, EXP4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + init_buf(); \ + BxW_LOAD_io_##SIGN(result, buf, 0 * (SIZE)); \ + check(result, (EXP1) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 1 * (SIZE)); \ + check(result, (EXP2) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 2 * (SIZE)); \ + check(result, (EXP3) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 3 * (SIZE)); \ + check(result, (EXP4) | (EXT)); \ +} + + 
+TEST_io(loadbzw2_io, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_io(loadbsw2_io, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_io(loadbzw4_io, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_io(loadbsw4_io, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _ur addressing mode (index << offset + base) + */ +#define BxW_LOAD_ur(SZ, RES, SHIFT, IDX) \ + __asm__( \ + "%0 = mem" #SZ "(%1<<#" #SHIFT " + ##buf)\n\t" \ + : "=r"(RES) \ + : "r"(IDX)) +#define BxW_LOAD_ur_Z(RES, SHIFT, IDX) \ + BxW_LOAD_ur(ubh, RES, SHIFT, IDX) +#define BxW_LOAD_ur_S(RES, SHIFT, IDX) \ + BxW_LOAD_ur(bh, RES, SHIFT, IDX) + +#define TEST_ur(NAME, TYPE, SIGN, SHIFT, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + init_buf(); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 0); \ + check(result, (RES1) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 1); \ + check(result, (RES2) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 2); \ + check(result, (RES3) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 3); \ + check(result, (RES4) | (EXT)); \ +} \ + +TEST_ur(loadbzw2_ur, int, Z, 1, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ur(loadbsw2_ur, int, S, 1, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ur(loadbzw4_ur, long long, Z, 2, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_ur(loadbsw4_ur, long long, S, 2, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _ap addressing mode 
(addr = base) + */ +#define BxW_LOAD_ap(SZ, RES, PTR, ADDR) \ + __asm__( \ + "%0 = mem" #SZ "(%1 = ##" #ADDR ")\n\t" \ + : "=r"(RES), "=r"(PTR)) +#define BxW_LOAD_ap_Z(RES, PTR, ADDR) \ + BxW_LOAD_ap(ubh, RES, PTR, ADDR) +#define BxW_LOAD_ap_S(RES, PTR, ADDR) \ + BxW_LOAD_ap(bh, RES, PTR, ADDR) + +#define TEST_ap(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr; \ + init_buf(); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 0 * (SIZE))); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[0 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 1 * (SIZE))); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 2 * (SIZE))); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 3 * (SIZE))); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ +} + +TEST_ap(loadbzw2_ap, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ap(loadbsw2_ap, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ap(loadbzw4_ap, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_ap(loadbsw4_ap, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _pr addressing mode (addr ++ modifier-reg) + */ +#define BxW_LOAD_pr(SZ, RES, PTR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++m0)\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"(INC) \ + : "m0") +#define BxW_LOAD_pr_Z(RES, PTR, INC) \ + BxW_LOAD_pr(ubh, RES, PTR, INC) +#define BxW_LOAD_pr_S(RES, PTR, INC) \ + BxW_LOAD_pr(bh, RES, PTR, INC) + +#define TEST_pr(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \
+{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[4 * (SIZE)]); \ +} + +TEST_pr(loadbzw2_pr, int, Z, 2, 0x00000000, + 0x00020081, 0x0040083, 0x00060085, 0x00080087) +TEST_pr(loadbsw2_pr, int, S, 2, 0x0000ff00, + 0x00020081, 0x0040083, 0x00060085, 0x00080087) +TEST_pr(loadbzw4_pr, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_pr(loadbsw4_pr, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _pbr addressing mode (addr ++ modifier-reg:brev) + */ +#define BxW_LOAD_pbr(SZ, RES, PTR) \ + __asm__( \ + "r4 = #(1 << (16 - 3))\n\t" \ + "m0 = r4\n\t" \ + "%0 = mem" #SZ "(%1++m0:brev)\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : \ + : "r4", "m0") +#define BxW_LOAD_pbr_Z(RES, PTR) \ + BxW_LOAD_pbr(ubh, RES, PTR) +#define BxW_LOAD_pbr_S(RES, PTR) \ + BxW_LOAD_pbr(bh, RES, PTR) + +#define TEST_pbr(NAME, TYPE, SIGN, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES1) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES2) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES3) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES4) | (EXT)); \ +} + +TEST_pbr(loadbzw2_pbr, int, Z, 0x00000000, + 0x00020081,
0x00060085, 0x00040083, 0x00080087) +TEST_pbr(loadbsw2_pbr, int, S, 0x0000ff00, + 0x00020081, 0x00060085, 0x00040083, 0x00080087) +TEST_pbr(loadbzw4_pbr, long long, Z, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0006008500040083LL, 0x000a008900080087LL) +TEST_pbr(loadbsw4_pbr, long long, S, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0006008500040083LL, 0x000a008900080087LL) + +/* + **************************************************************************** + * _pi addressing mode (addr ++ inc) + */ +#define BxW_LOAD_pi(SZ, RES, PTR, INC) \ + __asm__( \ + "%0 = mem" #SZ "(%1++#" #INC ")\n\t" \ + : "=r"(RES), "+r"(PTR)) +#define BxW_LOAD_pi_Z(RES, PTR, INC) \ + BxW_LOAD_pi(ubh, RES, PTR, INC) +#define BxW_LOAD_pi_S(RES, PTR, INC) \ + BxW_LOAD_pi(bh, RES, PTR, INC) + +#define TEST_pi(NAME, TYPE, SIGN, INC, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[1 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[2 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[3 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[4 * (INC)]); \ +} + +TEST_pi(loadbzw2_pi, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_pi(loadbsw2_pi, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_pi(loadbzw4_pi, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_pi(loadbsw4_pi, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + 
**************************************************************************** + * _pci addressing mode (addr ++ inc:circ) + */ +#define BxW_LOAD_pci(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++#" #INC ":circ(m0))\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define BxW_LOAD_pci_Z(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pci(ubh, RES, PTR, START, LEN, INC) +#define BxW_LOAD_pci_S(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pci(bh, RES, PTR, START, LEN, INC) + +#define TEST_pci(NAME, TYPE, SIGN, LEN, INC, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \ +} + +TEST_pci(loadbzw2_pci, int, Z, 6, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00020081) +TEST_pci(loadbsw2_pci, int, S, 6, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00020081) +TEST_pci(loadbzw4_pci, long long, Z, 8, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) +TEST_pci(loadbsw4_pci, long long, S, 8, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) + +/* + **************************************************************************** + * _pcr addressing mode (addr ++ I:circ(modifier-reg)) + */ +#define BxW_LOAD_pcr(SZ, RES, PTR, START, LEN, 
INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SZ "(%1++I:circ(m1))\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define BxW_LOAD_pcr_Z(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pcr(ubh, RES, PTR, START, LEN, INC) +#define BxW_LOAD_pcr_S(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pcr(bh, RES, PTR, START, LEN, INC) + +#define TEST_pcr(NAME, TYPE, SIGN, SIZE, LEN, INC, \ + EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \ +} + +TEST_pcr(loadbzw2_pcr, int, Z, 2, 8, 2, 0x00000000, + 0x00020081, 0x00060085, 0x00020081, 0x00060085) +TEST_pcr(loadbsw2_pcr, int, S, 2, 8, 2, 0x0000ff00, + 0x00020081, 0x00060085, 0x00020081, 0x00060085) +TEST_pcr(loadbzw4_pcr, long long, Z, 4, 8, 1, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) +TEST_pcr(loadbsw4_pcr, long long, S, 4, 8, 1, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) + +int main() +{ + test_loadbzw2_io(); + test_loadbsw2_io(); + test_loadbzw4_io(); + test_loadbsw4_io(); + + test_loadbzw2_ur(); + test_loadbsw2_ur(); + test_loadbzw4_ur(); + test_loadbsw4_ur(); + + test_loadbzw2_ap(); + test_loadbsw2_ap(); + test_loadbzw4_ap(); + 
test_loadbsw4_ap(); + + test_loadbzw2_pr(); + test_loadbsw2_pr(); + test_loadbzw4_pr(); + test_loadbsw4_pr(); + + test_loadbzw2_pbr(); + test_loadbsw2_pbr(); + test_loadbzw4_pbr(); + test_loadbsw4_pbr(); + + test_loadbzw2_pi(); + test_loadbsw2_pi(); + test_loadbzw4_pi(); + test_loadbsw4_pi(); + + test_loadbzw2_pci(); + test_loadbsw2_pci(); + test_loadbzw4_pci(); + test_loadbsw4_pci(); + + test_loadbzw2_pcr(); + test_loadbsw2_pcr(); + test_loadbzw4_pcr(); + test_loadbsw4_pcr(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index 458759f..17c3919 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -231,6 +231,14 @@ static void check(int val, int expect) } } +static void check64(long long val, long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); + err++; + } +} + uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; uint32_t array[10]; @@ -264,8 +272,36 @@ static long long creg_pair(int x, int y) return retval; } +static long long decbin(long long x, long long y, int *pred) +{ + long long retval; + asm ("%0 = decbin(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(retval), "=r"(*pred) + : "r"(x), "r"(y)); + return retval; +} + +/* Check that predicates are auto-and'ed in a packet */ +static int auto_and(void) +{ + int retval; + asm ("r5 = #1\n\t" + "{\n\t" + " p0 = cmp.eq(r1, #1)\n\t" + " p0 = cmp.eq(r1, #2)\n\t" + "}\n\t" + "%0 = p0\n\t" + : "=r"(retval) + : + : "r5", "p0"); + return retval; +} + int main() { + long long res64; + int pred; memcpy(array, init, sizeof(array)); S4_storerhnew_rr(array, 4, 0xffff); @@ -375,6 +411,17 @@ int main() res = test_clrtnew(2, 7); check(res, 7); + res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); + check64(res64, 0x357980003700010cLL); + check(pred, 0); + + res64 = decbin(0xfLL, 0x1bLL, &pred); + check64(res64, 0x78000100LL); + check(pred, 1); + + res = auto_and(); + check(res, 0); 
+ puts(err ? "FAIL" : "PASS"); return err; } diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c new file mode 100644 index 0000000..52997b3 --- /dev/null +++ b/tests/tcg/hexagon/multi_result.c @@ -0,0 +1,282 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +static int sfrecipa(int Rs, int Rt, int *pred_result) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(Rs), "r"(Rt) + : "p0"); + *pred_result = predval; + return result; +} + +static int sfinvsqrta(int Rs, int *pred_result) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfinvsqrta(%2)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(Rs) + : "p0"); + *pred_result = predval; + return result; +} + +static long long vacsh(long long Rxx, long long Rss, long long Rtt, + int *pred_result, int *ovf_result) +{ + long long result = Rxx; + int predval; + int usr; + + /* + * This instruction can set bit 0 (OVF/overflow) in usr + * Clear the bit first, then return that bit to the caller + */ + asm volatile("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "usr = r2\n\t" + "%0,p0 = vacsh(%3, %4)\n\t" + "%1 = p0\n\t" + "%2 = usr\n\t" + : "+r"(result), 
"=r"(predval), "=r"(usr) + : "r"(Rss), "r"(Rtt) + : "r2", "p0", "usr"); + *pred_result = predval; + *ovf_result = (usr & 1); + return result; +} + +static long long vminub(long long Rtt, long long Rss, + int *pred_result) +{ + long long result; + int predval; + + asm volatile("%0,p0 = vminub(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(result), "=r"(predval) + : "r"(Rtt), "r"(Rss) + : "p0"); + *pred_result = predval; + return result; +} + +static long long add_carry(long long Rss, long long Rtt, + int pred_in, int *pred_result) +{ + long long result; + int predval = pred_in; + + asm volatile("p0 = %1\n\t" + "%0 = add(%2, %3, p0):carry\n\t" + "%1 = p0\n\t" + : "=r"(result), "+r"(predval) + : "r"(Rss), "r"(Rtt) + : "p0"); + *pred_result = predval; + return result; +} + +static long long sub_carry(long long Rss, long long Rtt, + int pred_in, int *pred_result) +{ + long long result; + int predval = pred_in; + + asm volatile("p0 = !cmp.eq(%1, #0)\n\t" + "%0 = sub(%2, %3, p0):carry\n\t" + "%1 = p0\n\t" + : "=r"(result), "+r"(predval) + : "r"(Rss), "r"(Rtt) + : "p0"); + *pred_result = predval; + return result; +} + +int err; + +static void check_ll(long long val, long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); + err++; + } +} + +static void check(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%08x != 0x%08x\n", val, expect); + err++; + } +} + +static void check_p(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%02x != 0x%02x\n", val, expect); + err++; + } +} + +static void test_sfrecipa() +{ + int res; + int pred_result; + + res = sfrecipa(0x04030201, 0x05060708, &pred_result); + check(res, 0x59f38001); + check_p(pred_result, 0x00); +} + +static void test_sfinvsqrta() +{ + int res; + int pred_result; + + res = sfinvsqrta(0x04030201, &pred_result); + check(res, 0x4d330000); + check_p(pred_result, 0xe0); + + res = sfinvsqrta(0x0, &pred_result); + check(res, 0x3f800000); + 
check_p(pred_result, 0x0); +} + +static void test_vacsh() +{ + long long res64; + int pred_result; + int ovf_result; + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030004LL, + 0x0000000000000000LL, &pred_result, &ovf_result); + check_ll(res64, 0x0004000300030004LL); + check_p(pred_result, 0xf0); + check(ovf_result, 0); + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030004LL, + 0x000affff000d0000LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e0003000f0004LL); + check_p(pred_result, 0xcc); + check(ovf_result, 0); + + res64 = vacsh(0x00047fff00020001LL, + 0x00017fff00030004LL, + 0x000a0fff000d0000LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e7fff000f0004LL); + check_p(pred_result, 0xfc); + check(ovf_result, 1); + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030009LL, + 0x000affff000d0001LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e0003000f0008LL); + check_p(pred_result, 0xcc); + check(ovf_result, 0); +} + +static void test_vminub() +{ + long long res64; + int pred_result; + + res64 = vminub(0x0807060504030201LL, + 0x0102030405060708LL, + &pred_result); + check_ll(res64, 0x0102030404030201LL); + check_p(pred_result, 0xf0); + + res64 = vminub(0x0802060405030701LL, + 0x0107030504060208LL, + &pred_result); + check_ll(res64, 0x0102030404030201LL); + check_p(pred_result, 0xaa); +} + +static void test_add_carry() +{ + long long res64; + int pred_result; + + res64 = add_carry(0x0000000000000000LL, + 0xffffffffffffffffLL, + 1, &pred_result); + check_ll(res64, 0x0000000000000000LL); + check_p(pred_result, 0xff); + + res64 = add_carry(0x0000000100000000LL, + 0xffffffffffffffffLL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); + + res64 = add_carry(0x0000000100000000LL, + 0xffffffffffffffffLL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); +} + +static void test_sub_carry() +{ + long long res64; + int pred_result; + + res64 = 
sub_carry(0x0000000000000000LL, + 0x0000000000000000LL, + 1, &pred_result); + check_ll(res64, 0x0000000000000000LL); + check_p(pred_result, 0xff); + + res64 = sub_carry(0x0000000100000000LL, + 0x0000000000000000LL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); + + res64 = sub_carry(0x0000000100000000LL, + 0x0000000000000000LL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); +} + +int main() +{ + test_sfrecipa(); + test_sfinvsqrta(); + test_vacsh(); + test_vminub(); + test_add_carry(); + test_sub_carry(); + + puts(err ? "FAIL" : "PASS"); + return err; +} |