-rw-r--r--  fpu/softfloat-specialize.c.inc  3
-rw-r--r--  linux-user/hexagon/cpu_loop.c  2
-rw-r--r--  target/hexagon/arch.c  181
-rw-r--r--  target/hexagon/arch.h  9
-rw-r--r--  target/hexagon/conv_emu.c  177
-rw-r--r--  target/hexagon/conv_emu.h  31
-rw-r--r--  target/hexagon/cpu.c  14
-rw-r--r--  target/hexagon/cpu.h  5
-rw-r--r--  target/hexagon/cpu_bits.h  2
-rw-r--r--  target/hexagon/decode.c  84
-rw-r--r--  target/hexagon/fma_emu.c  40
-rw-r--r--  target/hexagon/gen_tcg.h  424
-rwxr-xr-x  target/hexagon/gen_tcg_funcs.py  2
-rw-r--r--  target/hexagon/genptr.c  233
-rw-r--r--  target/hexagon/helper.h  23
-rw-r--r--  target/hexagon/iclass.c  4
-rw-r--r--  target/hexagon/imported/alu.idef  44
-rw-r--r--  target/hexagon/imported/compare.idef  12
-rw-r--r--  target/hexagon/imported/encode_pp.def  30
-rw-r--r--  target/hexagon/imported/float.idef  32
-rw-r--r--  target/hexagon/imported/ldst.idef  68
-rwxr-xr-x  target/hexagon/imported/macros.def  47
-rw-r--r--  target/hexagon/imported/shift.idef  47
-rw-r--r--  target/hexagon/insn.h  21
-rw-r--r--  target/hexagon/internal.h  11
-rw-r--r--  target/hexagon/macros.h  118
-rw-r--r--  target/hexagon/meson.build  1
-rw-r--r--  target/hexagon/op_helper.c  392
-rw-r--r--  target/hexagon/reg_fields.c  3
-rw-r--r--  target/hexagon/reg_fields.h  4
-rw-r--r--  target/hexagon/translate.c  175
-rw-r--r--  target/hexagon/translate.h  9
-rw-r--r--  tests/tcg/hexagon/Makefile.target  6
-rw-r--r--  tests/tcg/hexagon/brev.c  190
-rw-r--r--  tests/tcg/hexagon/circ.c  486
-rw-r--r--  tests/tcg/hexagon/fpstuff.c  242
-rw-r--r--  tests/tcg/hexagon/load_align.c  415
-rw-r--r--  tests/tcg/hexagon/load_unpack.c  474
-rw-r--r--  tests/tcg/hexagon/misc.c  47
-rw-r--r--  tests/tcg/hexagon/multi_result.c  282
40 files changed, 3754 insertions, 636 deletions
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index c2f87ad..9ea318f 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -145,6 +145,9 @@ static FloatParts parts_default_nan(float_status *status)
#elif defined(TARGET_HPPA)
/* snan_bit_is_one, set msb-1. */
frac = 1ULL << (DECOMPOSED_BINARY_POINT - 2);
+#elif defined(TARGET_HEXAGON)
+ sign = 1;
+ frac = ~0ULL;
#else
/* This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V,
* S390, SH4, TriCore, and Xtensa. I cannot find documentation
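To make the new Hexagon case concrete: a set sign bit plus an all-ones decomposed fraction packs to the all-ones single-precision pattern, matching the float32_nan definition in target/hexagon/arch.c further down in this diff. A minimal sketch (illustrative only, not part of the patch):

/* Hexagon default NaN as a raw IEEE-754 single-precision pattern. */
#include <stdint.h>

static uint32_t hexagon_default_nan_f32(void)
{
    uint32_t sign = 1u << 31;          /* sign bit set */
    uint32_t exp  = 0xffu << 23;       /* NaN exponent, all ones */
    uint32_t frac = (1u << 23) - 1;    /* all-ones fraction */
    return sign | exp | frac;          /* 0xffffffff */
}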
diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c
index 9a68ca0..bc34f5d 100644
--- a/linux-user/hexagon/cpu_loop.c
+++ b/linux-user/hexagon/cpu_loop.c
@@ -25,7 +25,7 @@
void cpu_loop(CPUHexagonState *env)
{
- CPUState *cs = CPU(hexagon_env_get_cpu(env));
+ CPUState *cs = env_cpu(env);
int trapnr, signum, sigcode;
target_ulong sigaddr;
target_ulong syscallnum;
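The removed hexagon_env_get_cpu() wrapper (deleted from cpu.h later in this diff) open-coded container_of(env, HexagonCPU, env); env_cpu() is QEMU's generic equivalent. A rough sketch of what it resolves to (hedged; the parent_obj field name is an assumption about the HexagonCPU layout):

/* Sketch only: how env_cpu() gets from the arch state to the CPUState. */
static inline CPUState *env_cpu_sketch(CPUHexagonState *env)
{
    HexagonCPU *cpu = container_of(env, HexagonCPU, env);
    return &cpu->parent_obj;   /* assumption: CPUState is the first member */
}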
diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c
index 09de124..68a55b3 100644
--- a/target/hexagon/arch.c
+++ b/target/hexagon/arch.c
@@ -27,6 +27,97 @@
#define SF_MANTBITS 23
#define float32_nan make_float32(0xffffffff)
+/*
+ * These three tables are used by the cabacdecbin instruction
+ */
+const uint8_t rLPS_table_64x4[64][4] = {
+ {128, 176, 208, 240},
+ {128, 167, 197, 227},
+ {128, 158, 187, 216},
+ {123, 150, 178, 205},
+ {116, 142, 169, 195},
+ {111, 135, 160, 185},
+ {105, 128, 152, 175},
+ {100, 122, 144, 166},
+ {95, 116, 137, 158},
+ {90, 110, 130, 150},
+ {85, 104, 123, 142},
+ {81, 99, 117, 135},
+ {77, 94, 111, 128},
+ {73, 89, 105, 122},
+ {69, 85, 100, 116},
+ {66, 80, 95, 110},
+ {62, 76, 90, 104},
+ {59, 72, 86, 99},
+ {56, 69, 81, 94},
+ {53, 65, 77, 89},
+ {51, 62, 73, 85},
+ {48, 59, 69, 80},
+ {46, 56, 66, 76},
+ {43, 53, 63, 72},
+ {41, 50, 59, 69},
+ {39, 48, 56, 65},
+ {37, 45, 54, 62},
+ {35, 43, 51, 59},
+ {33, 41, 48, 56},
+ {32, 39, 46, 53},
+ {30, 37, 43, 50},
+ {29, 35, 41, 48},
+ {27, 33, 39, 45},
+ {26, 31, 37, 43},
+ {24, 30, 35, 41},
+ {23, 28, 33, 39},
+ {22, 27, 32, 37},
+ {21, 26, 30, 35},
+ {20, 24, 29, 33},
+ {19, 23, 27, 31},
+ {18, 22, 26, 30},
+ {17, 21, 25, 28},
+ {16, 20, 23, 27},
+ {15, 19, 22, 25},
+ {14, 18, 21, 24},
+ {14, 17, 20, 23},
+ {13, 16, 19, 22},
+ {12, 15, 18, 21},
+ {12, 14, 17, 20},
+ {11, 14, 16, 19},
+ {11, 13, 15, 18},
+ {10, 12, 15, 17},
+ {10, 12, 14, 16},
+ {9, 11, 13, 15},
+ {9, 11, 12, 14},
+ {8, 10, 12, 14},
+ {8, 9, 11, 13},
+ {7, 9, 11, 12},
+ {7, 9, 10, 12},
+ {7, 8, 10, 11},
+ {6, 8, 9, 11},
+ {6, 7, 9, 10},
+ {6, 7, 8, 9},
+ {2, 2, 2, 2}
+};
+
+const uint8_t AC_next_state_MPS_64[64] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+ 61, 62, 62, 63
+};
+
+
+const uint8_t AC_next_state_LPS_64[64] = {
+ 0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
+ 8, 9, 9, 11, 11, 12, 13, 13, 15, 15,
+ 16, 16, 18, 18, 19, 19, 21, 21, 22, 22,
+ 23, 24, 24, 25, 26, 26, 27, 27, 28, 29,
+ 29, 30, 30, 30, 31, 32, 32, 33, 33, 33,
+ 34, 34, 35, 35, 35, 36, 36, 36, 37, 37,
+ 37, 38, 38, 63
+};
+
#define BITS_MASK_8 0x5555555555555555ULL
#define PAIR_MASK_8 0x3333333333333333ULL
#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
@@ -76,19 +167,6 @@ uint64_t deinterleave(uint64_t src)
return myeven | (myodd << 32);
}
-uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c)
-{
- uint64_t tmpa, tmpb, tmpc;
- tmpa = fGETUWORD(0, a);
- tmpb = fGETUWORD(0, b);
- tmpc = tmpa + tmpb + c;
- tmpa = fGETUWORD(1, a);
- tmpb = fGETUWORD(1, b);
- tmpc = tmpa + tmpb + fGETUWORD(1, tmpc);
- tmpc = fGETUWORD(1, tmpc);
- return tmpc;
-}
-
int32_t conv_round(int32_t a, int n)
{
int64_t val;
@@ -108,7 +186,7 @@ int32_t conv_round(int32_t a, int n)
/* Floating Point Stuff */
-static const int softfloat_roundingmodes[] = {
+static const FloatRoundMode softfloat_roundingmodes[] = {
float_round_nearest_even,
float_round_to_zero,
float_round_down,
@@ -156,12 +234,6 @@ void arch_fpop_end(CPUHexagonState *env)
}
}
-static float32 float32_mul_pow2(float32 a, uint32_t p, float_status *fp_status)
-{
- float32 b = make_float32((SF_BIAS + p) << SF_MANTBITS);
- return float32_mul(a, b, fp_status);
-}
-
int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
float_status *fp_status)
{
@@ -200,12 +272,13 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
/* or put Inf in num fixup? */
uint8_t RsV_sign = float32_is_neg(RsV);
uint8_t RtV_sign = float32_is_neg(RtV);
+ /* Check that RsV is NOT infinite before we overwrite it */
+ if (!float32_is_infinity(RsV)) {
+ float_raise(float_flag_divbyzero, fp_status);
+ }
RsV = infinite_float32(RsV_sign ^ RtV_sign);
RtV = float32_one;
RdV = float32_one;
- if (float32_is_infinity(RsV)) {
- float_raise(float_flag_divbyzero, fp_status);
- }
} else if (float32_is_infinity(RtV)) {
RsV = make_float32(0x80000000 & (RsV ^ RtV));
RtV = float32_one;
@@ -230,22 +303,22 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
/* Near quotient underflow / inexact Q */
PeV = 0x80;
- RtV = float32_mul_pow2(RtV, -64, fp_status);
- RsV = float32_mul_pow2(RsV, 64, fp_status);
+ RtV = float32_scalbn(RtV, -64, fp_status);
+ RsV = float32_scalbn(RsV, 64, fp_status);
} else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) {
/* Near quotient overflow */
PeV = 0x40;
- RtV = float32_mul_pow2(RtV, 32, fp_status);
- RsV = float32_mul_pow2(RsV, -32, fp_status);
+ RtV = float32_scalbn(RtV, 32, fp_status);
+ RsV = float32_scalbn(RsV, -32, fp_status);
} else if (n_exp <= SF_MANTBITS + 2) {
- RtV = float32_mul_pow2(RtV, 64, fp_status);
- RsV = float32_mul_pow2(RsV, 64, fp_status);
+ RtV = float32_scalbn(RtV, 64, fp_status);
+ RsV = float32_scalbn(RsV, 64, fp_status);
} else if (d_exp <= 1) {
- RtV = float32_mul_pow2(RtV, 32, fp_status);
- RsV = float32_mul_pow2(RsV, 32, fp_status);
+ RtV = float32_scalbn(RtV, 32, fp_status);
+ RsV = float32_scalbn(RsV, 32, fp_status);
} else if (d_exp > 252) {
- RtV = float32_mul_pow2(RtV, -32, fp_status);
- RsV = float32_mul_pow2(RsV, -32, fp_status);
+ RtV = float32_scalbn(RtV, -32, fp_status);
+ RsV = float32_scalbn(RsV, -32, fp_status);
}
RdV = 0;
ret = 1;
@@ -265,7 +338,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
int r_exp;
int ret = 0;
RsV = *Rs;
- if (float32_is_infinity(RsV)) {
+ if (float32_is_any_nan(RsV)) {
if (extract32(RsV, 22, 1) == 0) {
float_raise(float_flag_invalid, fp_status);
}
@@ -287,7 +360,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
/* Basic checks passed */
r_exp = float32_getexp(RsV);
if (r_exp <= 24) {
- RsV = float32_mul_pow2(RsV, 64, fp_status);
+ RsV = float32_scalbn(RsV, 64, fp_status);
PeV = 0xe0;
}
RdV = 0;
@@ -298,3 +371,41 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
*adjust = PeV;
return ret;
}
+
+const uint8_t recip_lookup_table[128] = {
+ 0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4,
+ 0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9,
+ 0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1,
+ 0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b,
+ 0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087,
+ 0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075,
+ 0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065,
+ 0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056,
+ 0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049,
+ 0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c,
+ 0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030,
+ 0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025,
+ 0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b,
+ 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012,
+ 0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
+ 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
+};
+
+const uint8_t invsqrt_lookup_table[128] = {
+ 0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
+ 0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
+ 0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
+ 0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
+ 0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
+ 0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
+ 0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
+ 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
+ 0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
+ 0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
+ 0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
+ 0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
+ 0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
+ 0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
+ 0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
+ 0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
+};
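For reference on the float32_mul_pow2 to float32_scalbn conversion earlier in this file: the removed helper (reproduced below from the deleted lines, for comparison only) scaled by 2^p by constructing a power-of-two float32 and multiplying, relying on unsigned wraparound of (SF_BIAS + p) for negative scale values, whereas float32_scalbn adjusts the exponent directly.

/* Removed helper, shown only to illustrate the equivalence. */
static float32 float32_mul_pow2(float32 a, uint32_t p, float_status *fp_status)
{
    float32 b = make_float32((SF_BIAS + p) << SF_MANTBITS);  /* 2^p as float32 */
    return float32_mul(a, b, fp_status);
}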
diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h
index 1f7f036..7091806 100644
--- a/target/hexagon/arch.h
+++ b/target/hexagon/arch.h
@@ -20,9 +20,12 @@
#include "qemu/int128.h"
+extern const uint8_t rLPS_table_64x4[64][4];
+extern const uint8_t AC_next_state_MPS_64[64];
+extern const uint8_t AC_next_state_LPS_64[64];
+
uint64_t interleave(uint32_t odd, uint32_t even);
uint64_t deinterleave(uint64_t src);
-uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c);
int32_t conv_round(int32_t a, int n);
void arch_fpop_start(CPUHexagonState *env);
void arch_fpop_end(CPUHexagonState *env);
@@ -31,4 +34,8 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd,
int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
float_status *fp_status);
+extern const uint8_t recip_lookup_table[128];
+
+extern const uint8_t invsqrt_lookup_table[128];
+
#endif
diff --git a/target/hexagon/conv_emu.c b/target/hexagon/conv_emu.c
deleted file mode 100644
index 3985b10..0000000
--- a/target/hexagon/conv_emu.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/host-utils.h"
-#include "fpu/softfloat.h"
-#include "macros.h"
-#include "conv_emu.h"
-
-#define LL_MAX_POS 0x7fffffffffffffffULL
-#define MAX_POS 0x7fffffffU
-
-static uint64_t conv_f64_to_8u_n(float64 in, int will_negate,
- float_status *fp_status)
-{
- uint8_t sign = float64_is_neg(in);
- if (float64_is_infinity(in)) {
- float_raise(float_flag_invalid, fp_status);
- if (float64_is_neg(in)) {
- return 0ULL;
- } else {
- return ~0ULL;
- }
- }
- if (float64_is_any_nan(in)) {
- float_raise(float_flag_invalid, fp_status);
- return ~0ULL;
- }
- if (float64_is_zero(in)) {
- return 0;
- }
- if (sign) {
- float_raise(float_flag_invalid, fp_status);
- return 0;
- }
- if (float64_lt(in, float64_half, fp_status)) {
- /* Near zero, captures large fracshifts, denorms, etc */
- float_raise(float_flag_inexact, fp_status);
- switch (get_float_rounding_mode(fp_status)) {
- case float_round_down:
- if (will_negate) {
- return 1;
- } else {
- return 0;
- }
- case float_round_up:
- if (!will_negate) {
- return 1;
- } else {
- return 0;
- }
- default:
- return 0; /* nearest or towards zero */
- }
- }
- return float64_to_uint64(in, fp_status);
-}
-
-static void clr_float_exception_flags(uint8_t flag, float_status *fp_status)
-{
- uint8_t flags = fp_status->float_exception_flags;
- flags &= ~flag;
- set_float_exception_flags(flags, fp_status);
-}
-
-static uint32_t conv_df_to_4u_n(float64 fp64, int will_negate,
- float_status *fp_status)
-{
- uint64_t tmp;
- tmp = conv_f64_to_8u_n(fp64, will_negate, fp_status);
- if (tmp > 0x00000000ffffffffULL) {
- clr_float_exception_flags(float_flag_inexact, fp_status);
- float_raise(float_flag_invalid, fp_status);
- return ~0U;
- }
- return (uint32_t)tmp;
-}
-
-uint64_t conv_df_to_8u(float64 in, float_status *fp_status)
-{
- return conv_f64_to_8u_n(in, 0, fp_status);
-}
-
-uint32_t conv_df_to_4u(float64 in, float_status *fp_status)
-{
- return conv_df_to_4u_n(in, 0, fp_status);
-}
-
-int64_t conv_df_to_8s(float64 in, float_status *fp_status)
-{
- uint8_t sign = float64_is_neg(in);
- uint64_t tmp;
- if (float64_is_any_nan(in)) {
- float_raise(float_flag_invalid, fp_status);
- return -1;
- }
- if (sign) {
- float64 minus_fp64 = float64_abs(in);
- tmp = conv_f64_to_8u_n(minus_fp64, 1, fp_status);
- } else {
- tmp = conv_f64_to_8u_n(in, 0, fp_status);
- }
- if (tmp > (LL_MAX_POS + sign)) {
- clr_float_exception_flags(float_flag_inexact, fp_status);
- float_raise(float_flag_invalid, fp_status);
- tmp = (LL_MAX_POS + sign);
- }
- if (sign) {
- return -tmp;
- } else {
- return tmp;
- }
-}
-
-int32_t conv_df_to_4s(float64 in, float_status *fp_status)
-{
- uint8_t sign = float64_is_neg(in);
- uint64_t tmp;
- if (float64_is_any_nan(in)) {
- float_raise(float_flag_invalid, fp_status);
- return -1;
- }
- if (sign) {
- float64 minus_fp64 = float64_abs(in);
- tmp = conv_f64_to_8u_n(minus_fp64, 1, fp_status);
- } else {
- tmp = conv_f64_to_8u_n(in, 0, fp_status);
- }
- if (tmp > (MAX_POS + sign)) {
- clr_float_exception_flags(float_flag_inexact, fp_status);
- float_raise(float_flag_invalid, fp_status);
- tmp = (MAX_POS + sign);
- }
- if (sign) {
- return -tmp;
- } else {
- return tmp;
- }
-}
-
-uint64_t conv_sf_to_8u(float32 in, float_status *fp_status)
-{
- float64 fp64 = float32_to_float64(in, fp_status);
- return conv_df_to_8u(fp64, fp_status);
-}
-
-uint32_t conv_sf_to_4u(float32 in, float_status *fp_status)
-{
- float64 fp64 = float32_to_float64(in, fp_status);
- return conv_df_to_4u(fp64, fp_status);
-}
-
-int64_t conv_sf_to_8s(float32 in, float_status *fp_status)
-{
- float64 fp64 = float32_to_float64(in, fp_status);
- return conv_df_to_8s(fp64, fp_status);
-}
-
-int32_t conv_sf_to_4s(float32 in, float_status *fp_status)
-{
- float64 fp64 = float32_to_float64(in, fp_status);
- return conv_df_to_4s(fp64, fp_status);
-}
diff --git a/target/hexagon/conv_emu.h b/target/hexagon/conv_emu.h
deleted file mode 100644
index cade9de..0000000
--- a/target/hexagon/conv_emu.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef HEXAGON_CONV_EMU_H
-#define HEXAGON_CONV_EMU_H
-
-uint64_t conv_sf_to_8u(float32 in, float_status *fp_status);
-uint32_t conv_sf_to_4u(float32 in, float_status *fp_status);
-int64_t conv_sf_to_8s(float32 in, float_status *fp_status);
-int32_t conv_sf_to_4s(float32 in, float_status *fp_status);
-
-uint64_t conv_df_to_8u(float64 in, float_status *fp_status);
-uint32_t conv_df_to_4u(float64 in, float_status *fp_status);
-int64_t conv_df_to_8s(float64 in, float_status *fp_status);
-int32_t conv_df_to_4s(float64 in, float_status *fp_status);
-
-#endif
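The deleted conv_emu routines implemented Hexagon's saturating float-to-integer conversions with explicit exception-flag handling. A rough host-C model of the 32-bit signed case, distilled from the removed conv_df_to_4s (illustrative sketch; plain truncation stands in for the configured rounding mode and no status flags are modelled):

#include <stdint.h>
#include <math.h>

static int32_t conv_df_to_4s_model(double in)
{
    if (isnan(in)) {
        return -1;                /* NaN converts to -1 and raises invalid */
    }
    if (in >= 2147483648.0) {
        return INT32_MAX;         /* saturate; invalid raised, inexact cleared */
    }
    if (in < -2147483648.0) {
        return INT32_MIN;
    }
    return (int32_t)in;
}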
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index b0b3040..ff44fd6 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -23,6 +23,7 @@
#include "exec/exec-all.h"
#include "qapi/error.h"
#include "hw/qdev-properties.h"
+#include "fpu/softfloat-helpers.h"
static void hexagon_v67_cpu_init(Object *obj)
{
@@ -69,10 +70,9 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = {
* stacks at different locations. This is used to compensate so the diff is
* cleaner.
*/
-static inline target_ulong adjust_stack_ptrs(CPUHexagonState *env,
- target_ulong addr)
+static target_ulong adjust_stack_ptrs(CPUHexagonState *env, target_ulong addr)
{
- HexagonCPU *cpu = container_of(env, HexagonCPU, env);
+ HexagonCPU *cpu = env_archcpu(env);
target_ulong stack_adjust = cpu->lldb_stack_adjust;
target_ulong stack_start = env->stack_start;
target_ulong stack_size = 0x10000;
@@ -88,7 +88,7 @@ static inline target_ulong adjust_stack_ptrs(CPUHexagonState *env,
}
/* HEX_REG_P3_0 (aka C4) is an alias for the predicate registers */
-static inline target_ulong read_p3_0(CPUHexagonState *env)
+static target_ulong read_p3_0(CPUHexagonState *env)
{
int32_t control_reg = 0;
int i;
@@ -116,7 +116,7 @@ static void print_reg(FILE *f, CPUHexagonState *env, int regnum)
static void hexagon_dump(CPUHexagonState *env, FILE *f)
{
- HexagonCPU *cpu = container_of(env, HexagonCPU, env);
+ HexagonCPU *cpu = env_archcpu(env);
if (cpu->lldb_compat) {
/*
@@ -206,8 +206,12 @@ static void hexagon_cpu_reset(DeviceState *dev)
CPUState *cs = CPU(dev);
HexagonCPU *cpu = HEXAGON_CPU(cs);
HexagonCPUClass *mcc = HEXAGON_CPU_GET_CLASS(cpu);
+ CPUHexagonState *env = &cpu->env;
mcc->parent_reset(dev);
+
+ set_default_nan_mode(1, &env->fp_status);
+ set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
}
static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info)
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index e04eac5..2855dd3 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -127,11 +127,6 @@ typedef struct HexagonCPU {
target_ulong lldb_stack_adjust;
} HexagonCPU;
-static inline HexagonCPU *hexagon_env_get_cpu(CPUHexagonState *env)
-{
- return container_of(env, HexagonCPU, env);
-}
-
#include "cpu_bits.h"
#define cpu_signal_handler cpu_hexagon_signal_handler
diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h
index 96af834..96fef71 100644
--- a/target/hexagon/cpu_bits.h
+++ b/target/hexagon/cpu_bits.h
@@ -47,7 +47,7 @@ static inline uint32_t iclass_bits(uint32_t encoding)
return iclass;
}
-static inline int is_packet_end(uint32_t endocing)
+static inline bool is_packet_end(uint32_t endocing)
{
uint32_t bits = parse_bits(endocing);
return ((bits == 0x3) || (bits == 0x0));
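For readers new to Hexagon encodings: parse_bits() extracts the packet-parse field of each instruction word (bits 15:14 by convention; treat the exact bit position here as an assumption for illustration). A short sketch of the values is_packet_end() and decode_parsebits_is_loopend() test for:

/* Illustrative only.
 *   0x3 : last word of the packet
 *   0x0 : duplex, which also ends the packet
 *   0x1 / 0x2 : more words follow (0x2 in a leading word marks a
 *               hardware-loop end, see decode.c below)
 */
static inline uint32_t example_parse_bits(uint32_t encoding)
{
    return (encoding >> 14) & 0x3;
}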
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index c9bacaa..dffe1d1 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -48,8 +48,8 @@ enum {
DEF_REGMAP(R_16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23)
DEF_REGMAP(R__8, 8, 0, 2, 4, 6, 16, 18, 20, 22)
-#define DECODE_MAPPED_REG(REGNO, NAME) \
- insn->regno[REGNO] = DECODE_REGISTER_##NAME[insn->regno[REGNO]];
+#define DECODE_MAPPED_REG(OPNUM, NAME) \
+ insn->regno[OPNUM] = DECODE_REGISTER_##NAME[insn->regno[OPNUM]];
typedef struct {
const struct DectreeTable *table_link;
@@ -340,8 +340,8 @@ static void decode_split_cmpjump(Packet *pkt)
if (GET_ATTRIB(pkt->insn[i].opcode, A_NEWCMPJUMP)) {
last = pkt->num_insns;
pkt->insn[last] = pkt->insn[i]; /* copy the instruction */
- pkt->insn[last].part1 = 1; /* last instruction does the CMP */
- pkt->insn[i].part1 = 0; /* existing instruction does the JUMP */
+ pkt->insn[last].part1 = true; /* last insn does the CMP */
+ pkt->insn[i].part1 = false; /* existing insn does the JUMP */
pkt->num_insns++;
}
}
@@ -354,7 +354,7 @@ static void decode_split_cmpjump(Packet *pkt)
}
}
-static inline int decode_opcode_can_jump(int opcode)
+static bool decode_opcode_can_jump(int opcode)
{
if ((GET_ATTRIB(opcode, A_JUMP)) ||
(GET_ATTRIB(opcode, A_CALL)) ||
@@ -362,15 +362,15 @@ static inline int decode_opcode_can_jump(int opcode)
(opcode == J2_pause)) {
/* Exception to A_JUMP attribute */
if (opcode == J4_hintjumpr) {
- return 0;
+ return false;
}
- return 1;
+ return true;
}
- return 0;
+ return false;
}
-static inline int decode_opcode_ends_loop(int opcode)
+static bool decode_opcode_ends_loop(int opcode)
{
return GET_ATTRIB(opcode, A_HWLOOP0_END) ||
GET_ATTRIB(opcode, A_HWLOOP1_END);
@@ -383,9 +383,9 @@ static void decode_set_insn_attr_fields(Packet *pkt)
int numinsns = pkt->num_insns;
uint16_t opcode;
- pkt->pkt_has_cof = 0;
- pkt->pkt_has_endloop = 0;
- pkt->pkt_has_dczeroa = 0;
+ pkt->pkt_has_cof = false;
+ pkt->pkt_has_endloop = false;
+ pkt->pkt_has_dczeroa = false;
for (i = 0; i < numinsns; i++) {
opcode = pkt->insn[i].opcode;
@@ -394,14 +394,14 @@ static void decode_set_insn_attr_fields(Packet *pkt)
}
if (GET_ATTRIB(opcode, A_DCZEROA)) {
- pkt->pkt_has_dczeroa = 1;
+ pkt->pkt_has_dczeroa = true;
}
if (GET_ATTRIB(opcode, A_STORE)) {
if (pkt->insn[i].slot == 0) {
- pkt->pkt_has_store_s0 = 1;
+ pkt->pkt_has_store_s0 = true;
} else {
- pkt->pkt_has_store_s1 = 1;
+ pkt->pkt_has_store_s1 = true;
}
}
@@ -422,9 +422,9 @@ static void decode_set_insn_attr_fields(Packet *pkt)
*/
static void decode_shuffle_for_execution(Packet *packet)
{
- int changed = 0;
+ bool changed = false;
int i;
- int flag; /* flag means we've seen a non-memory instruction */
+ bool flag; /* flag means we've seen a non-memory instruction */
int n_mems;
int last_insn = packet->num_insns - 1;
@@ -437,7 +437,7 @@ static void decode_shuffle_for_execution(Packet *packet)
}
do {
- changed = 0;
+ changed = false;
/*
* Stores go last, must not reorder.
* Cannot shuffle stores past loads, either.
@@ -445,13 +445,13 @@ static void decode_shuffle_for_execution(Packet *packet)
* then a store, shuffle the store to the front. Don't shuffle
* stores wrt each other or a load.
*/
- for (flag = n_mems = 0, i = last_insn; i >= 0; i--) {
+ for (flag = false, n_mems = 0, i = last_insn; i >= 0; i--) {
int opcode = packet->insn[i].opcode;
if (flag && GET_ATTRIB(opcode, A_STORE)) {
decode_send_insn_to(packet, i, last_insn - n_mems);
n_mems++;
- changed = 1;
+ changed = true;
} else if (GET_ATTRIB(opcode, A_STORE)) {
n_mems++;
} else if (GET_ATTRIB(opcode, A_LOAD)) {
@@ -466,7 +466,7 @@ static void decode_shuffle_for_execution(Packet *packet)
* a .new value
*/
} else {
- flag = 1;
+ flag = true;
}
}
@@ -474,7 +474,7 @@ static void decode_shuffle_for_execution(Packet *packet)
continue;
}
/* Compares go first, may be reordered wrt each other */
- for (flag = 0, i = 0; i < last_insn + 1; i++) {
+ for (flag = false, i = 0; i < last_insn + 1; i++) {
int opcode = packet->insn[i].opcode;
if ((strstr(opcode_wregs[opcode], "Pd4") ||
@@ -483,7 +483,7 @@ static void decode_shuffle_for_execution(Packet *packet)
/* This should be a compare (not a store conditional) */
if (flag) {
decode_send_insn_to(packet, i, 0);
- changed = 1;
+ changed = true;
continue;
}
} else if (GET_ATTRIB(opcode, A_IMPLICIT_WRITES_P3) &&
@@ -495,18 +495,18 @@ static void decode_shuffle_for_execution(Packet *packet)
*/
if (flag) {
decode_send_insn_to(packet, i, 0);
- changed = 1;
+ changed = true;
continue;
}
} else if (GET_ATTRIB(opcode, A_IMPLICIT_WRITES_P0) &&
!GET_ATTRIB(opcode, A_NEWCMPJUMP)) {
if (flag) {
decode_send_insn_to(packet, i, 0);
- changed = 1;
+ changed = true;
continue;
}
} else {
- flag = 1;
+ flag = true;
}
}
if (changed) {
@@ -543,7 +543,7 @@ static void decode_apply_extenders(Packet *packet)
int i;
for (i = 0; i < packet->num_insns; i++) {
if (GET_ATTRIB(packet->insn[i].opcode, A_IT_EXTENDER)) {
- packet->insn[i + 1].extension_valid = 1;
+ packet->insn[i + 1].extension_valid = true;
apply_extender(packet, i + 1, packet->insn[i].immed[0]);
}
}
@@ -764,7 +764,7 @@ static void decode_add_endloop_insn(Insn *insn, int loopnum)
}
}
-static inline int decode_parsebits_is_loopend(uint32_t encoding32)
+static bool decode_parsebits_is_loopend(uint32_t encoding32)
{
uint32_t bits = parse_bits(encoding32);
return bits == 0x2;
@@ -775,8 +775,11 @@ decode_set_slot_number(Packet *pkt)
{
int slot;
int i;
- int hit_mem_insn = 0;
- int hit_duplex = 0;
+ bool hit_mem_insn = false;
+ bool hit_duplex = false;
+ bool slot0_found = false;
+ bool slot1_found = false;
+ int slot1_iidx = 0;
/*
* The slots are encoded in reverse order
@@ -801,7 +804,7 @@ decode_set_slot_number(Packet *pkt)
if ((GET_ATTRIB(pkt->insn[i].opcode, A_MEMLIKE) ||
GET_ATTRIB(pkt->insn[i].opcode, A_MEMLIKE_PACKET_RULES)) &&
!hit_mem_insn) {
- hit_mem_insn = 1;
+ hit_mem_insn = true;
pkt->insn[i].slot = 0;
continue;
}
@@ -818,7 +821,7 @@ decode_set_slot_number(Packet *pkt)
for (i = pkt->num_insns - 1; i >= 0; i--) {
/* First subinsn always goes to slot 0 */
if (GET_ATTRIB(pkt->insn[i].opcode, A_SUBINSN) && !hit_duplex) {
- hit_duplex = 1;
+ hit_duplex = true;
pkt->insn[i].slot = 0;
continue;
}
@@ -830,13 +833,10 @@ decode_set_slot_number(Packet *pkt)
}
/* Fix the exceptions - slot 1 is never empty, always aligns to slot 0 */
- int slot0_found = 0;
- int slot1_found = 0;
- int slot1_iidx = 0;
for (i = pkt->num_insns - 1; i >= 0; i--) {
/* Is slot0 used? */
if (pkt->insn[i].slot == 0) {
- int is_endloop = (pkt->insn[i].opcode == J2_endloop01);
+ bool is_endloop = (pkt->insn[i].opcode == J2_endloop01);
is_endloop |= (pkt->insn[i].opcode == J2_endloop0);
is_endloop |= (pkt->insn[i].opcode == J2_endloop1);
@@ -845,17 +845,17 @@ decode_set_slot_number(Packet *pkt)
* slot0 for endloop
*/
if (!is_endloop) {
- slot0_found = 1;
+ slot0_found = true;
}
}
/* Is slot1 used? */
if (pkt->insn[i].slot == 1) {
- slot1_found = 1;
+ slot1_found = true;
slot1_iidx = i;
}
}
/* Is slot0 empty and slot1 used? */
- if ((slot0_found == 0) && (slot1_found == 1)) {
+ if ((!slot0_found) && slot1_found) {
/* Then push it to slot0 */
pkt->insn[slot1_iidx].slot = 0;
}
@@ -873,7 +873,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
{
int num_insns = 0;
int words_read = 0;
- int end_of_packet = 0;
+ bool end_of_packet = false;
int new_insns = 0;
uint32_t encoding32;
@@ -890,7 +890,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
* decode works
*/
if (pkt->insn[num_insns].opcode == A4_ext) {
- pkt->insn[num_insns + 1].extension_valid = 1;
+ pkt->insn[num_insns + 1].extension_valid = true;
}
num_insns += new_insns;
words_read++;
@@ -913,7 +913,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt,
decode_add_endloop_insn(&pkt->insn[pkt->num_insns++], 0);
}
if (words_read >= 3) {
- uint32_t has_loop0, has_loop1;
+ bool has_loop0, has_loop1;
has_loop0 = decode_parsebits_is_loopend(words[0]);
has_loop1 = decode_parsebits_is_loopend(words[1]);
if (has_loop0 && has_loop1) {
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 842d903..d3b45d4 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -19,7 +19,6 @@
#include "qemu/int128.h"
#include "fpu/softfloat.h"
#include "macros.h"
-#include "conv_emu.h"
#include "fma_emu.h"
#define DF_INF_EXP 0x7ff
@@ -64,7 +63,7 @@ typedef union {
};
} Float;
-static inline uint64_t float64_getmant(float64 f64)
+static uint64_t float64_getmant(float64 f64)
{
Double a = { .i = f64 };
if (float64_is_normal(f64)) {
@@ -91,7 +90,7 @@ int32_t float64_getexp(float64 f64)
return -1;
}
-static inline uint64_t float32_getmant(float32 f32)
+static uint64_t float32_getmant(float32 f32)
{
Float a = { .i = f32 };
if (float32_is_normal(f32)) {
@@ -118,17 +117,17 @@ int32_t float32_getexp(float32 f32)
return -1;
}
-static inline uint32_t int128_getw0(Int128 x)
+static uint32_t int128_getw0(Int128 x)
{
return int128_getlo(x);
}
-static inline uint32_t int128_getw1(Int128 x)
+static uint32_t int128_getw1(Int128 x)
{
return int128_getlo(x) >> 32;
}
-static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
+static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
{
Int128 a, b;
uint64_t pp0, pp1a, pp1b, pp1s, pp2;
@@ -152,7 +151,7 @@ static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
return int128_make128(ret_low, pp2 + (pp1s >> 32));
}
-static inline Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
+static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
{
Int128 ret = int128_sub(a, b);
if (borrow != 0) {
@@ -170,7 +169,7 @@ typedef struct {
uint8_t sticky;
} Accum;
-static inline void accum_init(Accum *p)
+static void accum_init(Accum *p)
{
p->mant = int128_zero();
p->exp = 0;
@@ -180,7 +179,7 @@ static inline void accum_init(Accum *p)
p->sticky = 0;
}
-static inline Accum accum_norm_left(Accum a)
+static Accum accum_norm_left(Accum a)
{
a.exp--;
a.mant = int128_lshift(a.mant, 1);
@@ -190,6 +189,7 @@ static inline Accum accum_norm_left(Accum a)
return a;
}
+/* This function is marked inline for performance reasons */
static inline Accum accum_norm_right(Accum a, int amt)
{
if (amt > 130) {
@@ -226,7 +226,7 @@ static inline Accum accum_norm_right(Accum a, int amt)
*/
static Accum accum_add(Accum a, Accum b);
-static inline Accum accum_sub(Accum a, Accum b, int negate)
+static Accum accum_sub(Accum a, Accum b, int negate)
{
Accum ret;
accum_init(&ret);
@@ -329,7 +329,7 @@ static Accum accum_add(Accum a, Accum b)
}
/* Return an infinity with requested sign */
-static inline float64 infinite_float64(uint8_t sign)
+static float64 infinite_float64(uint8_t sign)
{
if (sign) {
return make_float64(DF_MINUS_INF);
@@ -339,7 +339,7 @@ static inline float64 infinite_float64(uint8_t sign)
}
/* Return a maximum finite value with requested sign */
-static inline float64 maxfinite_float64(uint8_t sign)
+static float64 maxfinite_float64(uint8_t sign)
{
if (sign) {
return make_float64(DF_MINUS_MAXF);
@@ -349,7 +349,7 @@ static inline float64 maxfinite_float64(uint8_t sign)
}
/* Return a zero value with requested sign */
-static inline float64 zero_float64(uint8_t sign)
+static float64 zero_float64(uint8_t sign)
{
if (sign) {
return make_float64(0x8000000000000000);
@@ -369,7 +369,7 @@ float32 infinite_float32(uint8_t sign)
}
/* Return a maximum finite value with the requested sign */
-static inline float32 maxfinite_float32(uint8_t sign)
+static float32 maxfinite_float32(uint8_t sign)
{
if (sign) {
return make_float32(SF_MINUS_MAXF);
@@ -379,7 +379,7 @@ static inline float32 maxfinite_float32(uint8_t sign)
}
/* Return a zero value with requested sign */
-static inline float32 zero_float32(uint8_t sign)
+static float32 zero_float32(uint8_t sign)
{
if (sign) {
return make_float32(0x80000000);
@@ -389,7 +389,7 @@ static inline float32 zero_float32(uint8_t sign)
}
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
-static inline SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
+static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
{ \
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
&& ((a.guard | a.round | a.sticky) == 0)) { \
@@ -526,8 +526,8 @@ static bool is_inf_prod(float64 a, float64 b)
(float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
}
-static inline float64 special_fma(float64 a, float64 b, float64 c,
- float_status *fp_status)
+static float64 special_fma(float64 a, float64 b, float64 c,
+ float_status *fp_status)
{
float64 ret = make_float64(0);
@@ -586,8 +586,8 @@ static inline float64 special_fma(float64 a, float64 b, float64 c,
g_assert_not_reached();
}
-static inline float32 special_fmaf(float32 a, float32 b, float32 c,
- float_status *fp_status)
+static float32 special_fmaf(float32 a, float32 b, float32 c,
+ float_status *fp_status)
{
float64 aa, bb, cc;
aa = float32_to_float64(a, fp_status);
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index e044dea..18fcdbc 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -37,7 +37,10 @@
* _sp stack pointer relative r0 = memw(r29+#12)
* _ap absolute set r0 = memw(r1=##variable)
* _pr post increment register r0 = memw(r1++m1)
+ * _pbr post increment bit reverse r0 = memw(r1++m1:brev)
* _pi post increment immediate r0 = memb(r1++#1)
+ * _pci post increment circular immediate r0 = memw(r1++#4:circ(m0))
+ * _pcr post increment circular register r0 = memw(r1++I:circ(m0))
*/
/* Macros for complex addressing modes */
@@ -51,12 +54,32 @@
fEA_REG(RxV); \
fPM_M(RxV, MuV); \
} while (0)
+#define GET_EA_pbr \
+ do { \
+ gen_helper_fbrev(EA, RxV); \
+ tcg_gen_add_tl(RxV, RxV, MuV); \
+ } while (0)
#define GET_EA_pi \
do { \
fEA_REG(RxV); \
fPM_I(RxV, siV); \
} while (0)
-
+#define GET_EA_pci \
+ do { \
+ TCGv tcgv_siV = tcg_const_tl(siV); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_helper_fcircadd(RxV, RxV, tcgv_siV, MuV, \
+ hex_gpr[HEX_REG_CS0 + MuN]); \
+ tcg_temp_free(tcgv_siV); \
+ } while (0)
+#define GET_EA_pcr(SHIFT) \
+ do { \
+ TCGv ireg = tcg_temp_new(); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_read_ireg(ireg, MuV, (SHIFT)); \
+ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
+ tcg_temp_free(ireg); \
+ } while (0)
/* Instructions with multiple definitions */
#define fGEN_TCG_LOAD_AP(RES, SIZE, SIGN) \
@@ -80,20 +103,230 @@
#define fGEN_TCG_L4_loadrd_ap(SHORTCODE) \
fGEN_TCG_LOAD_AP(RddV, 8, u)
+#define fGEN_TCG_L2_loadrub_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrb_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadruh_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrh_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadri_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrd_pci(SHORTCODE) SHORTCODE
+
+#define fGEN_TCG_LOAD_pcr(SHIFT, LOAD) \
+ do { \
+ TCGv ireg = tcg_temp_new(); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_read_ireg(ireg, MuV, SHIFT); \
+ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
+ LOAD; \
+ tcg_temp_free(ireg); \
+ } while (0)
+
+#define fGEN_TCG_L2_loadrub_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, u, EA, RdV))
+#define fGEN_TCG_L2_loadrb_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, s, EA, RdV))
+#define fGEN_TCG_L2_loadruh_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, u, EA, RdV))
+#define fGEN_TCG_L2_loadrh_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, s, EA, RdV))
+#define fGEN_TCG_L2_loadri_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(2, fLOAD(1, 4, u, EA, RdV))
+#define fGEN_TCG_L2_loadrd_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(3, fLOAD(1, 8, u, EA, RddV))
+
#define fGEN_TCG_L2_loadrub_pr(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrub_pbr(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrub_pi(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrb_pr(SHORTCODE) SHORTCODE
-#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE;
+#define fGEN_TCG_L2_loadrb_pbr(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadruh_pr(SHORTCODE) SHORTCODE
-#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE;
+#define fGEN_TCG_L2_loadruh_pbr(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrh_pr(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrh_pbr(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrh_pi(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadri_pr(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadri_pbr(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadri_pi(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrd_pr(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrd_pbr(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrd_pi(SHORTCODE) SHORTCODE
/*
+ * These instructions load 2 bytes and place them in
+ * two halves of the destination register.
+ * The GET_EA macro determines the addressing mode.
+ * The SIGN argument determines whether to zero-extend or
+ * sign-extend.
+ */
+#define fGEN_TCG_loadbXw2(GET_EA, SIGN) \
+ do { \
+ TCGv tmp = tcg_temp_new(); \
+ TCGv byte = tcg_temp_new(); \
+ GET_EA; \
+ fLOAD(1, 2, u, EA, tmp); \
+ tcg_gen_movi_tl(RdV, 0); \
+ for (int i = 0; i < 2; i++) { \
+ gen_set_half(i, RdV, gen_get_byte(byte, i, tmp, (SIGN))); \
+ } \
+ tcg_temp_free(tmp); \
+ tcg_temp_free(byte); \
+ } while (0)
+
+#define fGEN_TCG_L2_loadbzw2_io(SHORTCODE) \
+ fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), false)
+#define fGEN_TCG_L4_loadbzw2_ur(SHORTCODE) \
+ fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), false)
+#define fGEN_TCG_L2_loadbsw2_io(SHORTCODE) \
+ fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), true)
+#define fGEN_TCG_L4_loadbsw2_ur(SHORTCODE) \
+ fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), true)
+#define fGEN_TCG_L4_loadbzw2_ap(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_ap, false)
+#define fGEN_TCG_L2_loadbzw2_pr(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pr, false)
+#define fGEN_TCG_L2_loadbzw2_pbr(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pbr, false)
+#define fGEN_TCG_L2_loadbzw2_pi(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pi, false)
+#define fGEN_TCG_L4_loadbsw2_ap(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_ap, true)
+#define fGEN_TCG_L2_loadbsw2_pr(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pr, true)
+#define fGEN_TCG_L2_loadbsw2_pbr(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pbr, true)
+#define fGEN_TCG_L2_loadbsw2_pi(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pi, true)
+#define fGEN_TCG_L2_loadbzw2_pci(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pci, false)
+#define fGEN_TCG_L2_loadbsw2_pci(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pci, true)
+#define fGEN_TCG_L2_loadbzw2_pcr(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pcr(1), false)
+#define fGEN_TCG_L2_loadbsw2_pcr(SHORTCODE) \
+ fGEN_TCG_loadbXw2(GET_EA_pcr(1), true)
+
+/*
+ * These instructions load 4 bytes and place them in
+ * four halves of the destination register pair.
+ * The GET_EA macro determines the addressing mode.
+ * The SIGN argument determines whether to zero-extend or
+ * sign-extend.
+ */
+#define fGEN_TCG_loadbXw4(GET_EA, SIGN) \
+ do { \
+ TCGv tmp = tcg_temp_new(); \
+ TCGv byte = tcg_temp_new(); \
+ GET_EA; \
+ fLOAD(1, 4, u, EA, tmp); \
+ tcg_gen_movi_i64(RddV, 0); \
+ for (int i = 0; i < 4; i++) { \
+ gen_set_half_i64(i, RddV, gen_get_byte(byte, i, tmp, (SIGN))); \
+ } \
+ tcg_temp_free(tmp); \
+ tcg_temp_free(byte); \
+ } while (0)
+
+#define fGEN_TCG_L2_loadbzw4_io(SHORTCODE) \
+ fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), false)
+#define fGEN_TCG_L4_loadbzw4_ur(SHORTCODE) \
+ fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), false)
+#define fGEN_TCG_L2_loadbsw4_io(SHORTCODE) \
+ fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), true)
+#define fGEN_TCG_L4_loadbsw4_ur(SHORTCODE) \
+ fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), true)
+#define fGEN_TCG_L2_loadbzw4_pci(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pci, false)
+#define fGEN_TCG_L2_loadbsw4_pci(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pci, true)
+#define fGEN_TCG_L2_loadbzw4_pcr(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pcr(2), false)
+#define fGEN_TCG_L2_loadbsw4_pcr(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pcr(2), true)
+#define fGEN_TCG_L4_loadbzw4_ap(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_ap, false)
+#define fGEN_TCG_L2_loadbzw4_pr(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pr, false)
+#define fGEN_TCG_L2_loadbzw4_pbr(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pbr, false)
+#define fGEN_TCG_L2_loadbzw4_pi(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pi, false)
+#define fGEN_TCG_L4_loadbsw4_ap(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_ap, true)
+#define fGEN_TCG_L2_loadbsw4_pr(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pr, true)
+#define fGEN_TCG_L2_loadbsw4_pbr(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pbr, true)
+#define fGEN_TCG_L2_loadbsw4_pi(SHORTCODE) \
+ fGEN_TCG_loadbXw4(GET_EA_pi, true)
+
+/*
+ * These instructions load a half word, shift the destination right by 16 bits
+ * and place the loaded value in the high half word of the destination pair.
+ * The GET_EA macro determines the addressing mode.
+ */
+#define fGEN_TCG_loadalignh(GET_EA) \
+ do { \
+ TCGv tmp = tcg_temp_new(); \
+ TCGv_i64 tmp_i64 = tcg_temp_new_i64(); \
+ GET_EA; \
+ fLOAD(1, 2, u, EA, tmp); \
+ tcg_gen_extu_i32_i64(tmp_i64, tmp); \
+ tcg_gen_shri_i64(RyyV, RyyV, 16); \
+ tcg_gen_deposit_i64(RyyV, RyyV, tmp_i64, 48, 16); \
+ tcg_temp_free(tmp); \
+ tcg_temp_free_i64(tmp_i64); \
+ } while (0)
+
+#define fGEN_TCG_L4_loadalignh_ur(SHORTCODE) \
+ fGEN_TCG_loadalignh(fEA_IRs(UiV, RtV, uiV))
+#define fGEN_TCG_L2_loadalignh_io(SHORTCODE) \
+ fGEN_TCG_loadalignh(fEA_RI(RsV, siV))
+#define fGEN_TCG_L2_loadalignh_pci(SHORTCODE) \
+ fGEN_TCG_loadalignh(GET_EA_pci)
+#define fGEN_TCG_L2_loadalignh_pcr(SHORTCODE) \
+ fGEN_TCG_loadalignh(GET_EA_pcr(1))
+#define fGEN_TCG_L4_loadalignh_ap(SHORTCODE) \
+ fGEN_TCG_loadalignh(GET_EA_ap)
+#define fGEN_TCG_L2_loadalignh_pr(SHORTCODE) \
+ fGEN_TCG_loadalignh(GET_EA_pr)
+#define fGEN_TCG_L2_loadalignh_pbr(SHORTCODE) \
+ fGEN_TCG_loadalignh(GET_EA_pbr)
+#define fGEN_TCG_L2_loadalignh_pi(SHORTCODE) \
+ fGEN_TCG_loadalignh(GET_EA_pi)
+
+/* Same as above, but loads a byte instead of half word */
+#define fGEN_TCG_loadalignb(GET_EA) \
+ do { \
+ TCGv tmp = tcg_temp_new(); \
+ TCGv_i64 tmp_i64 = tcg_temp_new_i64(); \
+ GET_EA; \
+ fLOAD(1, 1, u, EA, tmp); \
+ tcg_gen_extu_i32_i64(tmp_i64, tmp); \
+ tcg_gen_shri_i64(RyyV, RyyV, 8); \
+ tcg_gen_deposit_i64(RyyV, RyyV, tmp_i64, 56, 8); \
+ tcg_temp_free(tmp); \
+ tcg_temp_free_i64(tmp_i64); \
+ } while (0)
+
+#define fGEN_TCG_L2_loadalignb_io(SHORTCODE) \
+ fGEN_TCG_loadalignb(fEA_RI(RsV, siV))
+#define fGEN_TCG_L4_loadalignb_ur(SHORTCODE) \
+ fGEN_TCG_loadalignb(fEA_IRs(UiV, RtV, uiV))
+#define fGEN_TCG_L2_loadalignb_pci(SHORTCODE) \
+ fGEN_TCG_loadalignb(GET_EA_pci)
+#define fGEN_TCG_L2_loadalignb_pcr(SHORTCODE) \
+ fGEN_TCG_loadalignb(GET_EA_pcr(0))
+#define fGEN_TCG_L4_loadalignb_ap(SHORTCODE) \
+ fGEN_TCG_loadalignb(GET_EA_ap)
+#define fGEN_TCG_L2_loadalignb_pr(SHORTCODE) \
+ fGEN_TCG_loadalignb(GET_EA_pr)
+#define fGEN_TCG_L2_loadalignb_pbr(SHORTCODE) \
+ fGEN_TCG_loadalignb(GET_EA_pbr)
+#define fGEN_TCG_L2_loadalignb_pi(SHORTCODE) \
+ fGEN_TCG_loadalignb(GET_EA_pi)
+
+/*
* Predicated loads
* Here is a primer to understand the tag names
*
@@ -195,6 +428,191 @@
#define fGEN_TCG_S4_stored_locked(SHORTCODE) \
do { SHORTCODE; READ_PREG(PdV, PdN); } while (0)
+#define fGEN_TCG_STORE(SHORTCODE) \
+ do { \
+ TCGv HALF = tcg_temp_new(); \
+ TCGv BYTE = tcg_temp_new(); \
+ SHORTCODE; \
+ tcg_temp_free(HALF); \
+ tcg_temp_free(BYTE); \
+ } while (0)
+
+#define fGEN_TCG_STORE_pcr(SHIFT, STORE) \
+ do { \
+ TCGv ireg = tcg_temp_new(); \
+ TCGv HALF = tcg_temp_new(); \
+ TCGv BYTE = tcg_temp_new(); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_read_ireg(ireg, MuV, SHIFT); \
+ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
+ STORE; \
+ tcg_temp_free(ireg); \
+ tcg_temp_free(HALF); \
+ tcg_temp_free(BYTE); \
+ } while (0)
+
+#define fGEN_TCG_S2_storerb_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerb_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerb_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, RtV)))
+
+#define fGEN_TCG_S2_storerh_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerh_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerh_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, RtV)))
+
+#define fGEN_TCG_S2_storerf_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerf_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerf_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(1, RtV)))
+
+#define fGEN_TCG_S2_storeri_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storeri_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storeri_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, RtV))
+
+#define fGEN_TCG_S2_storerd_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerd_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerd_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(3, fSTORE(1, 8, EA, RttV))
+
+#define fGEN_TCG_S2_storerbnew_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerbnew_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerbnew_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, NtN)))
+
+#define fGEN_TCG_S2_storerhnew_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerhnew_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerhnew_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, NtN)))
+
+#define fGEN_TCG_S2_storerinew_pbr(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerinew_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
+
+/*
+ * Mathematical operations with more than one definition require
+ * special handling
+ */
+#define fGEN_TCG_A5_ACS(SHORTCODE) \
+ do { \
+ gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \
+ gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \
+ } while (0)
+
+/*
+ * Approximate reciprocal
+ * r3,p1 = sfrecipa(r0, r1)
+ *
+ * The helper packs the 2 32-bit results into a 64-bit value,
+ * so unpack them into the proper results.
+ */
+#define fGEN_TCG_F2_sfrecipa(SHORTCODE) \
+ do { \
+ TCGv_i64 tmp = tcg_temp_new_i64(); \
+ gen_helper_sfrecipa(tmp, cpu_env, RsV, RtV); \
+ tcg_gen_extrh_i64_i32(RdV, tmp); \
+ tcg_gen_extrl_i64_i32(PeV, tmp); \
+ tcg_temp_free_i64(tmp); \
+ } while (0)
+
+/*
+ * Approximation of the reciprocal square root
+ * r1,p0 = sfinvsqrta(r0)
+ *
+ * The helper packs the 2 32-bit results into a 64-bit value,
+ * so unpack them into the proper results.
+ */
+#define fGEN_TCG_F2_sfinvsqrta(SHORTCODE) \
+ do { \
+ TCGv_i64 tmp = tcg_temp_new_i64(); \
+ gen_helper_sfinvsqrta(tmp, cpu_env, RsV); \
+ tcg_gen_extrh_i64_i32(RdV, tmp); \
+ tcg_gen_extrl_i64_i32(PeV, tmp); \
+ tcg_temp_free_i64(tmp); \
+ } while (0)
+
+/*
+ * Add or subtract with carry.
+ * Predicate register is used as an extra input and output.
+ * r5:4 = add(r1:0, r3:2, p1):carry
+ */
+#define fGEN_TCG_A4_addp_c(SHORTCODE) \
+ do { \
+ TCGv_i64 carry = tcg_temp_new_i64(); \
+ TCGv_i64 zero = tcg_const_i64(0); \
+ tcg_gen_extu_i32_i64(carry, PxV); \
+ tcg_gen_andi_i64(carry, carry, 1); \
+ tcg_gen_add2_i64(RddV, carry, RssV, zero, carry, zero); \
+ tcg_gen_add2_i64(RddV, carry, RddV, carry, RttV, zero); \
+ tcg_gen_extrl_i64_i32(PxV, carry); \
+ gen_8bitsof(PxV, PxV); \
+ tcg_temp_free_i64(carry); \
+ tcg_temp_free_i64(zero); \
+ } while (0)
+
+/* r5:4 = sub(r1:0, r3:2, p1):carry */
+#define fGEN_TCG_A4_subp_c(SHORTCODE) \
+ do { \
+ TCGv_i64 carry = tcg_temp_new_i64(); \
+ TCGv_i64 zero = tcg_const_i64(0); \
+ TCGv_i64 not_RttV = tcg_temp_new_i64(); \
+ tcg_gen_extu_i32_i64(carry, PxV); \
+ tcg_gen_andi_i64(carry, carry, 1); \
+ tcg_gen_not_i64(not_RttV, RttV); \
+ tcg_gen_add2_i64(RddV, carry, RssV, zero, carry, zero); \
+ tcg_gen_add2_i64(RddV, carry, RddV, carry, not_RttV, zero); \
+ tcg_gen_extrl_i64_i32(PxV, carry); \
+ gen_8bitsof(PxV, PxV); \
+ tcg_temp_free_i64(carry); \
+ tcg_temp_free_i64(zero); \
+ tcg_temp_free_i64(not_RttV); \
+ } while (0)
+
+/*
+ * Compare each of the 8 unsigned bytes
+ * The minimum is placed in each byte of the destination.
+ * Each bit of the predicate is set true if the byte from the first operand
+ * is greater than the byte from the second operand.
+ * r5:4,p1 = vminub(r1:0, r3:2)
+ */
+#define fGEN_TCG_A6_vminub_RdP(SHORTCODE) \
+ do { \
+ TCGv left = tcg_temp_new(); \
+ TCGv right = tcg_temp_new(); \
+ TCGv tmp = tcg_temp_new(); \
+ tcg_gen_movi_tl(PeV, 0); \
+ tcg_gen_movi_i64(RddV, 0); \
+ for (int i = 0; i < 8; i++) { \
+ gen_get_byte_i64(left, i, RttV, false); \
+ gen_get_byte_i64(right, i, RssV, false); \
+ tcg_gen_setcond_tl(TCG_COND_GT, tmp, left, right); \
+ tcg_gen_deposit_tl(PeV, PeV, tmp, i, 1); \
+ tcg_gen_umin_tl(tmp, left, right); \
+ gen_set_byte_i64(i, RddV, tmp); \
+ } \
+ tcg_temp_free(left); \
+ tcg_temp_free(right); \
+ tcg_temp_free(tmp); \
+ } while (0)
+
/* Floating point */
#define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
gen_helper_conv_sf2df(RddV, cpu_env, RsV)
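The fGEN_TCG_A4_addp_c macro above threads the carry through two tcg_gen_add2_i64 calls: the first adds the incoming carry bit to one source pair, the second adds the other source pair while accumulating the carry-out. A plain-C model of that chain (illustrative sketch, not part of the patch):

#include <stdint.h>
#include <stdbool.h>

/* r5:4 = add(r1:0, r3:2, p1):carry, modelled with 64-bit host arithmetic. */
static uint64_t addp_c_model(uint64_t rss, uint64_t rtt, bool carry_in,
                             bool *carry_out)
{
    uint64_t t = rss + (carry_in ? 1 : 0);
    bool c1 = t < rss;            /* carry from adding the incoming carry */
    uint64_t r = t + rtt;
    bool c2 = r < t;              /* carry from adding the second operand */
    *carry_out = c1 || c2;        /* at most one of the two can be set */
    return r;
}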
diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py
index db9f663..7ceb25b 100755
--- a/target/hexagon/gen_tcg_funcs.py
+++ b/target/hexagon/gen_tcg_funcs.py
@@ -316,7 +316,7 @@ def genptr_dst_write(f, tag, regtype, regid):
print("Bad register parse: ", regtype, regid)
elif (regtype == "P"):
if (regid in {"d", "e", "x"}):
- f.write(" gen_log_pred_write(%s%sN, %s%sV);\n" % \
+ f.write(" gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \
(regtype, regid, regtype, regid))
f.write(" ctx_log_pred_write(ctx, %s%sN);\n" % \
(regtype, regid))
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 7481f4c..f93f895 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -15,7 +15,6 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#define QEMU_GENERATE
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
@@ -24,7 +23,9 @@
#include "insn.h"
#include "opcodes.h"
#include "translate.h"
+#define QEMU_GENERATE /* Used internally by macros.h */
#include "macros.h"
+#undef QEMU_GENERATE
#include "gen_tcg.h"
static inline TCGv gen_read_preg(TCGv pred, uint8_t num)
@@ -35,20 +36,24 @@ static inline TCGv gen_read_preg(TCGv pred, uint8_t num)
static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot)
{
- TCGv one = tcg_const_tl(1);
TCGv zero = tcg_const_tl(0);
TCGv slot_mask = tcg_temp_new();
tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero,
val, hex_new_value[rnum]);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum], slot_mask, zero,
- one, hex_reg_written[rnum]);
-#endif
+ if (HEX_DEBUG) {
+ /*
+ * Do this so HELPER(debug_commit_end) will know
+ *
+ * Note that slot_mask indicates the value is not written
+ * (i.e., slot was cancelled), so we create a true/false value before
+ * or'ing with hex_reg_written[rnum].
+ */
+ tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
+ tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
+ }
- tcg_temp_free(one);
tcg_temp_free(zero);
tcg_temp_free(slot_mask);
}
@@ -56,45 +61,44 @@ static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot)
static inline void gen_log_reg_write(int rnum, TCGv val)
{
tcg_gen_mov_tl(hex_new_value[rnum], val);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_movi_tl(hex_reg_written[rnum], 1);
-#endif
+ if (HEX_DEBUG) {
+ /* Do this so HELPER(debug_commit_end) will know */
+ tcg_gen_movi_tl(hex_reg_written[rnum], 1);
+ }
}
static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, int slot)
{
TCGv val32 = tcg_temp_new();
- TCGv one = tcg_const_tl(1);
TCGv zero = tcg_const_tl(0);
TCGv slot_mask = tcg_temp_new();
tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot);
/* Low word */
tcg_gen_extrl_i64_i32(val32, val);
- tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero,
- val32, hex_new_value[rnum]);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum],
+ tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum],
slot_mask, zero,
- one, hex_reg_written[rnum]);
-#endif
-
+ val32, hex_new_value[rnum]);
/* High word */
tcg_gen_extrh_i64_i32(val32, val);
tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1],
slot_mask, zero,
val32, hex_new_value[rnum + 1]);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum + 1],
- slot_mask, zero,
- one, hex_reg_written[rnum + 1]);
-#endif
+ if (HEX_DEBUG) {
+ /*
+ * Do this so HELPER(debug_commit_end) will know
+ *
+ * Note that slot_mask indicates the value is not written
+ * (i.e., slot was cancelled), so we create a true/false value before
+ * or'ing with hex_reg_written[rnum].
+ */
+ tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero);
+ tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask);
+ tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1],
+ slot_mask);
+ }
tcg_temp_free(val32);
- tcg_temp_free(one);
tcg_temp_free(zero);
tcg_temp_free(slot_mask);
}
@@ -103,33 +107,41 @@ static void gen_log_reg_write_pair(int rnum, TCGv_i64 val)
{
/* Low word */
tcg_gen_extrl_i64_i32(hex_new_value[rnum], val);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_movi_tl(hex_reg_written[rnum], 1);
-#endif
+ if (HEX_DEBUG) {
+ /* Do this so HELPER(debug_commit_end) will know */
+ tcg_gen_movi_tl(hex_reg_written[rnum], 1);
+ }
/* High word */
tcg_gen_extrh_i64_i32(hex_new_value[rnum + 1], val);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1);
-#endif
+ if (HEX_DEBUG) {
+ /* Do this so HELPER(debug_commit_end) will know */
+ tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1);
+ }
}
-static inline void gen_log_pred_write(int pnum, TCGv val)
+static inline void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)
{
TCGv zero = tcg_const_tl(0);
TCGv base_val = tcg_temp_new();
TCGv and_val = tcg_temp_new();
TCGv pred_written = tcg_temp_new();
- /* Multiple writes to the same preg are and'ed together */
tcg_gen_andi_tl(base_val, val, 0xff);
- tcg_gen_and_tl(and_val, base_val, hex_new_pred_value[pnum]);
- tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pnum);
- tcg_gen_movcond_tl(TCG_COND_NE, hex_new_pred_value[pnum],
- pred_written, zero,
- and_val, base_val);
+
+ /*
+ * Section 6.1.3 of the Hexagon V67 Programmer's Reference Manual
+ *
+ * Multiple writes to the same preg are and'ed together
+ * If this is the first predicate write in the packet, do a
+ * straight assignment. Otherwise, do an and.
+ */
+ if (!test_bit(pnum, ctx->pregs_written)) {
+ tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val);
+ } else {
+ tcg_gen_and_tl(hex_new_pred_value[pnum],
+ hex_new_pred_value[pnum], base_val);
+ }
tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum);
tcg_temp_free(zero);
@@ -254,6 +266,61 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num,
}
}
+static TCGv gen_get_byte(TCGv result, int N, TCGv src, bool sign)
+{
+ if (sign) {
+ tcg_gen_sextract_tl(result, src, N * 8, 8);
+ } else {
+ tcg_gen_extract_tl(result, src, N * 8, 8);
+ }
+ return result;
+}
+
+static TCGv gen_get_byte_i64(TCGv result, int N, TCGv_i64 src, bool sign)
+{
+ TCGv_i64 res64 = tcg_temp_new_i64();
+ if (sign) {
+ tcg_gen_sextract_i64(res64, src, N * 8, 8);
+ } else {
+ tcg_gen_extract_i64(res64, src, N * 8, 8);
+ }
+ tcg_gen_extrl_i64_i32(result, res64);
+ tcg_temp_free_i64(res64);
+
+ return result;
+}
+
+static inline TCGv gen_get_half(TCGv result, int N, TCGv src, bool sign)
+{
+ if (sign) {
+ tcg_gen_sextract_tl(result, src, N * 16, 16);
+ } else {
+ tcg_gen_extract_tl(result, src, N * 16, 16);
+ }
+ return result;
+}
+
+static inline void gen_set_half(int N, TCGv result, TCGv src)
+{
+ tcg_gen_deposit_tl(result, result, src, N * 16, 16);
+}
+
+static inline void gen_set_half_i64(int N, TCGv_i64 result, TCGv src)
+{
+ TCGv_i64 src64 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(src64, src);
+ tcg_gen_deposit_i64(result, result, src64, N * 16, 16);
+ tcg_temp_free_i64(src64);
+}
+
+static void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src)
+{
+ TCGv_i64 src64 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(src64, src);
+ tcg_gen_deposit_i64(result, result, src64, N * 8, 8);
+ tcg_temp_free_i64(src64);
+}
+
static inline void gen_load_locked4u(TCGv dest, TCGv vaddr, int mem_index)
{
tcg_gen_qemu_ld32u(dest, vaddr, mem_index);
@@ -327,5 +394,85 @@ static inline void gen_store_conditional8(CPUHexagonState *env,
tcg_gen_movi_tl(hex_llsc_addr, ~0);
}
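+/*
+ * Scalar stores are not issued immediately; they are staged in the
+ * hex_store_* buffers for the given slot and performed when the packet
+ * commits (see HELPER(commit_store)).
+ */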
+static inline void gen_store32(TCGv vaddr, TCGv src, int width, int slot)
+{
+ tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
+ tcg_gen_movi_tl(hex_store_width[slot], width);
+ tcg_gen_mov_tl(hex_store_val32[slot], src);
+}
+
+static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src,
+ DisasContext *ctx, int slot)
+{
+ gen_store32(vaddr, src, 1, slot);
+ ctx->store_width[slot] = 1;
+}
+
+static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
+ DisasContext *ctx, int slot)
+{
+ TCGv tmp = tcg_const_tl(src);
+ gen_store1(cpu_env, vaddr, tmp, ctx, slot);
+ tcg_temp_free(tmp);
+}
+
+static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src,
+ DisasContext *ctx, int slot)
+{
+ gen_store32(vaddr, src, 2, slot);
+ ctx->store_width[slot] = 2;
+}
+
+static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
+ DisasContext *ctx, int slot)
+{
+ TCGv tmp = tcg_const_tl(src);
+ gen_store2(cpu_env, vaddr, tmp, ctx, slot);
+ tcg_temp_free(tmp);
+}
+
+static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src,
+ DisasContext *ctx, int slot)
+{
+ gen_store32(vaddr, src, 4, slot);
+ ctx->store_width[slot] = 4;
+}
+
+static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src,
+ DisasContext *ctx, int slot)
+{
+ TCGv tmp = tcg_const_tl(src);
+ gen_store4(cpu_env, vaddr, tmp, ctx, slot);
+ tcg_temp_free(tmp);
+}
+
+static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src,
+ DisasContext *ctx, int slot)
+{
+ tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
+ tcg_gen_movi_tl(hex_store_width[slot], 8);
+ tcg_gen_mov_i64(hex_store_val64[slot], src);
+ ctx->store_width[slot] = 8;
+}
+
+static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src,
+ DisasContext *ctx, int slot)
+{
+ TCGv_i64 tmp = tcg_const_i64(src);
+ gen_store8(cpu_env, vaddr, tmp, ctx, slot);
+ tcg_temp_free_i64(tmp);
+}
+
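+/* Map a zero/non-zero value to 0x00/0xff (the f8BITSOF semantics) */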
+static TCGv gen_8bitsof(TCGv result, TCGv value)
+{
+ TCGv zero = tcg_const_tl(0);
+ TCGv ones = tcg_const_tl(0xff);
+ tcg_gen_movcond_tl(TCG_COND_NE, result, value, zero, ones, zero);
+ tcg_temp_free(zero);
+ tcg_temp_free(ones);
+
+ return result;
+}
+
#include "tcg_funcs_generated.c.inc"
#include "tcg_func_table_generated.c.inc"
diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h
index a5f340c..ca201fb 100644
--- a/target/hexagon/helper.h
+++ b/target/hexagon/helper.h
@@ -19,13 +19,16 @@
#include "helper_protos_generated.h.inc"
DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32)
-#if HEX_DEBUG
DEF_HELPER_1(debug_start_packet, void, env)
DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int)
DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int)
-#endif
DEF_HELPER_2(commit_store, void, env, int)
DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32)
+DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_3(sfrecipa, i64, env, f32, f32)
+DEF_HELPER_2(sfinvsqrta, i64, env, f32)
+DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64)
+DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64)
/* Floating point */
DEF_HELPER_2(conv_sf2df, f64, env, f32)
@@ -38,21 +41,21 @@ DEF_HELPER_2(conv_ud2sf, f32, env, s64)
DEF_HELPER_2(conv_ud2df, f64, env, s64)
DEF_HELPER_2(conv_d2sf, f32, env, s64)
DEF_HELPER_2(conv_d2df, f64, env, s64)
-DEF_HELPER_2(conv_sf2uw, s32, env, f32)
+DEF_HELPER_2(conv_sf2uw, i32, env, f32)
DEF_HELPER_2(conv_sf2w, s32, env, f32)
-DEF_HELPER_2(conv_sf2ud, s64, env, f32)
+DEF_HELPER_2(conv_sf2ud, i64, env, f32)
DEF_HELPER_2(conv_sf2d, s64, env, f32)
-DEF_HELPER_2(conv_df2uw, s32, env, f64)
+DEF_HELPER_2(conv_df2uw, i32, env, f64)
DEF_HELPER_2(conv_df2w, s32, env, f64)
-DEF_HELPER_2(conv_df2ud, s64, env, f64)
+DEF_HELPER_2(conv_df2ud, i64, env, f64)
DEF_HELPER_2(conv_df2d, s64, env, f64)
-DEF_HELPER_2(conv_sf2uw_chop, s32, env, f32)
+DEF_HELPER_2(conv_sf2uw_chop, i32, env, f32)
DEF_HELPER_2(conv_sf2w_chop, s32, env, f32)
-DEF_HELPER_2(conv_sf2ud_chop, s64, env, f32)
+DEF_HELPER_2(conv_sf2ud_chop, i64, env, f32)
DEF_HELPER_2(conv_sf2d_chop, s64, env, f32)
-DEF_HELPER_2(conv_df2uw_chop, s32, env, f64)
+DEF_HELPER_2(conv_df2uw_chop, i32, env, f64)
DEF_HELPER_2(conv_df2w_chop, s32, env, f64)
-DEF_HELPER_2(conv_df2ud_chop, s64, env, f64)
+DEF_HELPER_2(conv_df2ud_chop, i64, env, f64)
DEF_HELPER_2(conv_df2d_chop, s64, env, f64)
DEF_HELPER_3(sfadd, f32, env, f32, f32)
DEF_HELPER_3(sfsub, f32, env, f32, f32)
diff --git a/target/hexagon/iclass.c b/target/hexagon/iclass.c
index 378d8a6..6091286 100644
--- a/target/hexagon/iclass.c
+++ b/target/hexagon/iclass.c
@@ -53,10 +53,6 @@ SlotMask find_iclass_slots(Opcode opcode, int itype)
(opcode == Y2_isync) ||
(opcode == J2_pause) || (opcode == J4_hintjumpr)) {
return SLOTS_2;
- } else if ((itype == ICLASS_V2LDST) && (GET_ATTRIB(opcode, A_STORE))) {
- return SLOTS_01;
- } else if ((itype == ICLASS_V2LDST) && (!GET_ATTRIB(opcode, A_STORE))) {
- return SLOTS_01;
} else if (GET_ATTRIB(opcode, A_CRSLOT23)) {
return SLOTS_23;
} else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) {
diff --git a/target/hexagon/imported/alu.idef b/target/hexagon/imported/alu.idef
index 45cc529..58477ae 100644
--- a/target/hexagon/imported/alu.idef
+++ b/target/hexagon/imported/alu.idef
@@ -153,6 +153,21 @@ Q6INSN(A2_subp,"Rdd32=sub(Rtt32,Rss32)",ATTRIBS(),
"Sub",
{ RddV=RttV-RssV;})
+/* 64-bit with carry */
+
+Q6INSN(A4_addp_c,"Rdd32=add(Rss32,Rtt32,Px4):carry",ATTRIBS(),"Add with Carry",
+{
+ RddV = RssV + RttV + fLSBOLD(PxV);
+ PxV = f8BITSOF(fCARRY_FROM_ADD(RssV,RttV,fLSBOLD(PxV)));
+})
+
+Q6INSN(A4_subp_c,"Rdd32=sub(Rss32,Rtt32,Px4):carry",ATTRIBS(),"Sub with Carry",
+{
+ RddV = RssV + ~RttV + fLSBOLD(PxV);
+ PxV = f8BITSOF(fCARRY_FROM_ADD(RssV,~RttV,fLSBOLD(PxV)));
+})
+
+
/* NEG and ABS */
Q6INSN(A2_negsat,"Rd32=neg(Rs32):sat",ATTRIBS(),
@@ -1240,6 +1255,35 @@ MINMAX(uw,WORD,UWORD,2)
#undef VMINORMAX3
+Q6INSN(A5_ACS,"Rxx32,Pe4=vacsh(Rss32,Rtt32)",ATTRIBS(),
+"Add Compare and Select elements of two vectors, record the maximums and the decisions ",
+{
+ fHIDE(int i;)
+ fHIDE(int xv;)
+ fHIDE(int sv;)
+ fHIDE(int tv;)
+ for (i = 0; i < 4; i++) {
+ xv = (int) fGETHALF(i,RxxV);
+ sv = (int) fGETHALF(i,RssV);
+ tv = (int) fGETHALF(i,RttV);
+ xv = xv + tv; //assumes 17bit datapath
+ sv = sv - tv; //assumes 17bit datapath
+ fSETBIT(i*2, PeV, (xv > sv));
+ fSETBIT(i*2+1,PeV, (xv > sv));
+ fSETHALF(i, RxxV, fSATH(fMAX(xv,sv)));
+ }
+})
+
+Q6INSN(A6_vminub_RdP,"Rdd32,Pe4=vminub(Rtt32,Rss32)",ATTRIBS(),
+"Vector minimum of bytes, records minimum and decision vector",
+{
+ fHIDE(int i;)
+ for (i = 0; i < 8; i++) {
+ fSETBIT(i, PeV, (fGETUBYTE(i,RttV) > fGETUBYTE(i,RssV)));
+ fSETBYTE(i,RddV,fMIN(fGETUBYTE(i,RttV),fGETUBYTE(i,RssV)));
+ }
+})
+
/**********************************************/
/* Vector Min/Max */
/**********************************************/
diff --git a/target/hexagon/imported/compare.idef b/target/hexagon/imported/compare.idef
index 3551467..abd016f 100644
--- a/target/hexagon/imported/compare.idef
+++ b/target/hexagon/imported/compare.idef
@@ -198,11 +198,11 @@ Q6INSN(C4_or_orn,"Pd4=or(Ps4,or(Pt4,!Pu4))",ATTRIBS(A_CRSLOT23),
Q6INSN(C2_any8,"Pd4=any8(Ps4)",ATTRIBS(A_CRSLOT23),
"Logical ANY of low 8 predicate bits",
-{ PsV ? (PdV=0xff) : (PdV=0x00); })
+{ PdV = (PsV ? 0xff : 0x00); })
Q6INSN(C2_all8,"Pd4=all8(Ps4)",ATTRIBS(A_CRSLOT23),
"Logical ALL of low 8 predicate bits",
-{ (PsV==0xff) ? (PdV=0xff) : (PdV=0x00); })
+{ PdV = (PsV == 0xff ? 0xff : 0x00); })
Q6INSN(C2_vitpack,"Rd32=vitpack(Ps4,Pt4)",ATTRIBS(),
"Pack the odd and even bits of two predicate registers",
@@ -212,7 +212,7 @@ Q6INSN(C2_vitpack,"Rd32=vitpack(Ps4,Pt4)",ATTRIBS(),
Q6INSN(C2_mux,"Rd32=mux(Pu4,Rs32,Rt32)",ATTRIBS(),
"Scalar MUX",
-{ (fLSBOLD(PuV)) ? (RdV=RsV):(RdV=RtV); })
+{ RdV = (fLSBOLD(PuV) ? RsV : RtV); })
Q6INSN(C2_cmovenewit,"if (Pu4.new) Rd32=#s12",ATTRIBS(A_ARCHV2),
@@ -269,18 +269,18 @@ Q6INSN(C2_ccombinewf,"if (!Pu4) Rdd32=combine(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
Q6INSN(C2_muxii,"Rd32=mux(Pu4,#s8,#S8)",ATTRIBS(A_ARCHV2),
"Scalar MUX immediates",
-{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=siV):(RdV=SiV); })
+{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? siV : SiV); })
Q6INSN(C2_muxir,"Rd32=mux(Pu4,Rs32,#s8)",ATTRIBS(A_ARCHV2),
"Scalar MUX register immediate",
-{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=RsV):(RdV=siV); })
+{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? RsV : siV); })
Q6INSN(C2_muxri,"Rd32=mux(Pu4,#s8,Rs32)",ATTRIBS(A_ARCHV2),
"Scalar MUX register immediate",
-{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=siV):(RdV=RsV); })
+{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? siV : RsV); })
diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def
index c21cb73..35ae3d2 100644
--- a/target/hexagon/imported/encode_pp.def
+++ b/target/hexagon/imported/encode_pp.def
@@ -294,12 +294,14 @@ DEF_CLASS32(ICLASS_LD" ---- -------- PP------ --------",LD)
DEF_CLASS32(ICLASS_LD" 0--- -------- PP------ --------",LD_ADDR_ROFFSET)
+DEF_CLASS32(ICLASS_LD" 100- -------- PP----0- --------",LD_ADDR_POST_CIRC_IMMED)
DEF_CLASS32(ICLASS_LD" 101- -------- PP00---- --------",LD_ADDR_POST_IMMED)
DEF_CLASS32(ICLASS_LD" 101- -------- PP01---- --------",LD_ADDR_ABS_UPDATE_V4)
DEF_CLASS32(ICLASS_LD" 101- -------- PP1----- --------",LD_ADDR_POST_IMMED_PRED_V2)
DEF_CLASS32(ICLASS_LD" 110- -------- PP-0---- 0-------",LD_ADDR_POST_REG)
DEF_CLASS32(ICLASS_LD" 110- -------- PP-1---- --------",LD_ADDR_ABS_PLUS_REG_V4)
DEF_CLASS32(ICLASS_LD" 100- -------- PP----1- --------",LD_ADDR_POST_CREG_V2)
+DEF_CLASS32(ICLASS_LD" 111- -------- PP------ 0-------",LD_ADDR_POST_BREV_REG)
DEF_CLASS32(ICLASS_LD" 111- -------- PP------ 1-------",LD_ADDR_PRED_ABS_V4)
DEF_FIELD32(ICLASS_LD" !!!- -------- PP------ --------",LD_Amode,"Amode")
@@ -308,18 +310,24 @@ DEF_FIELD32(ICLASS_LD" ---- --!----- PP------ --------",LD_UN,"Unsigned")
#define STD_LD_ENC(TAG,OPC) \
DEF_ENC32(L2_load##TAG##_io, ICLASS_LD" 0 ii "OPC" sssss PPiiiiii iiiddddd")\
+DEF_ENC32(L2_load##TAG##_pci, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--0i iiiddddd")\
DEF_ENC32(L2_load##TAG##_pi, ICLASS_LD" 1 01 "OPC" xxxxx PP00---i iiiddddd")\
DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIddddd")\
DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--ddddd")\
DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIddddd")\
+DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--ddddd")\
+DEF_ENC32(L2_load##TAG##_pbr, ICLASS_LD" 1 11 "OPC" xxxxx PPu0---- 0--ddddd")
#define STD_LDX_ENC(TAG,OPC) \
DEF_ENC32(L2_load##TAG##_io, ICLASS_LD" 0 ii "OPC" sssss PPiiiiii iiiyyyyy")\
+DEF_ENC32(L2_load##TAG##_pci, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--0i iiiyyyyy")\
DEF_ENC32(L2_load##TAG##_pi, ICLASS_LD" 1 01 "OPC" xxxxx PP00---i iiiyyyyy")\
DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIyyyyy")\
DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--yyyyy")\
DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIyyyyy")\
+DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--yyyyy")\
+DEF_ENC32(L2_load##TAG##_pbr, ICLASS_LD" 1 11 "OPC" xxxxx PPu0---- 0--yyyyy")
#define STD_PLD_ENC(TAG,OPC) \
@@ -334,6 +342,15 @@ DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP111tti 1--ddd
/* 0 000 misc: dealloc,loadw_locked,dcfetch */
+STD_LD_ENC(bzw4,"0 101")
+STD_LD_ENC(bzw2,"0 011")
+
+STD_LD_ENC(bsw4,"0 111")
+STD_LD_ENC(bsw2,"0 001")
+
+STD_LDX_ENC(alignh,"0 010")
+STD_LDX_ENC(alignb,"0 100")
+
STD_LD_ENC(rb, "1 000")
STD_LD_ENC(rub, "1 001")
STD_LD_ENC(rh, "1 010")
@@ -351,6 +368,7 @@ STD_PLD_ENC(rd, "1 110") /* note dest reg field LSB=0, 1 is reserved */
DEF_CLASS32( ICLASS_LD" 0--0 000----- PP------ --------",LD_MISC)
DEF_ANTICLASS32(ICLASS_LD" 0--0 000----- PP------ --------",LD_ADDR_ROFFSET)
+DEF_ANTICLASS32(ICLASS_LD" 1000 000----- PP------ --------",LD_ADDR_POST_CIRC_IMMED)
DEF_ANTICLASS32(ICLASS_LD" 1010 000----- PP------ --------",LD_ADDR_POST_IMMED)
DEF_ANTICLASS32(ICLASS_LD" 1100 000----- PP------ --------",LD_ADDR_POST_REG)
DEF_ANTICLASS32(ICLASS_LD" 1110 000----- PP------ --------",LD_ADDR_POST_REG)
@@ -397,6 +415,7 @@ DEF_FIELD32(ICLASS_ST" ---! !!------ PP------ --------",ST_Type,"Type")
DEF_FIELD32(ICLASS_ST" ---- --!----- PP------ --------",ST_UN,"Unsigned")
DEF_CLASS32(ICLASS_ST" 0--1 -------- PP------ --------",ST_ADDR_ROFFSET)
+DEF_CLASS32(ICLASS_ST" 1001 -------- PP------ ------0-",ST_ADDR_POST_CIRC_IMMED)
DEF_CLASS32(ICLASS_ST" 1011 -------- PP0----- 0-----0-",ST_ADDR_POST_IMMED)
DEF_CLASS32(ICLASS_ST" 1011 -------- PP0----- 1-------",ST_ADDR_ABS_UPDATE_V4)
DEF_CLASS32(ICLASS_ST" 1011 -------- PP1----- --------",ST_ADDR_POST_IMMED_PRED_V2)
@@ -404,6 +423,7 @@ DEF_CLASS32(ICLASS_ST" 1111 -------- PP------ 1-------",ST_ADDR_PRED_ABS_V4)
DEF_CLASS32(ICLASS_ST" 1101 -------- PP------ 0-------",ST_ADDR_POST_REG)
DEF_CLASS32(ICLASS_ST" 1101 -------- PP------ 1-------",ST_ADDR_ABS_PLUS_REG_V4)
DEF_CLASS32(ICLASS_ST" 1001 -------- PP------ ------1-",ST_ADDR_POST_CREG_V2)
+DEF_CLASS32(ICLASS_ST" 1111 -------- PP------ 0-------",ST_ADDR_POST_BREV_REG)
DEF_CLASS32(ICLASS_ST" 0--0 1------- PP------ --------",ST_MISC_STORELIKE)
DEF_CLASS32(ICLASS_ST" 1--0 0------- PP------ --------",ST_MISC_BUSOP)
DEF_CLASS32(ICLASS_ST" 0--0 0------- PP------ --------",ST_MISC_CACHEOP)
@@ -411,10 +431,13 @@ DEF_CLASS32(ICLASS_ST" 0--0 0------- PP------ --------",ST_MISC_CACHEOP)
#define STD_ST_ENC(TAG,OPC,SRC) \
DEF_ENC32(S2_store##TAG##_io, ICLASS_ST" 0 ii "OPC" sssss PPi"SRC" iiiiiiii")\
+DEF_ENC32(S2_store##TAG##_pci, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0iiii-0-")\
DEF_ENC32(S2_store##TAG##_pi, ICLASS_ST" 1 01 "OPC" xxxxx PP0"SRC" 0iiii-0-")\
DEF_ENC32(S4_store##TAG##_ap, ICLASS_ST" 1 01 "OPC" eeeee PP0"SRC" 1-IIIIII")\
DEF_ENC32(S2_store##TAG##_pr, ICLASS_ST" 1 10 "OPC" xxxxx PPu"SRC" 0-------")\
DEF_ENC32(S4_store##TAG##_ur, ICLASS_ST" 1 10 "OPC" uuuuu PPi"SRC" 1iIIIIII")\
+DEF_ENC32(S2_store##TAG##_pcr, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0-----1-")\
+DEF_ENC32(S2_store##TAG##_pbr, ICLASS_ST" 1 11 "OPC" xxxxx PPu"SRC" 0-------")
#define STD_PST_ENC(TAG,OPC,SRC) \
@@ -1017,6 +1040,8 @@ MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10")
+MPY_ENC(A5_ACS, "1010","xxxxx","0","1","0","1","ee")
+MPY_ENC(A6_vminub_RdP, "1010","ddddd","0","1","1","1","ee")
/*
*/
@@ -1028,6 +1053,7 @@ MPY_ENC(F2_sfmin, "1011","ddddd","0","0","0","1","01")
MPY_ENC(F2_sfmpy, "1011","ddddd","0","0","1","0","00")
MPY_ENC(F2_sffixupn, "1011","ddddd","0","0","1","1","00")
MPY_ENC(F2_sffixupd, "1011","ddddd","0","0","1","1","01")
+MPY_ENC(F2_sfrecipa, "1011","ddddd","1","1","1","1","ee")
DEF_FIELDROW_DESC32(ICLASS_M" 1100 -------- PP------ --------","[#12] Rd=(Rs,Rt)")
DEF_FIELD32(ICLASS_M" 1100 -------- PP------ --!-----",Mc_tH,"Rt is High") /*Rt high */
@@ -1641,6 +1667,7 @@ SH2_RR_ENC(F2_conv_sf2w, "1011","100","-","000","ddddd")
SH2_RR_ENC(F2_conv_sf2uw_chop, "1011","011","-","001","ddddd")
SH2_RR_ENC(F2_conv_sf2w_chop, "1011","100","-","001","ddddd")
SH2_RR_ENC(F2_sffixupr, "1011","101","-","000","ddddd")
+SH2_RR_ENC(F2_sfinvsqrta, "1011","111","-","0ee","ddddd")
DEF_FIELDROW_DESC32(ICLASS_S2op" 1100 -------- PP------ --------","[#12] Rd=(Rs,#u6)")
@@ -1740,11 +1767,14 @@ SH_RRR_ENC(S4_vxsubaddh, "0001","01-","-","110","ddddd")
SH_RRR_ENC(S4_vxaddsubhr, "0001","11-","-","00-","ddddd")
SH_RRR_ENC(S4_vxsubaddhr, "0001","11-","-","01-","ddddd")
SH_RRR_ENC(S4_extractp_rp, "0001","11-","-","10-","ddddd")
+SH_RRR_ENC(S2_cabacdecbin, "0001","11-","-","11-","ddddd") /* implicit P0 write */
DEF_FIELDROW_DESC32(ICLASS_S3op" 0010 -------- PP------ --------","[#2] Rdd=(Rss,Rtt,Pu)")
SH_RRR_ENC(S2_valignrb, "0010","0--","-","-uu","ddddd")
SH_RRR_ENC(S2_vsplicerb, "0010","100","-","-uu","ddddd")
+SH_RRR_ENC(A4_addp_c, "0010","110","-","-xx","ddddd")
+SH_RRR_ENC(A4_subp_c, "0010","111","-","-xx","ddddd")
DEF_FIELDROW_DESC32(ICLASS_S3op" 0011 -------- PP------ --------","[#3] Rdd=(Rss,Rt)")
diff --git a/target/hexagon/imported/float.idef b/target/hexagon/imported/float.idef
index 76cecfe..3e75bc4 100644
--- a/target/hexagon/imported/float.idef
+++ b/target/hexagon/imported/float.idef
@@ -146,6 +146,22 @@ Q6INSN(F2_sfimm_n,"Rd32=sfmake(#u10):neg",ATTRIBS(),
})
+Q6INSN(F2_sfrecipa,"Rd32,Pe4=sfrecipa(Rs32,Rt32)",ATTRIBS(),
+"Reciprocal Approximation for Division",
+{
+ fHIDE(int idx;)
+ fHIDE(int adjust;)
+ fHIDE(int mant;)
+ fHIDE(int exp;)
+ if (fSF_RECIP_COMMON(RsV,RtV,RdV,adjust)) {
+ PeV = adjust;
+ idx = (RtV >> 16) & 0x7f;
+ mant = (fSF_RECIP_LOOKUP(idx) << 15) | 1;
+ exp = fSF_BIAS() - (fSF_GETEXP(RtV) - fSF_BIAS()) - 1;
+ RdV = fMAKESF(fGETBIT(31,RtV),exp,mant);
+ }
+})
+
Q6INSN(F2_sffixupn,"Rd32=sffixupn(Rs32,Rt32)",ATTRIBS(),
"Fix Up Numerator",
{
@@ -162,6 +178,22 @@ Q6INSN(F2_sffixupd,"Rd32=sffixupd(Rs32,Rt32)",ATTRIBS(),
RdV = RtV;
})
+Q6INSN(F2_sfinvsqrta,"Rd32,Pe4=sfinvsqrta(Rs32)",ATTRIBS(),
+"Reciprocal Square Root Approximation",
+{
+ fHIDE(int idx;)
+ fHIDE(int adjust;)
+ fHIDE(int mant;)
+ fHIDE(int exp;)
+ if (fSF_INVSQRT_COMMON(RsV,RdV,adjust)) {
+ PeV = adjust;
+ idx = (RsV >> 17) & 0x7f;
+ mant = (fSF_INVSQRT_LOOKUP(idx) << 15);
+ exp = fSF_BIAS() - ((fSF_GETEXP(RsV) - fSF_BIAS()) >> 1) - 1;
+ RdV = fMAKESF(fGETBIT(31,RsV),exp,mant);
+ }
+})
+
Q6INSN(F2_sffixupr,"Rd32=sffixupr(Rs32)",ATTRIBS(),
"Fix Up Radicand",
{
diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef
index 78a2ea4..359d3b7 100644
--- a/target/hexagon/imported/ldst.idef
+++ b/target/hexagon/imported/ldst.idef
@@ -25,7 +25,10 @@ Q6INSN(L2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")", ATTRIB,DESCR,{fIMMEXT(
Q6INSN(L4_##TAG##_ur, OPER"(Rt32<<#u2+#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IRs(UiV,RtV,uiV); SEMANTICS;})\
Q6INSN(L4_##TAG##_ap, OPER"(Re32=#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IMM(UiV); SEMANTICS; ReV=UiV; })\
Q6INSN(L2_##TAG##_pr, OPER"(Rx32++Mu2)", ATTRIB,DESCR,{fEA_REG(RxV); fPM_M(RxV,MuV); SEMANTICS;})\
+Q6INSN(L2_##TAG##_pbr, OPER"(Rx32++Mu2:brev)", ATTRIB,DESCR,{fEA_BREVR(RxV); fPM_M(RxV,MuV); SEMANTICS;})\
Q6INSN(L2_##TAG##_pi, OPER"(Rx32++#s4:"SHFT")", ATTRIB,DESCR,{fEA_REG(RxV); fPM_I(RxV,siV); SEMANTICS;})\
+Q6INSN(L2_##TAG##_pci, OPER"(Rx32++#s4:"SHFT":circ(Mu2))",ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRI(RxV,siV,MuV); SEMANTICS;})\
+Q6INSN(L2_##TAG##_pcr, OPER"(Rx32++I:circ(Mu2))", ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRR(RxV,fREAD_IREG(MuV)<<SCALE,MuV); SEMANTICS;})
/* The set of 32-bit load instructions */
STD_LD_AMODES(loadrub,"Rd32=memub","Load Unsigned Byte",ATTRIBS(A_LOAD),"0",fLOAD(1,1,u,EA,RdV),0)
@@ -35,6 +38,68 @@ STD_LD_AMODES(loadrh, "Rd32=memh", "Load signed Half integer",ATTRIBS(A_LOAD),"1
STD_LD_AMODES(loadri, "Rd32=memw", "Load Word",ATTRIBS(A_LOAD),"2",fLOAD(1,4,u,EA,RdV),2)
STD_LD_AMODES(loadrd, "Rdd32=memd","Load Double integer",ATTRIBS(A_LOAD),"3",fLOAD(1,8,u,EA,RddV),3)
+/* These instructions do a load and unpack */
+STD_LD_AMODES(loadbzw2, "Rd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)",
+ATTRIBS(A_LOAD),"1",
+{fHIDE(size2u_t tmpV; int i;)
+ fLOAD(1,2,u,EA,tmpV);
+ for (i=0;i<2;i++) {
+ fSETHALF(i,RdV,fGETUBYTE(i,tmpV));
+ }
+},1)
+
+STD_LD_AMODES(loadbzw4, "Rdd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)",
+ATTRIBS(A_LOAD),"2",
+{fHIDE(size4u_t tmpV; int i;)
+ fLOAD(1,4,u,EA,tmpV);
+ for (i=0;i<4;i++) {
+ fSETHALF(i,RddV,fGETUBYTE(i,tmpV));
+ }
+},2)
+
+
+
+/* These instructions do a load and unpack */
+STD_LD_AMODES(loadbsw2, "Rd32=membh", "Load Bytes and Vector Sign-Extend (unpack)",
+ATTRIBS(A_LOAD),"1",
+{fHIDE(size2u_t tmpV; int i;)
+ fLOAD(1,2,u,EA,tmpV);
+ for (i=0;i<2;i++) {
+ fSETHALF(i,RdV,fGETBYTE(i,tmpV));
+ }
+},1)
+
+STD_LD_AMODES(loadbsw4, "Rdd32=membh", "Load Bytes and Vector Sign-Extend (unpack)",
+ATTRIBS(A_LOAD),"2",
+{fHIDE(size4u_t tmpV; int i;)
+ fLOAD(1,4,u,EA,tmpV);
+ for (i=0;i<4;i++) {
+ fSETHALF(i,RddV,fGETBYTE(i,tmpV));
+ }
+},2)
+
+
+
+STD_LD_AMODES(loadalignh, "Ryy32=memh_fifo", "Load Half-word into shifted vector",
+ATTRIBS(A_LOAD),"1",
+{
+ fHIDE(size8u_t tmpV;)
+ fLOAD(1,2,u,EA,tmpV);
+ RyyV = (((size8u_t)RyyV)>>16)|(tmpV<<48);
+},1)
+
+
+STD_LD_AMODES(loadalignb, "Ryy32=memb_fifo", "Load byte into shifted vector",
+ATTRIBS(A_LOAD),"0",
+{
+ fHIDE(size8u_t tmpV;)
+ fLOAD(1,1,u,EA,tmpV);
+ RyyV = (((size8u_t)RyyV)>>8)|(tmpV<<56);
+},0)
+
+
+
+
/* The set of addressing modes standard to all Store instructions */
#define STD_ST_AMODES(TAG,DEST,OPER,DESCR,ATTRIB,SHFT,SEMANTICS,SCALE)\
Q6INSN(S2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")="DEST, ATTRIB,DESCR,{fIMMEXT(siV); fEA_RI(RsV,siV); SEMANTICS; })\
@@ -42,6 +107,9 @@ Q6INSN(S2_##TAG##_pi, OPER"(Rx32++#s4:"SHFT")="DEST, ATTRIB,DESCR,{fEA_REG(
Q6INSN(S4_##TAG##_ap, OPER"(Re32=#U6)="DEST, ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IMM(UiV); SEMANTICS; ReV=UiV; })\
Q6INSN(S2_##TAG##_pr, OPER"(Rx32++Mu2)="DEST, ATTRIB,DESCR,{fEA_REG(RxV); fPM_M(RxV,MuV); SEMANTICS; })\
Q6INSN(S4_##TAG##_ur, OPER"(Ru32<<#u2+#U6)="DEST, ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IRs(UiV,RuV,uiV); SEMANTICS;})\
+Q6INSN(S2_##TAG##_pbr, OPER"(Rx32++Mu2:brev)="DEST, ATTRIB,DESCR,{fEA_BREVR(RxV); fPM_M(RxV,MuV); SEMANTICS; })\
+Q6INSN(S2_##TAG##_pci, OPER"(Rx32++#s4:"SHFT":circ(Mu2))="DEST, ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRI(RxV,siV,MuV); SEMANTICS;})\
+Q6INSN(S2_##TAG##_pcr, OPER"(Rx32++I:circ(Mu2))="DEST, ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRR(RxV,fREAD_IREG(MuV)<<SCALE,MuV); SEMANTICS;})
/* The set of 32-bit store instructions */
diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def
index 65292c7..32ed3bf 100755
--- a/target/hexagon/imported/macros.def
+++ b/target/hexagon/imported/macros.def
@@ -92,6 +92,21 @@ DEF_MACRO(
/* attribs */
)
+
+DEF_MACRO(
+ fINSERT_RANGE,
+ {
+ int offset=LOWBIT;
+ int width=HIBIT-LOWBIT+1;
+ /* clear bits where new bits go */
+ INREG &= ~(((fCONSTLL(1)<<width)-1)<<offset);
+ /* OR in new bits */
+ INREG |= ((INVAL & ((fCONSTLL(1)<<width)-1)) << offset);
+ },
+ /* attribs */
+)
+
+
DEF_MACRO(
f8BITSOF,
( (VAL) ? 0xff : 0x00),
@@ -277,6 +292,12 @@ DEF_MACRO(
/*************************************/
DEF_MACRO(
+ fREAD_IREG, /* read modifier register */
+ (fSXTN(11,64,(((VAL) & 0xf0000000)>>21) | ((VAL>>17)&0x7f) )), /* behavior */
+ ()
+)
+
+DEF_MACRO(
fREAD_LR, /* read link register */
(READ_RREG(REG_LR)), /* behavior */
()
@@ -307,6 +328,12 @@ DEF_MACRO(
)
DEF_MACRO(
+ fREAD_CSREG, /* read CS register */
+ (READ_RREG(REG_CSA+N)), /* behavior */
+ ()
+)
+
+DEF_MACRO(
fREAD_LC0, /* read loop count */
(READ_RREG(REG_LC0)), /* behavior */
()
@@ -807,6 +834,12 @@ DEF_MACRO(
)
DEF_MACRO(
+ fEA_BREVR, /* Calculate EA with bit reversed bottom of REGISTER */
+ EA=fbrev(REG),
+ ()
+)
+
+DEF_MACRO(
fEA_GPI, /* Calculate EA with Global Pointer + Immediate */
do { EA=fREAD_GP()+IMM; fGP_DOCHKPAGECROSS(fREAD_GP(),EA); } while (0),
()
@@ -825,6 +858,20 @@ DEF_MACRO(
)
DEF_MACRO(
+ fPM_CIRI, /* Post Modify Register using Circular arithmetic by Immediate */
+ do { fcirc_add(REG,siV,MuV); } while (0),
+ ()
+)
+
+DEF_MACRO(
+ fPM_CIRR, /* Post Modify Register using Circular arithmetic by register */
+ do { fcirc_add(REG,VAL,MuV); } while (0),
+ ()
+)
+
+
+
+DEF_MACRO(
fSCALE, /* scale by N */
(((size8s_t)(A))<<N),
/* optional attributes */
diff --git a/target/hexagon/imported/shift.idef b/target/hexagon/imported/shift.idef
index e328ab7..b32c4e0 100644
--- a/target/hexagon/imported/shift.idef
+++ b/target/hexagon/imported/shift.idef
@@ -1029,6 +1029,53 @@ Q6INSN(S4_clbpaddi,"Rd32=add(clb(Rss32),#s6)",ATTRIBS(A_ARCHV2),
{ RdV = (fMAX(fCL1_8(RssV),fCL1_8(~RssV)))+siV;})
+
+Q6INSN(S2_cabacdecbin,"Rdd32=decbin(Rss32,Rtt32)",ATTRIBS(A_ARCHV3),"CABAC decode bin",
+{
+ fHIDE(size4u_t state;)
+ fHIDE(size4u_t valMPS;)
+ fHIDE(size4u_t bitpos;)
+ fHIDE(size4u_t range;)
+ fHIDE(size4u_t offset;)
+ fHIDE(size4u_t rLPS;)
+ fHIDE(size4u_t rMPS;)
+
+ state = fEXTRACTU_RANGE( fGETWORD(1,RttV) ,5,0);
+ valMPS = fEXTRACTU_RANGE( fGETWORD(1,RttV) ,8,8);
+ bitpos = fEXTRACTU_RANGE( fGETWORD(0,RttV) ,4,0);
+ range = fGETWORD(0,RssV);
+ offset = fGETWORD(1,RssV);
+
+ /* calculate rLPS */
+ range <<= bitpos;
+ offset <<= bitpos;
+ rLPS = rLPS_table_64x4[state][ (range >>29)&3];
+ rLPS = rLPS << 23; /* left aligned */
+
+ /* calculate rMPS */
+ rMPS= (range&0xff800000) - rLPS;
+
+ /* most probable region */
+ if (offset < rMPS) {
+ RddV = AC_next_state_MPS_64[state];
+ fINSERT_RANGE(RddV,8,8,valMPS);
+ fINSERT_RANGE(RddV,31,23,(rMPS>>23));
+ fSETWORD(1,RddV,offset);
+ fWRITE_P0(valMPS);
+
+
+ }
+ /* least probable region */
+ else {
+ RddV = AC_next_state_LPS_64[state];
+ fINSERT_RANGE(RddV,8,8,((!state)?(1-valMPS):(valMPS)));
+ fINSERT_RANGE(RddV,31,23,(rLPS>>23));
+ fSETWORD(1,RddV,(offset-rMPS));
+ fWRITE_P0((valMPS^1));
+ }
+})
+
+
Q6INSN(S2_clb,"Rd32=clb(Rs32)",ATTRIBS(),
"Count leading bits", {RdV = fMAX(fCL1_4(RsV),fCL1_4(~RsV));})
diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h
index 5756a1d..2e34591 100644
--- a/target/hexagon/insn.h
+++ b/target/hexagon/insn.h
@@ -40,14 +40,15 @@ struct Instruction {
uint32_t iclass:6;
uint32_t slot:3;
- uint32_t part1:1; /*
+ uint32_t which_extended:1; /* If has an extender, which immediate */
+ uint32_t new_value_producer_slot:4;
+
+ bool part1; /*
* cmp-jumps are split into two insns.
* set for the compare and clear for the jump
*/
- uint32_t extension_valid:1; /* Has a constant extender attached */
- uint32_t which_extended:1; /* If has an extender, which immediate */
- uint32_t is_endloop:1; /* This is an end of loop */
- uint32_t new_value_producer_slot:4;
+ bool extension_valid; /* Has a constant extender attached */
+ bool is_endloop; /* This is an end of loop */
int32_t immed[IMMEDS_MAX]; /* immediate field */
};
@@ -58,13 +59,13 @@ struct Packet {
uint16_t encod_pkt_size_in_bytes;
/* Pre-decodes about COF */
- uint32_t pkt_has_cof:1; /* Has any change-of-flow */
- uint32_t pkt_has_endloop:1;
+ bool pkt_has_cof; /* Has any change-of-flow */
+ bool pkt_has_endloop;
- uint32_t pkt_has_dczeroa:1;
+ bool pkt_has_dczeroa;
- uint32_t pkt_has_store_s0:1;
- uint32_t pkt_has_store_s1:1;
+ bool pkt_has_store_s0;
+ bool pkt_has_store_s1;
Insn insn[INSTRUCTIONS_MAX];
};
diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h
index 2da85c8..6b20aff 100644
--- a/target/hexagon/internal.h
+++ b/target/hexagon/internal.h
@@ -22,11 +22,12 @@
* Change HEX_DEBUG to 1 to turn on debugging output
*/
#define HEX_DEBUG 0
-#if HEX_DEBUG
-#define HEX_DEBUG_LOG(...) qemu_log(__VA_ARGS__)
-#else
-#define HEX_DEBUG_LOG(...) do { } while (0)
-#endif
+#define HEX_DEBUG_LOG(...) \
+ do { \
+ if (HEX_DEBUG) { \
+ qemu_log(__VA_ARGS__); \
+ } \
+ } while (0)
int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index cfcb817..b726c3b 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -133,6 +133,38 @@
CHECK_NOSHUF; \
tcg_gen_qemu_ld64(DST, VA, ctx->mem_idx); \
} while (0)
+
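+/*
+ * The MEM_STOREn macros dispatch to the immediate (gen_storeNi) or
+ * register (gen_storeN) generator based on the type of DATA.
+ */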
+#define MEM_STORE1_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store1i, \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_store1, (void)0))
+#define MEM_STORE1(VA, DATA, SLOT) \
+ MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+
+#define MEM_STORE2_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store2i, \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_store2, (void)0))
+#define MEM_STORE2(VA, DATA, SLOT) \
+ MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+
+#define MEM_STORE4_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store4i, \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_store4, (void)0))
+#define MEM_STORE4(VA, DATA, SLOT) \
+ MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+
+#define MEM_STORE8_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store8i, \
+ __builtin_choose_expr(TYPE_TCGV_I64(X), \
+ gen_store8, (void)0))
+#define MEM_STORE8(VA, DATA, SLOT) \
+ MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
#else
#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA))
#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA))
@@ -190,6 +222,13 @@ static inline void gen_pred_cancel(TCGv pred, int slot_num)
(((HIBIT) - (LOWBIT) + 1) ? \
extract64((INREG), (LOWBIT), ((HIBIT) - (LOWBIT) + 1)) : \
0LL)
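+/* Deposit INVAL into bits LOWBIT through HIBIT of INREG */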
+#define fINSERT_RANGE(INREG, HIBIT, LOWBIT, INVAL) \
+ do { \
+ int width = ((HIBIT) - (LOWBIT) + 1); \
+ INREG = (width >= 0 ? \
+ deposit64((INREG), (LOWBIT), width, (INVAL)) : \
+ INREG); \
+ } while (0)
#define f8BITSOF(VAL) ((VAL) ? 0xff : 0x00)
@@ -285,6 +324,39 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
#define fPCALIGN(IMM) IMM = (IMM & ~PCALIGN_MASK)
+#ifdef QEMU_GENERATE
+static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
+{
+ /*
+ * Section 2.2.4 of the Hexagon V67 Programmer's Reference Manual
+ *
+ * The "I" value from a modifier register is divided into two pieces
+ * LSB bits 23:17
+ * MSB bits 31:28
+ * The value is signed
+ *
+ * At the end we shift the result according to the shift argument
+ */
+ TCGv msb = tcg_temp_new();
+ TCGv lsb = tcg_temp_new();
+
+ tcg_gen_extract_tl(lsb, val, 17, 7);
+ tcg_gen_sari_tl(msb, val, 21);
+ tcg_gen_deposit_tl(result, msb, lsb, 0, 7);
+
+ tcg_gen_shli_tl(result, result, shift);
+
+ tcg_temp_free(msb);
+ tcg_temp_free(lsb);
+
+ return result;
+}
+#define fREAD_IREG(VAL, SHIFT) gen_read_ireg(ireg, (VAL), (SHIFT))
+#else
+#define fREAD_IREG(VAL) \
+ (fSXTN(11, 64, (((VAL) & 0xf0000000) >> 21) | ((VAL >> 17) & 0x7f)))
+#endif
+
#define fREAD_LR() (READ_REG(HEX_REG_LR))
#define fWRITE_LR(A) WRITE_RREG(HEX_REG_LR, A)
@@ -341,8 +413,6 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
#define fWRITE_LC0(VAL) WRITE_RREG(HEX_REG_LC0, VAL)
#define fWRITE_LC1(VAL) WRITE_RREG(HEX_REG_LC1, VAL)
-#define fCARRY_FROM_ADD(A, B, C) carry_from_add64(A, B, C)
-
#define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1)
#define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL))
#define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG))
@@ -402,6 +472,21 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
#define fCAST8S_16S(A) (int128_exts64(A))
#define fCAST16S_8S(A) (int128_getlo(A))
+#ifdef QEMU_GENERATE
+#define fEA_RI(REG, IMM) tcg_gen_addi_tl(EA, REG, IMM)
+#define fEA_RRs(REG, REG2, SCALE) \
+ do { \
+ TCGv tmp = tcg_temp_new(); \
+ tcg_gen_shli_tl(tmp, REG2, SCALE); \
+ tcg_gen_add_tl(EA, REG, tmp); \
+ tcg_temp_free(tmp); \
+ } while (0)
+#define fEA_IRs(IMM, REG, SCALE) \
+ do { \
+ tcg_gen_shli_tl(EA, REG, SCALE); \
+ tcg_gen_addi_tl(EA, EA, IMM); \
+ } while (0)
+#else
#define fEA_RI(REG, IMM) \
do { \
EA = REG + IMM; \
@@ -414,12 +499,21 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
do { \
EA = IMM + (REG << SCALE); \
} while (0)
+#endif
#ifdef QEMU_GENERATE
#define fEA_IMM(IMM) tcg_gen_movi_tl(EA, IMM)
#define fEA_REG(REG) tcg_gen_mov_tl(EA, REG)
+#define fEA_BREVR(REG) gen_helper_fbrev(EA, REG)
#define fPM_I(REG, IMM) tcg_gen_addi_tl(REG, REG, IMM)
#define fPM_M(REG, MVAL) tcg_gen_add_tl(REG, REG, MVAL)
+#define fPM_CIRI(REG, IMM, MVAL) \
+ do { \
+ TCGv tcgv_siV = tcg_const_tl(siV); \
+ gen_helper_fcircadd(REG, REG, tcgv_siV, MuV, \
+ hex_gpr[HEX_REG_CS0 + MuN]); \
+ tcg_temp_free(tcgv_siV); \
+ } while (0)
#else
#define fEA_IMM(IMM) do { EA = (IMM); } while (0)
#define fEA_REG(REG) do { EA = (REG); } while (0)
@@ -496,23 +590,43 @@ static inline void gen_logical_not(TCGv dest, TCGv src)
gen_load_locked##SIZE##SIGN(DST, EA, ctx->mem_idx);
#endif
+#ifdef QEMU_GENERATE
+#define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, insn->slot)
+#else
#define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, slot)
+#endif
#ifdef QEMU_GENERATE
#define fSTORE_LOCKED(NUM, SIZE, EA, SRC, PRED) \
gen_store_conditional##SIZE(env, ctx, PdN, PRED, EA, SRC);
#endif
+#ifdef QEMU_GENERATE
+#define GETBYTE_FUNC(X) \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_get_byte, \
+ __builtin_choose_expr(TYPE_TCGV_I64(X), \
+ gen_get_byte_i64, (void)0))
+#define fGETBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, true)
+#define fGETUBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, false)
+#else
#define fGETBYTE(N, SRC) ((int8_t)((SRC >> ((N) * 8)) & 0xff))
#define fGETUBYTE(N, SRC) ((uint8_t)((SRC >> ((N) * 8)) & 0xff))
+#endif
#define fSETBYTE(N, DST, VAL) \
do { \
DST = (DST & ~(0x0ffLL << ((N) * 8))) | \
(((uint64_t)((VAL) & 0x0ffLL)) << ((N) * 8)); \
} while (0)
+
+#ifdef QEMU_GENERATE
+#define fGETHALF(N, SRC) gen_get_half(HALF, N, SRC, true)
+#define fGETUHALF(N, SRC) gen_get_half(HALF, N, SRC, false)
+#else
#define fGETHALF(N, SRC) ((int16_t)((SRC >> ((N) * 16)) & 0xffff))
#define fGETUHALF(N, SRC) ((uint16_t)((SRC >> ((N) * 16)) & 0xffff))
+#endif
#define fSETHALF(N, DST, VAL) \
do { \
DST = (DST & ~(0x0ffffLL << ((N) * 16))) | \
diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build
index bb0b4fb..6fd9360 100644
--- a/target/hexagon/meson.build
+++ b/target/hexagon/meson.build
@@ -173,7 +173,6 @@ hexagon_ss.add(files(
'printinsn.c',
'arch.c',
'fma_emu.c',
- 'conv_emu.c',
))
target_arch += {'hexagon': hexagon_ss}
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 2c6d718..63dd685 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -25,7 +25,6 @@
#include "arch.h"
#include "hex_arch_types.h"
#include "fma_emu.h"
-#include "conv_emu.h"
#define SF_BIAS 127
#define SF_MANTBITS 23
@@ -35,7 +34,7 @@ static void QEMU_NORETURN do_raise_exception_err(CPUHexagonState *env,
uint32_t exception,
uintptr_t pc)
{
- CPUState *cs = CPU(hexagon_env_get_cpu(env));
+ CPUState *cs = env_cpu(env);
qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
cs->exception_index = exception;
cpu_loop_exit_restore(cs, pc);
@@ -46,8 +45,8 @@ void QEMU_NORETURN HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
do_raise_exception_err(env, excp, 0);
}
-static inline void log_reg_write(CPUHexagonState *env, int rnum,
- target_ulong val, uint32_t slot)
+static void log_reg_write(CPUHexagonState *env, int rnum,
+ target_ulong val, uint32_t slot)
{
HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
rnum, val, val);
@@ -57,14 +56,13 @@ static inline void log_reg_write(CPUHexagonState *env, int rnum,
HEX_DEBUG_LOG("\n");
env->new_value[rnum] = val;
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- env->reg_written[rnum] = 1;
-#endif
+ if (HEX_DEBUG) {
+ /* Do this so HELPER(debug_commit_end) will know */
+ env->reg_written[rnum] = 1;
+ }
}
-static inline void log_pred_write(CPUHexagonState *env, int pnum,
- target_ulong val)
+static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
{
HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
" (0x" TARGET_FMT_lx ")\n",
@@ -79,8 +77,8 @@ static inline void log_pred_write(CPUHexagonState *env, int pnum,
}
}
-static inline void log_store32(CPUHexagonState *env, target_ulong addr,
- target_ulong val, int width, int slot)
+static void log_store32(CPUHexagonState *env, target_ulong addr,
+ target_ulong val, int width, int slot)
{
HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
", %" PRId32 " [0x08%" PRIx32 "])\n",
@@ -90,8 +88,8 @@ static inline void log_store32(CPUHexagonState *env, target_ulong addr,
env->mem_log_stores[slot].data32 = val;
}
-static inline void log_store64(CPUHexagonState *env, target_ulong addr,
- int64_t val, int width, int slot)
+static void log_store64(CPUHexagonState *env, target_ulong addr,
+ int64_t val, int width, int slot)
{
HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
", %" PRId64 " [0x016%" PRIx64 "])\n",
@@ -101,7 +99,7 @@ static inline void log_store64(CPUHexagonState *env, target_ulong addr,
env->mem_log_stores[slot].data64 = val;
}
-static inline void write_new_pc(CPUHexagonState *env, target_ulong addr)
+static void write_new_pc(CPUHexagonState *env, target_ulong addr)
{
HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
@@ -119,7 +117,6 @@ static inline void write_new_pc(CPUHexagonState *env, target_ulong addr)
}
}
-#if HEX_DEBUG
/* Handy place to set a breakpoint */
void HELPER(debug_start_packet)(CPUHexagonState *env)
{
@@ -130,14 +127,12 @@ void HELPER(debug_start_packet)(CPUHexagonState *env)
env->reg_written[i] = 0;
}
}
-#endif
-static inline int32_t new_pred_value(CPUHexagonState *env, int pnum)
+static int32_t new_pred_value(CPUHexagonState *env, int pnum)
{
return env->new_pred_value[pnum];
}
-#if HEX_DEBUG
/* Checks for bookkeeping errors between disassembly context and runtime */
void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
{
@@ -147,7 +142,6 @@ void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
g_assert_not_reached();
}
}
-#endif
void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
{
@@ -173,7 +167,6 @@ void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
}
}
-#if HEX_DEBUG
static void print_store(CPUHexagonState *env, int slot)
{
if (!(env->slot_cancelled & (1 << slot))) {
@@ -257,35 +250,26 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
env->gpr[HEX_REG_QEMU_INSN_CNT]);
}
-#endif
-
-static int32_t fcircadd_v4(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
-{
- int32_t length = M & 0x0001ffff;
- uint32_t new_ptr = RxV + offset;
- uint32_t start_addr = CS;
- uint32_t end_addr = start_addr + length;
-
- if (new_ptr >= end_addr) {
- new_ptr -= length;
- } else if (new_ptr < start_addr) {
- new_ptr += length;
- }
-
- return new_ptr;
-}
int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
{
- int32_t K_const = (M >> 24) & 0xf;
- int32_t length = M & 0x1ffff;
- int32_t mask = (1 << (K_const + 2)) - 1;
+ int32_t K_const = sextract32(M, 24, 4);
+ int32_t length = sextract32(M, 0, 17);
uint32_t new_ptr = RxV + offset;
- uint32_t start_addr = RxV & (~mask);
- uint32_t end_addr = start_addr | length;
+ uint32_t start_addr;
+ uint32_t end_addr;
if (K_const == 0 && length >= 4) {
- return fcircadd_v4(RxV, offset, M, CS);
+ start_addr = CS;
+ end_addr = start_addr + length;
+ } else {
+ /*
+ * Versions v3 and earlier used the K value to specify a power-of-2 size
+ * 2^(K+2) that is greater than the buffer length
+ */
+ int32_t mask = (1 << (K_const + 2)) - 1;
+ start_addr = RxV & (~mask);
+ end_addr = start_addr | length;
}
if (new_ptr >= end_addr) {
@@ -297,24 +281,103 @@ int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
return new_ptr;
}
+uint32_t HELPER(fbrev)(uint32_t addr)
+{
+ /*
+ * Bit reverse the low 16 bits of the address
+ */
+ return deposit32(addr, 0, 16, revbit16(addr));
+}
+
+static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
+{
+ return make_float32(
+ ((sign & 1) << 31) |
+ ((exp & 0xff) << SF_MANTBITS) |
+ (mant & ((1 << SF_MANTBITS) - 1)));
+}
+
/*
- * Hexagon FP operations return ~0 insteat of NaN
- * The hex_check_sfnan/hex_check_dfnan functions perform this check
+ * sfrecipa, sfinvsqrta have two 32-bit results
+ * r0,p0=sfrecipa(r1,r2)
+ * r0,p0=sfinvsqrta(r1)
+ *
+ * Since helpers can only return a single value, we pack the two results
+ * into a 64-bit value.
*/
-static float32 hex_check_sfnan(float32 x)
+uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
{
- if (float32_is_any_nan(x)) {
- return make_float32(0xFFFFFFFFU);
+ int32_t PeV = 0;
+ float32 RdV;
+ int idx;
+ int adjust;
+ int mant;
+ int exp;
+
+ arch_fpop_start(env);
+ if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
+ PeV = adjust;
+ idx = (RtV >> 16) & 0x7f;
+ mant = (recip_lookup_table[idx] << 15) | 1;
+ exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
+ RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
+ }
+ arch_fpop_end(env);
+ return ((uint64_t)RdV << 32) | PeV;
+}
+
+uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
+{
+ int PeV = 0;
+ float32 RdV;
+ int idx;
+ int adjust;
+ int mant;
+ int exp;
+
+ arch_fpop_start(env);
+ if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
+ PeV = adjust;
+ idx = (RsV >> 17) & 0x7f;
+ mant = (invsqrt_lookup_table[idx] << 15);
+ exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
+ RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
+ }
+ arch_fpop_end(env);
+ return ((uint64_t)RdV << 32) | PeV;
+}
+
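+/*
+ * vacsh (add/compare/select): for each 16-bit lane compute xv + tv and
+ * sv - tv, then write the saturated maximum back into that lane of Rxx.
+ */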
+int64_t HELPER(vacsh_val)(CPUHexagonState *env,
+ int64_t RxxV, int64_t RssV, int64_t RttV)
+{
+ for (int i = 0; i < 4; i++) {
+ int xv = sextract64(RxxV, i * 16, 16);
+ int sv = sextract64(RssV, i * 16, 16);
+ int tv = sextract64(RttV, i * 16, 16);
+ int max;
+ xv = xv + tv;
+ sv = sv - tv;
+ max = xv > sv ? xv : sv;
+ /* Note that fSATH can set the OVF bit in usr */
+ RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
}
- return x;
+ return RxxV;
}
-static float64 hex_check_dfnan(float64 x)
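+/* Predicate half of vacsh: two Pe bits per lane, set when xv + tv > sv - tv */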
+int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
+ int64_t RxxV, int64_t RssV, int64_t RttV)
{
- if (float64_is_any_nan(x)) {
- return make_float64(0xFFFFFFFFFFFFFFFFULL);
+ int32_t PeV = 0;
+ for (int i = 0; i < 4; i++) {
+ int xv = sextract64(RxxV, i * 16, 16);
+ int sv = sextract64(RssV, i * 16, 16);
+ int tv = sextract64(RttV, i * 16, 16);
+ xv = xv + tv;
+ sv = sv - tv;
+ PeV = deposit32(PeV, i * 2, 1, (xv > sv));
+ PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
}
- return x;
+ return PeV;
}
/*
@@ -332,8 +395,8 @@ static void check_noshuf(CPUHexagonState *env, uint32_t slot)
}
}
-static inline uint8_t mem_load1(CPUHexagonState *env, uint32_t slot,
- target_ulong vaddr)
+static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot,
+ target_ulong vaddr)
{
uint8_t retval;
check_noshuf(env, slot);
@@ -341,8 +404,8 @@ static inline uint8_t mem_load1(CPUHexagonState *env, uint32_t slot,
return retval;
}
-static inline uint16_t mem_load2(CPUHexagonState *env, uint32_t slot,
- target_ulong vaddr)
+static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot,
+ target_ulong vaddr)
{
uint16_t retval;
check_noshuf(env, slot);
@@ -350,8 +413,8 @@ static inline uint16_t mem_load2(CPUHexagonState *env, uint32_t slot,
return retval;
}
-static inline uint32_t mem_load4(CPUHexagonState *env, uint32_t slot,
- target_ulong vaddr)
+static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot,
+ target_ulong vaddr)
{
uint32_t retval;
check_noshuf(env, slot);
@@ -359,8 +422,8 @@ static inline uint32_t mem_load4(CPUHexagonState *env, uint32_t slot,
return retval;
}
-static inline uint64_t mem_load8(CPUHexagonState *env, uint32_t slot,
- target_ulong vaddr)
+static uint64_t mem_load8(CPUHexagonState *env, uint32_t slot,
+ target_ulong vaddr)
{
uint64_t retval;
check_noshuf(env, slot);
@@ -374,7 +437,6 @@ float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
float64 out_f64;
arch_fpop_start(env);
out_f64 = float32_to_float64(RsV, &env->fp_status);
- out_f64 = hex_check_dfnan(out_f64);
arch_fpop_end(env);
return out_f64;
}
@@ -384,7 +446,6 @@ float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
float32 out_f32;
arch_fpop_start(env);
out_f32 = float64_to_float32(RssV, &env->fp_status);
- out_f32 = hex_check_sfnan(out_f32);
arch_fpop_end(env);
return out_f32;
}
@@ -394,7 +455,6 @@ float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
float32 RdV;
arch_fpop_start(env);
RdV = uint32_to_float32(RsV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -404,7 +464,6 @@ float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
float64 RddV;
arch_fpop_start(env);
RddV = uint32_to_float64(RsV, &env->fp_status);
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -414,7 +473,6 @@ float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
float32 RdV;
arch_fpop_start(env);
RdV = int32_to_float32(RsV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -424,7 +482,6 @@ float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
float64 RddV;
arch_fpop_start(env);
RddV = int32_to_float64(RsV, &env->fp_status);
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -434,7 +491,6 @@ float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
float32 RdV;
arch_fpop_start(env);
RdV = uint64_to_float32(RssV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -444,7 +500,6 @@ float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
float64 RddV;
arch_fpop_start(env);
RddV = uint64_to_float64(RssV, &env->fp_status);
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -454,7 +509,6 @@ float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
float32 RdV;
arch_fpop_start(env);
RdV = int64_to_float32(RssV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -464,16 +518,21 @@ float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
float64 RddV;
arch_fpop_start(env);
RddV = int64_to_float64(RssV, &env->fp_status);
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
-int32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
+uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
{
- int32_t RdV;
+ uint32_t RdV;
arch_fpop_start(env);
- RdV = conv_sf_to_4u(RsV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = 0;
+ } else {
+ RdV = float32_to_uint32(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
@@ -482,16 +541,28 @@ int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
{
int32_t RdV;
arch_fpop_start(env);
- RdV = conv_sf_to_4s(RsV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = -1;
+ } else {
+ RdV = float32_to_int32(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
-int64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
+uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
{
- int64_t RddV;
+ uint64_t RddV;
arch_fpop_start(env);
- RddV = conv_sf_to_8u(RsV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = 0;
+ } else {
+ RddV = float32_to_uint64(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
@@ -500,16 +571,28 @@ int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
{
int64_t RddV;
arch_fpop_start(env);
- RddV = conv_sf_to_8s(RsV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = -1;
+ } else {
+ RddV = float32_to_int64(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
-int32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
+uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
{
- int32_t RdV;
+ uint32_t RdV;
arch_fpop_start(env);
- RdV = conv_df_to_4u(RssV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = 0;
+ } else {
+ RdV = float64_to_uint32(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
@@ -518,16 +601,28 @@ int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
{
int32_t RdV;
arch_fpop_start(env);
- RdV = conv_df_to_4s(RssV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = -1;
+ } else {
+ RdV = float64_to_int32(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
-int64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
+uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
{
- int64_t RddV;
+ uint64_t RddV;
arch_fpop_start(env);
- RddV = conv_df_to_8u(RssV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = 0;
+ } else {
+ RddV = float64_to_uint64(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
@@ -536,17 +631,28 @@ int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
{
int64_t RddV;
arch_fpop_start(env);
- RddV = conv_df_to_8s(RssV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = -1;
+ } else {
+ RddV = float64_to_int64(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
-int32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
+uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
{
- int32_t RdV;
+ uint32_t RdV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RdV = conv_sf_to_4u(RsV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = 0;
+ } else {
+ RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
@@ -555,18 +661,28 @@ int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
{
int32_t RdV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RdV = conv_sf_to_4s(RsV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = -1;
+ } else {
+ RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
-int64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
+uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
{
- int64_t RddV;
+ uint64_t RddV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RddV = conv_sf_to_8u(RsV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = 0;
+ } else {
+ RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
@@ -575,18 +691,28 @@ int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
{
int64_t RddV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RddV = conv_sf_to_8s(RsV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float32_is_any_nan(RsV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = -1;
+ } else {
+ RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
-int32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
+uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
{
- int32_t RdV;
+ uint32_t RdV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RdV = conv_df_to_4u(RssV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+    if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = 0;
+ } else {
+ RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
@@ -595,18 +721,28 @@ int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
{
int32_t RdV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RdV = conv_df_to_4s(RssV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RdV = -1;
+ } else {
+ RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RdV;
}
-int64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
+uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
{
- int64_t RddV;
+ uint64_t RddV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RddV = conv_df_to_8u(RssV, &env->fp_status);
+ /* Hexagon checks the sign before rounding */
+ if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = 0;
+ } else {
+ RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
@@ -615,8 +751,13 @@ int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
{
int64_t RddV;
arch_fpop_start(env);
- set_float_rounding_mode(float_round_to_zero, &env->fp_status);
- RddV = conv_df_to_8s(RssV, &env->fp_status);
+ /* Hexagon returns -1 for NaN */
+ if (float64_is_any_nan(RssV)) {
+ float_raise(float_flag_invalid, &env->fp_status);
+ RddV = -1;
+ } else {
+ RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
+ }
arch_fpop_end(env);
return RddV;
}
@@ -626,7 +767,6 @@ float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
float32 RdV;
arch_fpop_start(env);
RdV = float32_add(RsV, RtV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -636,7 +776,6 @@ float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
float32 RdV;
arch_fpop_start(env);
RdV = float32_sub(RsV, RtV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -688,7 +827,6 @@ float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
float32 RdV;
arch_fpop_start(env);
RdV = float32_maxnum(RsV, RtV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -698,7 +836,6 @@ float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
float32 RdV;
arch_fpop_start(env);
RdV = float32_minnum(RsV, RtV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -765,7 +902,6 @@ float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
float64 RddV;
arch_fpop_start(env);
RddV = float64_add(RssV, RttV, &env->fp_status);
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -775,7 +911,6 @@ float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
float64 RddV;
arch_fpop_start(env);
RddV = float64_sub(RssV, RttV, &env->fp_status);
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -788,7 +923,6 @@ float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) {
float_raise(float_flag_invalid, &env->fp_status);
}
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -801,7 +935,6 @@ float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) {
float_raise(float_flag_invalid, &env->fp_status);
}
- RddV = hex_check_dfnan(RddV);
arch_fpop_end(env);
return RddV;
}
@@ -877,7 +1010,6 @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
float32 RdV;
arch_fpop_start(env);
RdV = internal_mpyf(RsV, RtV, &env->fp_status);
- RdV = hex_check_sfnan(RdV);
arch_fpop_end(env);
return RdV;
}
@@ -887,7 +1019,6 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
{
arch_fpop_start(env);
RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
- RxV = hex_check_sfnan(RxV);
arch_fpop_end(env);
return RxV;
}
@@ -919,7 +1050,6 @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
RxV = check_nan(RxV, RsV, &env->fp_status);
RxV = check_nan(RxV, RtV, &env->fp_status);
tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
- tmp = hex_check_sfnan(tmp);
if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
RxV = tmp;
}
@@ -934,12 +1064,11 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
arch_fpop_start(env);
neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
- RxV = hex_check_sfnan(RxV);
arch_fpop_end(env);
return RxV;
}
-static inline bool is_inf_prod(int32_t a, int32_t b)
+static bool is_inf_prod(int32_t a, int32_t b)
{
return (float32_is_infinity(a) && float32_is_infinity(b)) ||
(float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
@@ -949,8 +1078,8 @@ static inline bool is_inf_prod(int32_t a, int32_t b)
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- int infinp;
- int infminusinf;
+ bool infinp;
+ bool infminusinf;
float32 tmp;
arch_fpop_start(env);
@@ -965,7 +1094,6 @@ float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
RxV = check_nan(RxV, RsV, &env->fp_status);
RxV = check_nan(RxV, RtV, &env->fp_status);
tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
- tmp = hex_check_sfnan(tmp);
if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
RxV = tmp;
}
@@ -983,8 +1111,8 @@ float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- int infinp;
- int infminusinf;
+ bool infinp;
+ bool infminusinf;
float32 tmp;
arch_fpop_start(env);
@@ -1000,7 +1128,6 @@ float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
RxV = check_nan(RxV, RtV, &env->fp_status);
float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
- tmp = hex_check_sfnan(tmp);
if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
RxV = tmp;
}
@@ -1024,13 +1151,11 @@ float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
float64_is_normal(RttV)) {
RddV = float64_mul(RssV, make_float64(0x4330000000000000),
&env->fp_status);
- RddV = hex_check_dfnan(RddV);
} else if (float64_is_denormal(RttV) &&
(float64_getexp(RssV) >= 512) &&
float64_is_normal(RssV)) {
RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
&env->fp_status);
- RddV = hex_check_dfnan(RddV);
} else {
RddV = RssV;
}
@@ -1043,7 +1168,6 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
{
arch_fpop_start(env);
RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
- RxxV = hex_check_dfnan(RxxV);
arch_fpop_end(env);
return RxxV;
}
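
The conversion hunk above shows only the -1 branch of one helper, but together with the convert_* tests added to tests/tcg/hexagon/fpstuff.c further down it pins the intended Hexagon behaviour: float-to-signed conversions of NaN yield -1, float-to-unsigned conversions of negative inputs yield 0 even when rounding alone would give 0, and both raise the invalid flag. A minimal stand-alone C sketch of that convention (hypothetical model functions, not the QEMU helpers; range saturation and NaN-to-unsigned are left out):

#include <math.h>
#include <stdint.h>

/* *invalid stands in for the USR.FPINVF bit that the tests read back. */
static int32_t model_convert_sf2w(float x, int *invalid)
{
    if (isnan(x)) {
        *invalid = 1;
        return -1;              /* NaN -> -1 for signed results */
    }
    return (int32_t)x;          /* truncation, i.e. the :chop form */
}

static uint32_t model_convert_sf2uw(float x, int *invalid)
{
    if (x < 0.0f) {             /* sign checked before rounding: even a
                                   tiny negative gives 0 plus invalid */
        *invalid = 1;
        return 0;
    }
    return (uint32_t)x;         /* NaN and overflow cases omitted */
}
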
diff --git a/target/hexagon/reg_fields.c b/target/hexagon/reg_fields.c
index bdcab79..6713203 100644
--- a/target/hexagon/reg_fields.c
+++ b/target/hexagon/reg_fields.c
@@ -18,10 +18,9 @@
#include "qemu/osdep.h"
#include "reg_fields.h"
-const RegField reg_field_info[] = {
+const RegField reg_field_info[NUM_REG_FIELDS] = {
#define DEF_REG_FIELD(TAG, START, WIDTH) \
{ START, WIDTH },
#include "reg_fields_def.h.inc"
- { 0, 0 }
#undef DEF_REG_FIELD
};
diff --git a/target/hexagon/reg_fields.h b/target/hexagon/reg_fields.h
index d3c86c94..9e2ad5d 100644
--- a/target/hexagon/reg_fields.h
+++ b/target/hexagon/reg_fields.h
@@ -23,8 +23,6 @@ typedef struct {
int width;
} RegField;
-extern const RegField reg_field_info[];
-
enum {
#define DEF_REG_FIELD(TAG, START, WIDTH) \
TAG,
@@ -33,4 +31,6 @@ enum {
#undef DEF_REG_FIELD
};
+extern const RegField reg_field_info[NUM_REG_FIELDS];
+
#endif
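
The reg_fields change relies on the usual X-macro trick: reg_fields_def.h.inc is expanded once to produce the enum (whose final member, NUM_REG_FIELDS, is assumed here to count the entries) and once to fill the table, so the array can be declared with an exact size and the old { 0, 0 } sentinel dropped. A generic sketch of the pattern with made-up field names, not the real Hexagon register fields:

/* Hypothetical list, expanded twice below */
#define FIELD_LIST \
    DEF_FIELD(OVF,   0, 1) \
    DEF_FIELD(ROUND, 22, 2)

/* First expansion: an enum whose last member counts the entries */
enum {
#define DEF_FIELD(TAG, START, WIDTH) TAG,
    FIELD_LIST
#undef DEF_FIELD
    NUM_FIELDS
};

/* Second expansion: a table sized by that count, no sentinel needed */
typedef struct { int start; int width; } Field;
static const Field field_info[NUM_FIELDS] = {
#define DEF_FIELD(TAG, START, WIDTH) { START, WIDTH },
    FIELD_LIST
#undef DEF_FIELD
};
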
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index eeaad5f..9a37644 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -35,9 +35,7 @@ TCGv hex_this_PC;
TCGv hex_slot_cancelled;
TCGv hex_branch_taken;
TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
-#if HEX_DEBUG
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
-#endif
TCGv hex_new_pred_value[NUM_PREGS];
TCGv hex_pred_written;
TCGv hex_store_addr[STORES_MAX];
@@ -54,19 +52,42 @@ static const char * const hexagon_prednames[] = {
"p0", "p1", "p2", "p3"
};
-void gen_exception(int excp)
+static void gen_exception_raw(int excp)
{
TCGv_i32 helper_tmp = tcg_const_i32(excp);
gen_helper_raise_exception(cpu_env, helper_tmp);
tcg_temp_free_i32(helper_tmp);
}
-void gen_exception_debug(void)
+static void gen_exec_counters(DisasContext *ctx)
{
- gen_exception(EXCP_DEBUG);
+ tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
+ hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
+ tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
+ hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
+}
+
+static void gen_end_tb(DisasContext *ctx)
+{
+ gen_exec_counters(ctx);
+ tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
+ if (ctx->base.singlestep_enabled) {
+ gen_exception_raw(EXCP_DEBUG);
+ } else {
+ tcg_gen_exit_tb(NULL, 0);
+ }
+ ctx->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_exception_end_tb(DisasContext *ctx, int excp)
+{
+ gen_exec_counters(ctx);
+ tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
+ gen_exception_raw(excp);
+ ctx->base.is_jmp = DISAS_NORETURN;
+
}
-#if HEX_DEBUG
#define PACKET_BUFFER_LEN 1028
static void print_pkt(Packet *pkt)
{
@@ -75,10 +96,12 @@ static void print_pkt(Packet *pkt)
HEX_DEBUG_LOG("%s", buf->str);
g_string_free(buf, true);
}
-#define HEX_DEBUG_PRINT_PKT(pkt) print_pkt(pkt)
-#else
-#define HEX_DEBUG_PRINT_PKT(pkt) /* nothing */
-#endif
+#define HEX_DEBUG_PRINT_PKT(pkt) \
+ do { \
+ if (HEX_DEBUG) { \
+ print_pkt(pkt); \
+ } \
+ } while (0)
static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
uint32_t words[])
@@ -88,8 +111,8 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) {
- words[nwords] = cpu_ldl_code(env,
- ctx->base.pc_next + nwords * sizeof(uint32_t));
+ words[nwords] =
+ translator_ldl(env, ctx->base.pc_next + nwords * sizeof(uint32_t));
found_end = is_packet_end(words[nwords]);
}
if (!found_end) {
@@ -148,17 +171,18 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt)
ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
+ bitmap_zero(ctx->pregs_written, NUM_PREGS);
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
}
tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1);
- ctx->s1_store_processed = 0;
+ ctx->s1_store_processed = false;
-#if HEX_DEBUG
- /* Handy place to set a breakpoint before the packet executes */
- gen_helper_debug_start_packet(cpu_env);
- tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
-#endif
+ if (HEX_DEBUG) {
+ /* Handy place to set a breakpoint before the packet executes */
+ gen_helper_debug_start_packet(cpu_env);
+ tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
+ }
/* Initialize the runtime state for packet semantics */
if (need_pc(pkt)) {
@@ -185,7 +209,7 @@ static void mark_implicit_reg_write(DisasContext *ctx, Insn *insn,
int attrib, int rnum)
{
if (GET_ATTRIB(insn->opcode, attrib)) {
- int is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC);
+ bool is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC);
if (is_predicated && !is_preloaded(ctx, rnum)) {
tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]);
}
@@ -202,7 +226,7 @@ static void mark_implicit_pred_write(DisasContext *ctx, Insn *insn,
}
}
-static void mark_implicit_writes(DisasContext *ctx, Insn *insn)
+static void mark_implicit_reg_writes(DisasContext *ctx, Insn *insn)
{
mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_FP, HEX_REG_FP);
mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SP, HEX_REG_SP);
@@ -211,7 +235,10 @@ static void mark_implicit_writes(DisasContext *ctx, Insn *insn)
mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0);
mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1);
mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1);
+}
+static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn)
+{
mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P0, 0);
mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P1, 1);
mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P2, 2);
@@ -222,11 +249,11 @@ static void gen_insn(CPUHexagonState *env, DisasContext *ctx,
Insn *insn, Packet *pkt)
{
if (insn->generate) {
- mark_implicit_writes(ctx, insn);
+ mark_implicit_reg_writes(ctx, insn);
insn->generate(env, ctx, insn, pkt);
+ mark_implicit_pred_writes(ctx, insn);
} else {
- gen_exception(HEX_EXCP_INVALID_OPCODE);
- ctx->base.is_jmp = DISAS_NORETURN;
+ gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
}
}
@@ -280,10 +307,11 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt)
for (i = 0; i < ctx->preg_log_idx; i++) {
int pred_num = ctx->preg_log[i];
tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]);
-#if HEX_DEBUG
- /* Do this so HELPER(debug_commit_end) will know */
- tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pred_num);
-#endif
+ if (HEX_DEBUG) {
+ /* Do this so HELPER(debug_commit_end) will know */
+ tcg_gen_ori_tl(hex_pred_written, hex_pred_written,
+ 1 << pred_num);
+ }
}
}
@@ -292,20 +320,16 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt)
tcg_temp_free(pval);
}
-#if HEX_DEBUG
-static inline void gen_check_store_width(DisasContext *ctx, int slot_num)
+static void gen_check_store_width(DisasContext *ctx, int slot_num)
{
- TCGv slot = tcg_const_tl(slot_num);
- TCGv check = tcg_const_tl(ctx->store_width[slot_num]);
- gen_helper_debug_check_store_width(cpu_env, slot, check);
- tcg_temp_free(slot);
- tcg_temp_free(check);
+ if (HEX_DEBUG) {
+ TCGv slot = tcg_const_tl(slot_num);
+ TCGv check = tcg_const_tl(ctx->store_width[slot_num]);
+ gen_helper_debug_check_store_width(cpu_env, slot, check);
+ tcg_temp_free(slot);
+ tcg_temp_free(check);
+ }
}
-#define HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num) \
- gen_check_store_width(ctx, slot_num)
-#else
-#define HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num) /* nothing */
-#endif
static bool slot_is_predicated(Packet *pkt, int slot_num)
{
@@ -330,7 +354,7 @@ void process_store(DisasContext *ctx, Packet *pkt, int slot_num)
if (slot_num == 1 && ctx->s1_store_processed) {
return;
}
- ctx->s1_store_processed = 1;
+ ctx->s1_store_processed = true;
if (is_predicated) {
TCGv cancelled = tcg_temp_new();
@@ -355,25 +379,25 @@ void process_store(DisasContext *ctx, Packet *pkt, int slot_num)
*/
switch (ctx->store_width[slot_num]) {
case 1:
- HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num);
+ gen_check_store_width(ctx, slot_num);
tcg_gen_qemu_st8(hex_store_val32[slot_num],
hex_store_addr[slot_num],
ctx->mem_idx);
break;
case 2:
- HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num);
+ gen_check_store_width(ctx, slot_num);
tcg_gen_qemu_st16(hex_store_val32[slot_num],
hex_store_addr[slot_num],
ctx->mem_idx);
break;
case 4:
- HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num);
+ gen_check_store_width(ctx, slot_num);
tcg_gen_qemu_st32(hex_store_val32[slot_num],
hex_store_addr[slot_num],
ctx->mem_idx);
break;
case 8:
- HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num);
+ gen_check_store_width(ctx, slot_num);
tcg_gen_qemu_st64(hex_store_val64[slot_num],
hex_store_addr[slot_num],
ctx->mem_idx);
@@ -451,14 +475,6 @@ static void update_exec_counters(DisasContext *ctx, Packet *pkt)
ctx->num_insns += num_real_insns;
}
-static void gen_exec_counters(DisasContext *ctx)
-{
- tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
- hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets);
- tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT],
- hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
-}
-
static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
{
gen_reg_writes(ctx);
@@ -466,8 +482,7 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
process_store_log(ctx, pkt);
process_dczeroa(ctx, pkt);
update_exec_counters(ctx, pkt);
-#if HEX_DEBUG
- {
+ if (HEX_DEBUG) {
TCGv has_st0 =
tcg_const_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa);
TCGv has_st1 =
@@ -479,10 +494,9 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt)
tcg_temp_free(has_st0);
tcg_temp_free(has_st1);
}
-#endif
if (pkt->pkt_has_cof) {
- ctx->base.is_jmp = DISAS_NORETURN;
+ gen_end_tb(ctx);
}
}
@@ -495,8 +509,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
nwords = read_packet_words(env, ctx, words);
if (!nwords) {
- gen_exception(HEX_EXCP_INVALID_PACKET);
- ctx->base.is_jmp = DISAS_NORETURN;
+ gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
return;
}
@@ -509,8 +522,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
gen_commit_packet(ctx, &pkt);
ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
} else {
- gen_exception(HEX_EXCP_INVALID_PACKET);
- ctx->base.is_jmp = DISAS_NORETURN;
+ gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
}
}
@@ -540,9 +552,7 @@ static bool hexagon_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
- tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
- ctx->base.is_jmp = DISAS_NORETURN;
- gen_exception_debug();
+ gen_exception_end_tb(ctx, EXCP_DEBUG);
/*
* The address covered by the breakpoint must be included in
* [tb->pc, tb->pc + tb->size) in order to for it to be
@@ -589,14 +599,10 @@ static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
* The CPU log is used to compare against LLDB single stepping,
* so end the TLB after every packet.
*/
- HexagonCPU *hex_cpu = container_of(env, HexagonCPU, env);
+ HexagonCPU *hex_cpu = env_archcpu(env);
if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
ctx->base.is_jmp = DISAS_TOO_MANY;
}
-#if HEX_DEBUG
- /* When debugging, only put one packet per TB */
- ctx->base.is_jmp = DISAS_TOO_MANY;
-#endif
}
}
@@ -609,19 +615,12 @@ static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
gen_exec_counters(ctx);
tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
if (ctx->base.singlestep_enabled) {
- gen_exception_debug();
+ gen_exception_raw(EXCP_DEBUG);
} else {
tcg_gen_exit_tb(NULL, 0);
}
break;
case DISAS_NORETURN:
- gen_exec_counters(ctx);
- tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
- if (ctx->base.singlestep_enabled) {
- gen_exception_debug();
- } else {
- tcg_gen_exit_tb(NULL, 0);
- }
break;
default:
g_assert_not_reached();
@@ -654,9 +653,7 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
#define NAME_LEN 64
static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
-#if HEX_DEBUG
static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
-#endif
static char new_pred_value_names[NUM_PREGS][NAME_LEN];
static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
@@ -669,11 +666,11 @@ void hexagon_translate_init(void)
opcode_init();
-#if HEX_DEBUG
- if (!qemu_logfile) {
- qemu_set_log(qemu_loglevel);
+ if (HEX_DEBUG) {
+ if (!qemu_logfile) {
+ qemu_set_log(qemu_loglevel);
+ }
}
-#endif
for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
hex_gpr[i] = tcg_global_mem_new(cpu_env,
@@ -685,13 +682,13 @@ void hexagon_translate_init(void)
offsetof(CPUHexagonState, new_value[i]),
new_value_names[i]);
-#if HEX_DEBUG
- snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
- hexagon_regnames[i]);
- hex_reg_written[i] = tcg_global_mem_new(cpu_env,
- offsetof(CPUHexagonState, reg_written[i]),
- reg_written_names[i]);
-#endif
+ if (HEX_DEBUG) {
+ snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
+ hexagon_regnames[i]);
+ hex_reg_written[i] = tcg_global_mem_new(cpu_env,
+ offsetof(CPUHexagonState, reg_written[i]),
+ reg_written_names[i]);
+ }
}
for (i = 0; i < NUM_PREGS; i++) {
hex_pred[i] = tcg_global_mem_new(cpu_env,
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 938f7fb..703fd13 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -34,17 +34,16 @@ typedef struct DisasContext {
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
+ DECLARE_BITMAP(pregs_written, NUM_PREGS);
uint8_t store_width[STORES_MAX];
- uint8_t s1_store_processed;
+ bool s1_store_processed;
} DisasContext;
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum)
{
-#if HEX_DEBUG
if (test_bit(rnum, ctx->regs_written)) {
HEX_DEBUG_LOG("WARNING: Multiple writes to r%d\n", rnum);
}
-#endif
ctx->reg_log[ctx->reg_log_idx] = rnum;
ctx->reg_log_idx++;
set_bit(rnum, ctx->regs_written);
@@ -60,6 +59,7 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
ctx->preg_log[ctx->preg_log_idx] = pnum;
ctx->preg_log_idx++;
+ set_bit(pnum, ctx->pregs_written);
}
static inline bool is_preloaded(DisasContext *ctx, int num)
@@ -86,8 +86,5 @@ extern TCGv hex_llsc_addr;
extern TCGv hex_llsc_val;
extern TCGv_i64 hex_llsc_val_i64;
-void gen_exception(int excp);
-void gen_exception_debug(void);
-
void process_store(DisasContext *ctx, Packet *pkt, int slot_num);
#endif
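
Most of the translate.c/translate.h churn above replaces #if HEX_DEBUG blocks with plain if (HEX_DEBUG) statements. Assuming HEX_DEBUG is a compile-time 0/1 constant, the debug code is now parsed and type-checked in every build but still folds away when disabled. A tiny illustration of the idiom (MY_DEBUG is a hypothetical stand-in):

#include <stdio.h>

#define MY_DEBUG 0              /* hypothetical stand-in for HEX_DEBUG */

static void do_work(int x)
{
    if (MY_DEBUG) {
        /* Always compiled, so it cannot bit-rot, but any optimizing
           compiler removes the call when MY_DEBUG is 0. */
        printf("debug: x = %d\n", x);
    }
}

int main(void)
{
    do_work(42);
    return 0;
}
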
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index 616af69..0992787 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -28,6 +28,7 @@ endif
CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal
+CFLAGS += -fno-unroll-loops
HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon
VPATH += $(HEX_SRC)
@@ -39,7 +40,12 @@ HEX_TESTS = first
HEX_TESTS += misc
HEX_TESTS += preg_alias
HEX_TESTS += dual_stores
+HEX_TESTS += multi_result
HEX_TESTS += mem_noshuf
+HEX_TESTS += circ
+HEX_TESTS += brev
+HEX_TESTS += load_unpack
+HEX_TESTS += load_align
HEX_TESTS += atomics
HEX_TESTS += fpstuff
diff --git a/tests/tcg/hexagon/brev.c b/tests/tcg/hexagon/brev.c
new file mode 100644
index 0000000..9736a24
--- /dev/null
+++ b/tests/tcg/hexagon/brev.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int err;
+
+#define NBITS 8
+#define SIZE (1 << NBITS)
+
+long long dbuf[SIZE] __attribute__((aligned(1 << 16))) = {0};
+int wbuf[SIZE] __attribute__((aligned(1 << 16))) = {0};
+short hbuf[SIZE] __attribute__((aligned(1 << 16))) = {0};
+unsigned char bbuf[SIZE] __attribute__((aligned(1 << 16))) = {0};
+
+/*
+ * We use the C preprocessor to deal with the combinations of types
+ */
+
+#define BREV_LOAD(SZ, RES, ADDR, INC) \
+ __asm__( \
+ "m0 = %2\n\t" \
+ "%0 = mem" #SZ "(%1++m0:brev)\n\t" \
+ : "=r"(RES), "+r"(ADDR) \
+ : "r"(INC) \
+ : "m0")
+
+#define BREV_LOAD_b(RES, ADDR, INC) \
+ BREV_LOAD(b, RES, ADDR, INC)
+#define BREV_LOAD_ub(RES, ADDR, INC) \
+ BREV_LOAD(ub, RES, ADDR, INC)
+#define BREV_LOAD_h(RES, ADDR, INC) \
+ BREV_LOAD(h, RES, ADDR, INC)
+#define BREV_LOAD_uh(RES, ADDR, INC) \
+ BREV_LOAD(uh, RES, ADDR, INC)
+#define BREV_LOAD_w(RES, ADDR, INC) \
+ BREV_LOAD(w, RES, ADDR, INC)
+#define BREV_LOAD_d(RES, ADDR, INC) \
+ BREV_LOAD(d, RES, ADDR, INC)
+
+#define BREV_STORE(SZ, PART, ADDR, VAL, INC) \
+ __asm__( \
+ "m0 = %2\n\t" \
+ "mem" #SZ "(%0++m0:brev) = %1" PART "\n\t" \
+ : "+r"(ADDR) \
+ : "r"(VAL), "r"(INC) \
+ : "m0", "memory")
+
+#define BREV_STORE_b(ADDR, VAL, INC) \
+ BREV_STORE(b, "", ADDR, VAL, INC)
+#define BREV_STORE_h(ADDR, VAL, INC) \
+ BREV_STORE(h, "", ADDR, VAL, INC)
+#define BREV_STORE_f(ADDR, VAL, INC) \
+ BREV_STORE(h, ".H", ADDR, VAL, INC)
+#define BREV_STORE_w(ADDR, VAL, INC) \
+ BREV_STORE(w, "", ADDR, VAL, INC)
+#define BREV_STORE_d(ADDR, VAL, INC) \
+ BREV_STORE(d, "", ADDR, VAL, INC)
+
+#define BREV_STORE_NEW(SZ, ADDR, VAL, INC) \
+ __asm__( \
+ "m0 = %2\n\t" \
+ "{\n\t" \
+ " r5 = %1\n\t" \
+ " mem" #SZ "(%0++m0:brev) = r5.new\n\t" \
+ "}\n\t" \
+ : "+r"(ADDR) \
+ : "r"(VAL), "r"(INC) \
+ : "r5", "m0", "memory")
+
+#define BREV_STORE_bnew(ADDR, VAL, INC) \
+ BREV_STORE_NEW(b, ADDR, VAL, INC)
+#define BREV_STORE_hnew(ADDR, VAL, INC) \
+ BREV_STORE_NEW(h, ADDR, VAL, INC)
+#define BREV_STORE_wnew(ADDR, VAL, INC) \
+ BREV_STORE_NEW(w, ADDR, VAL, INC)
+
+int bitreverse(int x)
+{
+ int result = 0;
+ int i;
+ for (i = 0; i < NBITS; i++) {
+ result <<= 1;
+ result |= x & 1;
+ x >>= 1;
+ }
+ return result;
+}
+
+int sext8(int x)
+{
+ return (x << 24) >> 24;
+}
+
+void check(int i, long long result, long long expect)
+{
+ if (result != expect) {
+ printf("ERROR(%d): 0x%04llx != 0x%04llx\n", i, result, expect);
+ err++;
+ }
+}
+
+#define TEST_BREV_LOAD(SZ, TYPE, BUF, SHIFT, EXP) \
+ do { \
+ p = BUF; \
+ for (i = 0; i < SIZE; i++) { \
+ TYPE result; \
+ BREV_LOAD_##SZ(result, p, 1 << (SHIFT - NBITS)); \
+ check(i, result, EXP); \
+ } \
+ } while (0)
+
+#define TEST_BREV_STORE(SZ, TYPE, BUF, VAL, SHIFT) \
+ do { \
+ p = BUF; \
+ memset(BUF, 0xff, sizeof(BUF)); \
+ for (i = 0; i < SIZE; i++) { \
+ BREV_STORE_##SZ(p, (TYPE)(VAL), 1 << (SHIFT - NBITS)); \
+ } \
+ for (i = 0; i < SIZE; i++) { \
+ check(i, BUF[i], bitreverse(i)); \
+ } \
+ } while (0)
+
+#define TEST_BREV_STORE_NEW(SZ, BUF, SHIFT) \
+ do { \
+ p = BUF; \
+ memset(BUF, 0xff, sizeof(BUF)); \
+ for (i = 0; i < SIZE; i++) { \
+ BREV_STORE_##SZ(p, i, 1 << (SHIFT - NBITS)); \
+ } \
+ for (i = 0; i < SIZE; i++) { \
+ check(i, BUF[i], bitreverse(i)); \
+ } \
+ } while (0)
+
+/*
+ * We'll set high_half[i] = i << 16 for use in the .H form of store
+ * which stores from the high half of the word.
+ */
+int high_half[SIZE];
+
+int main()
+{
+ void *p;
+ int i;
+
+ for (i = 0; i < SIZE; i++) {
+ bbuf[i] = bitreverse(i);
+ hbuf[i] = bitreverse(i);
+ wbuf[i] = bitreverse(i);
+ dbuf[i] = bitreverse(i);
+ high_half[i] = i << 16;
+ }
+
+ TEST_BREV_LOAD(b, int, bbuf, 16, sext8(i));
+ TEST_BREV_LOAD(ub, int, bbuf, 16, i);
+ TEST_BREV_LOAD(h, int, hbuf, 15, i);
+ TEST_BREV_LOAD(uh, int, hbuf, 15, i);
+ TEST_BREV_LOAD(w, int, wbuf, 14, i);
+ TEST_BREV_LOAD(d, long long, dbuf, 13, i);
+
+ TEST_BREV_STORE(b, int, bbuf, i, 16);
+ TEST_BREV_STORE(h, int, hbuf, i, 15);
+ TEST_BREV_STORE(f, int, hbuf, high_half[i], 15);
+ TEST_BREV_STORE(w, int, wbuf, i, 14);
+ TEST_BREV_STORE(d, long long, dbuf, i, 13);
+
+ TEST_BREV_STORE_NEW(bnew, bbuf, 16);
+ TEST_BREV_STORE_NEW(hnew, hbuf, 15);
+ TEST_BREV_STORE_NEW(wnew, wbuf, 14);
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0;
+}
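
The expected values in brev.c all depend on bitreverse() over NBITS bits being its own inverse: the buffers are seeded with buf[i] = bitreverse(i), so walking them with ++m0:brev addressing reads back 0, 1, 2, ... in order. A stand-alone check of that property (same bitreverse() as above, hypothetical harness):

#include <assert.h>
#include <stdio.h>

#define NBITS 8
#define SIZE (1 << NBITS)

static int bitreverse(int x)
{
    int result = 0;
    int i;
    for (i = 0; i < NBITS; i++) {
        result <<= 1;
        result |= x & 1;
        x >>= 1;
    }
    return result;
}

int main(void)
{
    int i;
    for (i = 0; i < SIZE; i++) {
        /* Involution: reversing the bits twice returns the index */
        assert(bitreverse(bitreverse(i)) == i);
    }
    printf("bitreverse(0x01) = 0x%02x\n", bitreverse(0x01)); /* 0x80 */
    return 0;
}
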
diff --git a/tests/tcg/hexagon/circ.c b/tests/tcg/hexagon/circ.c
new file mode 100644
index 0000000..67a1aa3
--- /dev/null
+++ b/tests/tcg/hexagon/circ.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+
+#define DEBUG 0
+#define DEBUG_PRINTF(...) \
+ do { \
+ if (DEBUG) { \
+ printf(__VA_ARGS__); \
+ } \
+ } while (0)
+
+
+#define NBYTES (1 << 8)
+#define NHALFS (NBYTES / sizeof(short))
+#define NWORDS (NBYTES / sizeof(int))
+#define NDOBLS (NBYTES / sizeof(long long))
+
+long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0};
+int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0};
+short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0};
+unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0};
+
+/*
+ * We use the C preprocessor to deal with the combinations of types
+ */
+
+#define INIT(BUF, N) \
+ void init_##BUF(void) \
+ { \
+ int i; \
+ for (i = 0; i < N; i++) { \
+ BUF[i] = i; \
+ } \
+ } \
+
+INIT(bbuf, NBYTES)
+INIT(hbuf, NHALFS)
+INIT(wbuf, NWORDS)
+INIT(dbuf, NDOBLS)
+
+/*
+ * Macros for performing circular load
+ * RES result
+ * ADDR address
+ * START start address of buffer
+ * LEN length of buffer (in bytes)
+ * INC address increment (in bytes for IMM, elements for REG)
+ */
+#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %2\n\t" \
+ "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
+ : "=r"(RES), "+r"(ADDR) \
+ : "r"(START), "r"(LEN) \
+ : "r4", "m0", "cs0")
+#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)
+
+/*
+ * The mreg has the following pieces
+ * mreg[31:28] increment[10:7]
+ * mreg[27:24] K value (used in Hexagon V3 and earlier)
+ * mreg[23:17] increment[6:0]
+ * mreg[16:0] circular buffer length
+ */
+static int build_mreg(int inc, int K, int len)
+{
+ return ((inc & 0x780) << 21) |
+ ((K & 0xf) << 24) |
+ ((inc & 0x7f) << 17) |
+ (len & 0x1ffff);
+}
+
+#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %2\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %3\n\t" \
+ "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
+ : "=r"(RES), "+r"(ADDR) \
+ : "r"(build_mreg((INC), 0, (LEN))), \
+ "r"(START) \
+ : "r4", "m1", "cs1")
+#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)
+
+/*
+ * Macros for performing circular store
+ * VAL value to store
+ * ADDR address
+ * START start address of buffer
+ * LEN length of buffer (in bytes)
+ * INC address increment (in bytes for IMM, elements for REG)
+ */
+#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %1\n\t" \
+ "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
+ : "+r"(ADDR) \
+ : "r"(START), "r"(VAL), "r"(LEN) \
+ : "r4", "m0", "cs0", "memory")
+#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)
+
+#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %1\n\t" \
+ "{\n\t" \
+ " r5 = %2\n\t" \
+ " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
+ "}\n\t" \
+ : "+r"(ADDR) \
+ : "r"(START), "r"(VAL), "r"(LEN) \
+ : "r4", "r5", "m0", "cs0", "memory")
+#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)
+
+#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %1\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %2\n\t" \
+ "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
+ : "+r"(ADDR) \
+ : "r"(build_mreg((INC), 0, (LEN))), \
+ "r"(START), \
+ "r"(VAL) \
+ : "r4", "m1", "cs1", "memory")
+#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)
+
+#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %1\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %2\n\t" \
+ "{\n\t" \
+ " r5 = %3\n\t" \
+ " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
+ "}\n\t" \
+ : "+r"(ADDR) \
+ : "r"(build_mreg((INC), 0, (LEN))), \
+ "r"(START), \
+ "r"(VAL) \
+ : "r4", "r5", "m1", "cs1", "memory")
+#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)
+
+
+int err;
+
+/* We'll test increments +1 and -1 */
+void check_load(int i, long long result, int inc, int size)
+{
+ int expect = (i * inc);
+ while (expect >= size) {
+ expect -= size;
+ }
+ while (expect < 0) {
+ expect += size;
+ }
+ if (result != expect) {
+ printf("ERROR(%d): %lld != %d\n", i, result, expect);
+ err++;
+ }
+}
+
+#define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
+void circ_test_load_imm_##SZ(void) \
+{ \
+ TYPE *p = (TYPE *)BUF; \
+ int size = 10; \
+ int i; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
+ } \
+ p = (TYPE *)BUF; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
+ } \
+}
+
+TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d)
+TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d)
+TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d)
+TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d)
+TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, d)
+TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld)
+
+#define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
+void circ_test_load_reg_##SZ(void) \
+{ \
+ TYPE *p = (TYPE *)BUF; \
+ int size = 13; \
+ int i; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, 1, size); \
+ } \
+ p = (TYPE *)BUF; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, -1, size); \
+ } \
+}
+
+TEST_LOAD_REG(b, char, bbuf, NBYTES, d)
+TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d)
+TEST_LOAD_REG(h, short, hbuf, NHALFS, d)
+TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d)
+TEST_LOAD_REG(w, int, wbuf, NWORDS, d)
+TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld)
+
+/* The circular stores will wrap around somewhere inside the buffer */
+#define CIRC_VAL(SZ, TYPE, BUFSIZE) \
+TYPE circ_val_##SZ(int i, int inc, int size) \
+{ \
+ int mod = BUFSIZE % size; \
+ int elem = i * inc; \
+ if (elem < 0) { \
+ if (-elem <= size - mod) { \
+ return (elem + BUFSIZE - mod); \
+ } else { \
+ return (elem + BUFSIZE + size - mod); \
+ } \
+ } else if (elem < mod) {\
+ return (elem + BUFSIZE - mod); \
+ } else { \
+ return (elem + BUFSIZE - size - mod); \
+ } \
+}
+
+CIRC_VAL(b, unsigned char, NBYTES)
+CIRC_VAL(h, short, NHALFS)
+CIRC_VAL(w, int, NWORDS)
+CIRC_VAL(d, long long, NDOBLS)
+
+/*
+ * Circular stores should only write to the first "size" elements of the buffer;
+ * the remainder of the elements should have BUF[i] == i
+ */
+#define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
+void check_store_##SZ(int inc, int size) \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) { \
+ DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
+ i, BUF[i], circ_val_##SZ(i, inc, size)); \
+ if (BUF[i] != circ_val_##SZ(i, inc, size)) { \
+ printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \
+ i, BUF[i], circ_val_##SZ(i, inc, size)); \
+ err++; \
+ } \
+ } \
+ for (i = size; i < BUFSIZE; i++) { \
+ if (BUF[i] != i) { \
+ printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \
+ err++; \
+ } \
+ } \
+}
+
+CHECK_STORE(b, bbuf, NBYTES, x)
+CHECK_STORE(h, hbuf, NHALFS, x)
+CHECK_STORE(w, wbuf, NWORDS, x)
+CHECK_STORE(d, dbuf, NDOBLS, llx)
+
+#define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
+void circ_test_store_imm_##SZ(void) \
+{ \
+ unsigned int size = 27; \
+ TYPE *p = BUF; \
+ TYPE val = 0; \
+ int i; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
+ val++; \
+ } \
+ check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
+ p = BUF; \
+ val = 0; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
+ -(INC)); \
+ val++; \
+ } \
+ check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
+}
+
+CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1)
+CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2)
+CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2)
+CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4)
+CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8)
+CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1)
+CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2)
+CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4)
+
+#define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
+void circ_test_store_reg_##SZ(void) \
+{ \
+ TYPE *p = BUF; \
+ unsigned int size = 19; \
+ TYPE val = 0; \
+ int i; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
+ val++; \
+ } \
+ check_store_##CHK(1, size); \
+ p = BUF; \
+ val = 0; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
+ val++; \
+ } \
+ check_store_##CHK(-1, size); \
+}
+
+CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0)
+CIRC_TEST_STORE_REG(h, h, short, hbuf, NHALFS, 0)
+CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16)
+CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0)
+CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0)
+CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0)
+CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0)
+CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0)
+
+/* Test the old scheme used in Hexagon V3 */
+static void circ_test_v3(void)
+{
+ int *p = wbuf;
+ int size = 15;
+ int K = 4; /* 64 bytes */
+ int element;
+ int i;
+
+ init_wbuf();
+
+ for (i = 0; i < NWORDS; i++) {
+ __asm__(
+ "r4 = %2\n\t"
+ "m1 = r4\n\t"
+ "%0 = memw(%1++I:circ(M1))\n\t"
+ : "=r"(element), "+r"(p)
+ : "r"(build_mreg(1, K, size * sizeof(int)))
+ : "r4", "m1");
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
+ check_load(i, element, 1, size);
+ }
+}
+
+int main()
+{
+ init_bbuf();
+ init_hbuf();
+ init_wbuf();
+ init_dbuf();
+
+ DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
+ DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
+ DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
+ DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
+ DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);
+
+ circ_test_load_imm_b();
+ circ_test_load_imm_ub();
+ circ_test_load_imm_h();
+ circ_test_load_imm_uh();
+ circ_test_load_imm_w();
+ circ_test_load_imm_d();
+
+ circ_test_load_reg_b();
+ circ_test_load_reg_ub();
+ circ_test_load_reg_h();
+ circ_test_load_reg_uh();
+ circ_test_load_reg_w();
+ circ_test_load_reg_d();
+
+ circ_test_store_imm_b();
+ circ_test_store_imm_h();
+ circ_test_store_imm_f();
+ circ_test_store_imm_w();
+ circ_test_store_imm_d();
+ circ_test_store_imm_bnew();
+ circ_test_store_imm_hnew();
+ circ_test_store_imm_wnew();
+
+ circ_test_store_reg_b();
+ circ_test_store_reg_h();
+ circ_test_store_reg_f();
+ circ_test_store_reg_w();
+ circ_test_store_reg_d();
+ circ_test_store_reg_bnew();
+ circ_test_store_reg_hnew();
+ circ_test_store_reg_wnew();
+
+ circ_test_v3();
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0;
+}
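
As a worked example of the modifier-register layout documented just above build_mreg() in circ.c: the V3-style test at the end passes increment 1, K = 4 and a 60-byte buffer (15 ints), which packs to 0x0402003c. A quick stand-alone check using the same packing function:

#include <assert.h>

/* Same packing as build_mreg() in circ.c */
static int build_mreg(int inc, int K, int len)
{
    return ((inc & 0x780) << 21) |
           ((K & 0xf) << 24) |
           ((inc & 0x7f) << 17) |
           (len & 0x1ffff);
}

int main(void)
{
    /* The V3 test's modifier: increment 1, K = 4, 60-byte buffer */
    assert(build_mreg(1, 4, 60) ==
           (0x0 << 28 |       /* increment[10:7] = 0   */
            0x4 << 24 |       /* K value               */
            0x1 << 17 |       /* increment[6:0] = 1    */
            60));             /* buffer length in bytes */
    /* i.e. 0x0402003c */
    return 0;
}
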
diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c
index e4f1a0e..0dff429 100644
--- a/tests/tcg/hexagon/fpstuff.c
+++ b/tests/tcg/hexagon/fpstuff.c
@@ -37,10 +37,12 @@ const int SF_NaN = 0x7fc00000;
const int SF_NaN_special = 0x7f800001;
const int SF_ANY = 0x3f800000;
const int SF_HEX_NAN = 0xffffffff;
+const int SF_small_neg = 0xab98fba8;
const long long DF_NaN = 0x7ff8000000000000ULL;
const long long DF_ANY = 0x3f80000000000000ULL;
const long long DF_HEX_NAN = 0xffffffffffffffffULL;
+const long long DF_small_neg = 0xbd731f7500000000ULL;
int err;
@@ -248,6 +250,87 @@ static void check_dfminmax(void)
check_fpstatus(usr, FPINVF);
}
+static void check_recip_exception(void)
+{
+ int result;
+ int usr;
+
+ /*
+ * Check that sfrecipa doesn't set status bits when
+ * a NaN with bit 22 non-zero is passed
+ */
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY)
+ : "r2", "p0", "usr");
+ check32(result, SF_HEX_NAN);
+ check_fpstatus(usr, 0);
+
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN)
+ : "r2", "p0", "usr");
+ check32(result, SF_HEX_NAN);
+ check_fpstatus(usr, 0);
+
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(SF_NaN)
+ : "r2", "p0", "usr");
+ check32(result, SF_HEX_NAN);
+ check_fpstatus(usr, 0);
+
+ /*
+ * Check that sfrecipa raises the invalid flag when
+ * a NaN with bit 22 zero (a signaling NaN) is passed
+ */
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(SF_NaN_special), "r"(SF_ANY)
+ : "r2", "p0", "usr");
+ check32(result, SF_HEX_NAN);
+ check_fpstatus(usr, FPINVF);
+
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN_special)
+ : "r2", "p0", "usr");
+ check32(result, SF_HEX_NAN);
+ check_fpstatus(usr, FPINVF);
+
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(SF_NaN_special)
+ : "r2", "p0", "usr");
+ check32(result, SF_HEX_NAN);
+ check_fpstatus(usr, FPINVF);
+
+ /*
+ * Check that sfrecipa properly sets divide-by-zero
+ */
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(0x885dc960), "r"(0x80000000)
+ : "r2", "p0", "usr");
+ check32(result, 0x3f800000);
+ check_fpstatus(usr, FPDBZF);
+
+ asm (CLEAR_FPSTATUS
+ "%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(result), "=r"(usr) : "r"(0x7f800000), "r"(SF_ZERO)
+ : "r2", "p0", "usr");
+ check32(result, 0x3f800000);
+ check_fpstatus(usr, 0);
+}
+
static void check_canonical_NaN(void)
{
int sf_result;
@@ -358,12 +441,171 @@ static void check_canonical_NaN(void)
check_fpstatus(usr, 0);
}
+static void check_invsqrta(void)
+{
+ int result;
+ int predval;
+
+ asm volatile("%0,p0 = sfinvsqrta(%2)\n\t"
+ "%1 = p0\n\t"
+ : "+r"(result), "=r"(predval)
+ : "r"(0x7f800000)
+ : "p0");
+ check32(result, 0xff800000);
+ check32(predval, 0x0);
+}
+
+static void check_float2int_convs()
+{
+ int res32;
+ long long res64;
+ int usr;
+
+ /*
+ * Check that the various forms of float-to-unsigned conversion
+ * check the sign before rounding
+ */
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2uw(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(SF_small_neg)
+ : "r2", "usr");
+ check32(res32, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2uw(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(SF_small_neg)
+ : "r2", "usr");
+ check32(res32, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2ud(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(SF_small_neg)
+ : "r2", "usr");
+ check64(res64, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2ud(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(SF_small_neg)
+ : "r2", "usr");
+ check64(res64, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2uw(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(DF_small_neg)
+ : "r2", "usr");
+ check32(res32, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2uw(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(DF_small_neg)
+ : "r2", "usr");
+ check32(res32, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2ud(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(DF_small_neg)
+ : "r2", "usr");
+ check64(res64, 0);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2ud(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(DF_small_neg)
+ : "r2", "usr");
+ check64(res64, 0);
+ check_fpstatus(usr, FPINVF);
+
+ /*
+ * Check that the various forms of float-to-signed conversion return -1 for NaN
+ */
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2w(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(SF_NaN)
+ : "r2", "usr");
+ check32(res32, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2w(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(SF_NaN)
+ : "r2", "usr");
+ check32(res32, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2d(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(SF_NaN)
+ : "r2", "usr");
+ check64(res64, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_sf2d(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(SF_NaN)
+ : "r2", "usr");
+ check64(res64, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2w(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(DF_NaN)
+ : "r2", "usr");
+ check32(res32, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2w(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res32), "=r"(usr) : "r"(DF_NaN)
+ : "r2", "usr");
+ check32(res32, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2d(%2)\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(DF_NaN)
+ : "r2", "usr");
+ check64(res64, -1);
+ check_fpstatus(usr, FPINVF);
+
+ asm(CLEAR_FPSTATUS
+ "%0 = convert_df2d(%2):chop\n\t"
+ "%1 = usr\n\t"
+ : "=r"(res64), "=r"(usr) : "r"(DF_NaN)
+ : "r2", "usr");
+ check64(res64, -1);
+ check_fpstatus(usr, FPINVF);
+}
+
int main()
{
check_compare_exception();
check_sfminmax();
check_dfminmax();
+ check_recip_exception();
check_canonical_NaN();
+ check_invsqrta();
+ check_float2int_convs();
puts(err ? "FAIL" : "PASS");
return err ? 1 : 0;
diff --git a/tests/tcg/hexagon/load_align.c b/tests/tcg/hexagon/load_align.c
new file mode 100644
index 0000000..12fc9cb
--- /dev/null
+++ b/tests/tcg/hexagon/load_align.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Test load align instructions
+ *
+ * Example
+ * r1:0 = memh_fifo(r1+#0)
+ * loads a half word from memory, shifts the destination register
+ * right by one half word and inserts the loaded value into the high
+ * half word of the destination.
+ *
+ * There are 8 addressing modes and byte and half word variants, for a
+ * total of 16 instructions to test
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int err;
+
+char buf[16] __attribute__((aligned(1 << 16)));
+
+void init_buf(void)
+{
+ int i;
+ for (i = 0; i < 16; i++) {
+ buf[i] = i + 1;
+ }
+}
+
+void __check(int line, long long result, long long expect)
+{
+ if (result != expect) {
+ printf("ERROR at line %d: 0x%016llx != 0x%016llx\n",
+ line, result, expect);
+ err++;
+ }
+}
+
+#define check(RES, EXP) __check(__LINE__, RES, EXP)
+
+void __checkp(int line, void *p, void *expect)
+{
+ if (p != expect) {
+ printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect);
+ err++;
+ }
+}
+
+#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP)
+
+/*
+ ****************************************************************************
+ * _io addressing mode (addr + offset)
+ */
+#define LOAD_io(SZ, RES, ADDR, OFF) \
+ __asm__( \
+ "%0 = mem" #SZ "_fifo(%1+#" #OFF ")\n\t" \
+ : "+r"(RES) \
+ : "r"(ADDR))
+#define LOAD_io_b(RES, ADDR, OFF) \
+ LOAD_io(b, RES, ADDR, OFF)
+#define LOAD_io_h(RES, ADDR, OFF) \
+ LOAD_io(h, RES, ADDR, OFF)
+
+#define TEST_io(NAME, SZ, SIZE, EXP1, EXP2, EXP3, EXP4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ LOAD_io_##SZ(result, buf, 0 * (SIZE)); \
+ check(result, (EXP1)); \
+ LOAD_io_##SZ(result, buf, 1 * (SIZE)); \
+ check(result, (EXP2)); \
+ LOAD_io_##SZ(result, buf, 2 * (SIZE)); \
+ check(result, (EXP3)); \
+ LOAD_io_##SZ(result, buf, 3 * (SIZE)); \
+ check(result, (EXP4)); \
+}
+
+TEST_io(loadalignb_io, b, 1,
+ 0x01ffffffffffffffLL, 0x0201ffffffffffffLL,
+ 0x030201ffffffffffLL, 0x04030201ffffffffLL)
+TEST_io(loadalignh_io, h, 2,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x060504030201ffffLL, 0x0807060504030201LL)
+
+/*
+ ****************************************************************************
+ * _ur addressing mode (index << offset + base)
+ */
+#define LOAD_ur(SZ, RES, SHIFT, IDX) \
+ __asm__( \
+ "%0 = mem" #SZ "_fifo(%1<<#" #SHIFT " + ##buf)\n\t" \
+ : "+r"(RES) \
+ : "r"(IDX))
+#define LOAD_ur_b(RES, SHIFT, IDX) \
+ LOAD_ur(b, RES, SHIFT, IDX)
+#define LOAD_ur_h(RES, SHIFT, IDX) \
+ LOAD_ur(h, RES, SHIFT, IDX)
+
+#define TEST_ur(NAME, SZ, SHIFT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ LOAD_ur_##SZ(result, (SHIFT), 0); \
+ check(result, (RES1)); \
+ LOAD_ur_##SZ(result, (SHIFT), 1); \
+ check(result, (RES2)); \
+ LOAD_ur_##SZ(result, (SHIFT), 2); \
+ check(result, (RES3)); \
+ LOAD_ur_##SZ(result, (SHIFT), 3); \
+ check(result, (RES4)); \
+}
+
+TEST_ur(loadalignb_ur, b, 1,
+ 0x01ffffffffffffffLL, 0x0301ffffffffffffLL,
+ 0x050301ffffffffffLL, 0x07050301ffffffffLL)
+TEST_ur(loadalignh_ur, h, 1,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x060504030201ffffLL, 0x0807060504030201LL)
+
+/*
+ ****************************************************************************
+ * _ap addressing mode (addr = base)
+ */
+#define LOAD_ap(SZ, RES, PTR, ADDR) \
+ __asm__( \
+ "%0 = mem" #SZ "_fifo(%1 = ##" #ADDR ")\n\t" \
+ : "+r"(RES), "=r"(PTR))
+#define LOAD_ap_b(RES, PTR, ADDR) \
+ LOAD_ap(b, RES, PTR, ADDR)
+#define LOAD_ap_h(RES, PTR, ADDR) \
+ LOAD_ap(h, RES, PTR, ADDR)
+
+#define TEST_ap(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ void *ptr; \
+ LOAD_ap_##SZ(result, ptr, (buf + 0 * (SIZE))); \
+ check(result, (RES1)); \
+ checkp(ptr, &buf[0 * (SIZE)]); \
+ LOAD_ap_##SZ(result, ptr, (buf + 1 * (SIZE))); \
+ check(result, (RES2)); \
+ checkp(ptr, &buf[1 * (SIZE)]); \
+ LOAD_ap_##SZ(result, ptr, (buf + 2 * (SIZE))); \
+ check(result, (RES3)); \
+ checkp(ptr, &buf[2 * (SIZE)]); \
+ LOAD_ap_##SZ(result, ptr, (buf + 3 * (SIZE))); \
+ check(result, (RES4)); \
+ checkp(ptr, &buf[3 * (SIZE)]); \
+}
+
+TEST_ap(loadalignb_ap, b, 1,
+ 0x01ffffffffffffffLL, 0x0201ffffffffffffLL,
+ 0x030201ffffffffffLL, 0x04030201ffffffffLL)
+TEST_ap(loadalignh_ap, h, 2,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x060504030201ffffLL, 0x0807060504030201LL)
+
+/*
+ ****************************************************************************
+ * _pr addressing mode (addr ++ modifier-reg)
+ */
+#define LOAD_pr(SZ, RES, PTR, INC) \
+ __asm__( \
+ "m0 = %2\n\t" \
+ "%0 = mem" #SZ "_fifo(%1++m0)\n\t" \
+ : "+r"(RES), "+r"(PTR) \
+ : "r"(INC) \
+ : "m0")
+#define LOAD_pr_b(RES, PTR, INC) \
+ LOAD_pr(b, RES, PTR, INC)
+#define LOAD_pr_h(RES, PTR, INC) \
+ LOAD_pr(h, RES, PTR, INC)
+
+#define TEST_pr(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ void *ptr = buf; \
+ LOAD_pr_##SZ(result, ptr, (SIZE)); \
+ check(result, (RES1)); \
+ checkp(ptr, &buf[1 * (SIZE)]); \
+ LOAD_pr_##SZ(result, ptr, (SIZE)); \
+ check(result, (RES2)); \
+ checkp(ptr, &buf[2 * (SIZE)]); \
+ LOAD_pr_##SZ(result, ptr, (SIZE)); \
+ check(result, (RES3)); \
+ checkp(ptr, &buf[3 * (SIZE)]); \
+ LOAD_pr_##SZ(result, ptr, (SIZE)); \
+ check(result, (RES4)); \
+ checkp(ptr, &buf[4 * (SIZE)]); \
+}
+
+TEST_pr(loadalignb_pr, b, 1,
+ 0x01ffffffffffffffLL, 0x0201ffffffffffffLL,
+ 0x030201ffffffffffLL, 0x04030201ffffffffLL)
+TEST_pr(loadalignh_pr, h, 2,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x060504030201ffffLL, 0x0807060504030201LL)
+
+/*
+ ****************************************************************************
+ * _pbr addressing mode (addr ++ modifier-reg:brev)
+ */
+#define LOAD_pbr(SZ, RES, PTR) \
+ __asm__( \
+ "r4 = #(1 << (16 - 3))\n\t" \
+ "m0 = r4\n\t" \
+ "%0 = mem" #SZ "_fifo(%1++m0:brev)\n\t" \
+ : "+r"(RES), "+r"(PTR) \
+ : \
+ : "r4", "m0")
+#define LOAD_pbr_b(RES, PTR) \
+ LOAD_pbr(b, RES, PTR)
+#define LOAD_pbr_h(RES, PTR) \
+ LOAD_pbr(h, RES, PTR)
+
+#define TEST_pbr(NAME, SZ, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ void *ptr = buf; \
+ LOAD_pbr_##SZ(result, ptr); \
+ check(result, (RES1)); \
+ LOAD_pbr_##SZ(result, ptr); \
+ check(result, (RES2)); \
+ LOAD_pbr_##SZ(result, ptr); \
+ check(result, (RES3)); \
+ LOAD_pbr_##SZ(result, ptr); \
+ check(result, (RES4)); \
+}
+
+TEST_pbr(loadalignb_pbr, b,
+ 0x01ffffffffffffffLL, 0x0501ffffffffffffLL,
+ 0x030501ffffffffffLL, 0x07030501ffffffffLL)
+TEST_pbr(loadalignh_pbr, h,
+ 0x0201ffffffffffffLL, 0x06050201ffffffffLL,
+ 0x040306050201ffffLL, 0x0807040306050201LL)
+
+/*
+ ****************************************************************************
+ * _pi addressing mode (addr ++ inc)
+ */
+#define LOAD_pi(SZ, RES, PTR, INC) \
+ __asm__( \
+ "%0 = mem" #SZ "_fifo(%1++#" #INC ")\n\t" \
+ : "+r"(RES), "+r"(PTR))
+#define LOAD_pi_b(RES, PTR, INC) \
+ LOAD_pi(b, RES, PTR, INC)
+#define LOAD_pi_h(RES, PTR, INC) \
+ LOAD_pi(h, RES, PTR, INC)
+
+#define TEST_pi(NAME, SZ, INC, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ void *ptr = buf; \
+ LOAD_pi_##SZ(result, ptr, (INC)); \
+ check(result, (RES1)); \
+ checkp(ptr, &buf[1 * (INC)]); \
+ LOAD_pi_##SZ(result, ptr, (INC)); \
+ check(result, (RES2)); \
+ checkp(ptr, &buf[2 * (INC)]); \
+ LOAD_pi_##SZ(result, ptr, (INC)); \
+ check(result, (RES3)); \
+ checkp(ptr, &buf[3 * (INC)]); \
+ LOAD_pi_##SZ(result, ptr, (INC)); \
+ check(result, (RES4)); \
+ checkp(ptr, &buf[4 * (INC)]); \
+}
+
+TEST_pi(loadalignb_pi, b, 1,
+ 0x01ffffffffffffffLL, 0x0201ffffffffffffLL,
+ 0x030201ffffffffffLL, 0x04030201ffffffffLL)
+TEST_pi(loadalignh_pi, h, 2,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x060504030201ffffLL, 0x0807060504030201LL)
+
+/*
+ ****************************************************************************
+ * _pci addressing mode (addr ++ inc:circ)
+ */
+#define LOAD_pci(SZ, RES, PTR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %2\n\t" \
+ "%0 = mem" #SZ "_fifo(%1++#" #INC ":circ(m0))\n\t" \
+ : "+r"(RES), "+r"(PTR) \
+ : "r"(START), "r"(LEN) \
+ : "r4", "m0", "cs0")
+#define LOAD_pci_b(RES, PTR, START, LEN, INC) \
+ LOAD_pci(b, RES, PTR, START, LEN, INC)
+#define LOAD_pci_h(RES, PTR, START, LEN, INC) \
+ LOAD_pci(h, RES, PTR, START, LEN, INC)
+
+#define TEST_pci(NAME, SZ, LEN, INC, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ void *ptr = buf; \
+ LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES1)); \
+ checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \
+ LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES2)); \
+ checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \
+ LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES3)); \
+ checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \
+ LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES4)); \
+ checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \
+}
+
+TEST_pci(loadalignb_pci, b, 2, 1,
+ 0x01ffffffffffffffLL, 0x0201ffffffffffffLL,
+ 0x010201ffffffffffLL, 0x02010201ffffffffLL)
+TEST_pci(loadalignh_pci, h, 4, 2,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x020104030201ffffLL, 0x0403020104030201LL)
+
+/*
+ ****************************************************************************
+ * _pcr addressing mode (addr ++ I:circ(modifier-reg))
+ */
+#define LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %2\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %3\n\t" \
+ "%0 = mem" #SZ "_fifo(%1++I:circ(m1))\n\t" \
+ : "+r"(RES), "+r"(PTR) \
+ : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \
+ "r"(START) \
+ : "r4", "m1", "cs1")
+#define LOAD_pcr_b(RES, PTR, START, LEN, INC) \
+ LOAD_pcr(b, RES, PTR, START, LEN, INC)
+#define LOAD_pcr_h(RES, PTR, START, LEN, INC) \
+ LOAD_pcr(h, RES, PTR, START, LEN, INC)
+
+#define TEST_pcr(NAME, SZ, SIZE, LEN, INC, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ long long result = ~0LL; \
+ void *ptr = buf; \
+ LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES1)); \
+ checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \
+ LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES2)); \
+ checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \
+ LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES3)); \
+ checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \
+ LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES4)); \
+ checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \
+}
+
+TEST_pcr(loadalignb_pcr, b, 1, 2, 1,
+ 0x01ffffffffffffffLL, 0x0201ffffffffffffLL,
+ 0x010201ffffffffffLL, 0x02010201ffffffffLL)
+TEST_pcr(loadalignh_pcr, h, 2, 4, 1,
+ 0x0201ffffffffffffLL, 0x04030201ffffffffLL,
+ 0x020104030201ffffLL, 0x0403020104030201LL)
+
+int main()
+{
+ init_buf();
+
+ test_loadalignb_io();
+ test_loadalignh_io();
+
+ test_loadalignb_ur();
+ test_loadalignh_ur();
+
+ test_loadalignb_ap();
+ test_loadalignh_ap();
+
+ test_loadalignb_pr();
+ test_loadalignh_pr();
+
+ test_loadalignb_pbr();
+ test_loadalignh_pbr();
+
+ test_loadalignb_pi();
+ test_loadalignh_pi();
+
+ test_loadalignb_pci();
+ test_loadalignh_pci();
+
+ test_loadalignb_pcr();
+ test_loadalignh_pcr();
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0;
+}
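
The expected constants in load_align.c follow directly from the fifo-load semantics in the header comment: the 64-bit destination is shifted right by one element and the loaded element is inserted at the top. A hedged stand-alone model (hypothetical helper, not the QEMU implementation; the test only uses small positive bytes, so signedness is ignored):

#include <assert.h>
#include <stdint.h>

/* Model of r1:0 = memb_fifo(...) / memh_fifo(...) on a 64-bit destination */
static uint64_t fifo_insert(uint64_t dst, uint64_t elem, int elem_bits)
{
    return (dst >> elem_bits) | (elem << (64 - elem_bits));
}

int main(void)
{
    uint64_t r = ~0ULL;
    /* First two byte loads from buf[] = {1, 2, ...}, as in TEST_io above */
    r = fifo_insert(r, 0x01, 8);
    assert(r == 0x01ffffffffffffffULL);
    r = fifo_insert(r, 0x02, 8);
    assert(r == 0x0201ffffffffffffULL);
    return 0;
}
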
diff --git a/tests/tcg/hexagon/load_unpack.c b/tests/tcg/hexagon/load_unpack.c
new file mode 100644
index 0000000..3575a37
--- /dev/null
+++ b/tests/tcg/hexagon/load_unpack.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Test load unpack instructions
+ *
+ * Example
+ * r0 = memubh(r1+#0)
+ * loads a half word from memory and zero-extends the 2 bytes to form a word
+ *
+ * For each addressing mode, there are 4 tests
+ * bzw2 unsigned 2 elements
+ * bsw2 signed 2 elements
+ * bzw4 unsigned 4 elements
+ * bsw4 signed 4 elements
+ * There are 8 addressing modes, for a total of 32 instructions to test
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int err;
+
+char buf[16] __attribute__((aligned(1 << 16)));
+
+void init_buf(void)
+{
+ int i;
+ for (i = 0; i < 16; i++) {
+ int sign = i % 2 == 0 ? 0x80 : 0;
+ buf[i] = sign | (i + 1);
+ }
+}
+
+void __check(int line, long long result, long long expect)
+{
+ if (result != expect) {
+ printf("ERROR at line %d: 0x%08llx != 0x%08llx\n",
+ line, result, expect);
+ err++;
+ }
+}
+
+#define check(RES, EXP) __check(__LINE__, RES, EXP)
+
+void __checkp(int line, void *p, void *expect)
+{
+ if (p != expect) {
+ printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect);
+ err++;
+ }
+}
+
+#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP)
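+
+/*
+ * Illustrative reference model (not exercised by the tests): how the
+ * expected values below are formed. memubh zero-extends each of the two
+ * loaded bytes to a half word, membh sign-extends them. The helper names
+ * are local to this sketch.
+ */
+static inline unsigned int model_memubh(const unsigned char *p)
+{
+ return (unsigned int)p[0] | ((unsigned int)p[1] << 16);
+}
+
+static inline int model_membh(const signed char *p)
+{
+ return ((int)p[0] & 0xffff) | (((int)p[1] & 0xffff) << 16);
+}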
+
+/*
+ ****************************************************************************
+ * _io addressing mode (addr + offset)
+ */
+#define BxW_LOAD_io(SZ, RES, ADDR, OFF) \
+ __asm__( \
+ "%0 = mem" #SZ "(%1+#" #OFF ")\n\t" \
+ : "=r"(RES) \
+ : "r"(ADDR))
+#define BxW_LOAD_io_Z(RES, ADDR, OFF) \
+ BxW_LOAD_io(ubh, RES, ADDR, OFF)
+#define BxW_LOAD_io_S(RES, ADDR, OFF) \
+ BxW_LOAD_io(bh, RES, ADDR, OFF)
+
+#define TEST_io(NAME, TYPE, SIGN, SIZE, EXT, EXP1, EXP2, EXP3, EXP4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ init_buf(); \
+ BxW_LOAD_io_##SIGN(result, buf, 0 * (SIZE)); \
+ check(result, (EXP1) | (EXT)); \
+ BxW_LOAD_io_##SIGN(result, buf, 1 * (SIZE)); \
+ check(result, (EXP2) | (EXT)); \
+ BxW_LOAD_io_##SIGN(result, buf, 2 * (SIZE)); \
+ check(result, (EXP3) | (EXT)); \
+ BxW_LOAD_io_##SIGN(result, buf, 3 * (SIZE)); \
+ check(result, (EXP4) | (EXT)); \
+}
+
+TEST_io(loadbzw2_io, int, Z, 2, 0x00000000,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_io(loadbsw2_io, int, S, 2, 0x0000ff00,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_io(loadbzw4_io, long long, Z, 4, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_io(loadbsw4_io, long long, S, 4, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _ur addressing mode (index << shift + base)
+ */
+#define BxW_LOAD_ur(SZ, RES, SHIFT, IDX) \
+ __asm__( \
+ "%0 = mem" #SZ "(%1<<#" #SHIFT " + ##buf)\n\t" \
+ : "=r"(RES) \
+ : "r"(IDX))
+#define BxW_LOAD_ur_Z(RES, SHIFT, IDX) \
+ BxW_LOAD_ur(ubh, RES, SHIFT, IDX)
+#define BxW_LOAD_ur_S(RES, SHIFT, IDX) \
+ BxW_LOAD_ur(bh, RES, SHIFT, IDX)
+
+#define TEST_ur(NAME, TYPE, SIGN, SHIFT, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ init_buf(); \
+ BxW_LOAD_ur_##SIGN(result, (SHIFT), 0); \
+ check(result, (RES1) | (EXT)); \
+ BxW_LOAD_ur_##SIGN(result, (SHIFT), 1); \
+ check(result, (RES2) | (EXT)); \
+ BxW_LOAD_ur_##SIGN(result, (SHIFT), 2); \
+ check(result, (RES3) | (EXT)); \
+ BxW_LOAD_ur_##SIGN(result, (SHIFT), 3); \
+ check(result, (RES4) | (EXT)); \
+}
+
+TEST_ur(loadbzw2_ur, int, Z, 1, 0x00000000,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ur(loadbsw2_ur, int, S, 1, 0x0000ff00,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ur(loadbzw4_ur, long long, Z, 2, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_ur(loadbsw4_ur, long long, S, 2, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _ap addressing mode (addr = base)
+ */
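+/*
+ * The absolute-set form both loads from the ##address and writes that
+ * address into the pointer register, so checkp() below verifies the
+ * register side effect as well as the loaded value.
+ */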
+#define BxW_LOAD_ap(SZ, RES, PTR, ADDR) \
+ __asm__( \
+ "%0 = mem" #SZ "(%1 = ##" #ADDR ")\n\t" \
+ : "=r"(RES), "=r"(PTR))
+#define BxW_LOAD_ap_Z(RES, PTR, ADDR) \
+ BxW_LOAD_ap(ubh, RES, PTR, ADDR)
+#define BxW_LOAD_ap_S(RES, PTR, ADDR) \
+ BxW_LOAD_ap(bh, RES, PTR, ADDR)
+
+#define TEST_ap(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ void *ptr; \
+ init_buf(); \
+ BxW_LOAD_ap_##SIGN(result, ptr, (buf + 0 * (SIZE))); \
+ check(result, (RES1) | (EXT)); \
+ checkp(ptr, &buf[0 * (SIZE)]); \
+ BxW_LOAD_ap_##SIGN(result, ptr, (buf + 1 * (SIZE))); \
+ check(result, (RES2) | (EXT)); \
+ checkp(ptr, &buf[1 * (SIZE)]); \
+ BxW_LOAD_ap_##SIGN(result, ptr, (buf + 2 * (SIZE))); \
+ check(result, (RES3) | (EXT)); \
+ checkp(ptr, &buf[2 * (SIZE)]); \
+ BxW_LOAD_ap_##SIGN(result, ptr, (buf + 3 * (SIZE))); \
+ check(result, (RES4) | (EXT)); \
+ checkp(ptr, &buf[3 * (SIZE)]); \
+}
+
+TEST_ap(loadbzw2_ap, int, Z, 2, 0x00000000,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ap(loadbsw2_ap, int, S, 2, 0x0000ff00,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ap(loadbzw4_ap, long long, Z, 4, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_ap(loadbsw4_ap, long long, S, 4, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _pr addressing mode (addr ++ modifier-reg)
+ */
+#define BxW_LOAD_pr(SZ, RES, PTR, INC) \
+ __asm__( \
+ "m0 = %2\n\t" \
+ "%0 = mem" #SZ "(%1++m0)\n\t" \
+ : "=r"(RES), "+r"(PTR) \
+ : "r"(INC) \
+ : "m0")
+#define BxW_LOAD_pr_Z(RES, PTR, INC) \
+ BxW_LOAD_pr(ubh, RES, PTR, INC)
+#define BxW_LOAD_pr_S(RES, PTR, INC) \
+ BxW_LOAD_pr(bh, RES, PTR, INC)
+
+#define TEST_pr(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ void *ptr = buf; \
+ init_buf(); \
+ BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+ check(result, (RES1) | (EXT)); \
+ checkp(ptr, &buf[1 * (SIZE)]); \
+ BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+ check(result, (RES2) | (EXT)); \
+ checkp(ptr, &buf[2 * (SIZE)]); \
+ BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+ check(result, (RES3) | (EXT)); \
+ checkp(ptr, &buf[3 * (SIZE)]); \
+ BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+ check(result, (RES4) | (EXT)); \
+ checkp(ptr, &buf[4 * (SIZE)]); \
+}
+
+TEST_pr(loadbzw2_pr, int, Z, 2, 0x00000000,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_pr(loadbsw2_pr, int, S, 2, 0x0000ff00,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_pr(loadbzw4_pr, long long, Z, 4, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_pr(loadbsw4_pr, long long, S, 4, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _pbr addressing mode (addr ++ modifier-reg:brev)
+ */
+#define BxW_LOAD_pbr(SZ, RES, PTR) \
+ __asm__( \
+ "r4 = #(1 << (16 - 3))\n\t" \
+ "m0 = r4\n\t" \
+ "%0 = mem" #SZ "(%1++m0:brev)\n\t" \
+ : "=r"(RES), "+r"(PTR) \
+ : \
+ : "r4", "m0")
+#define BxW_LOAD_pbr_Z(RES, PTR) \
+ BxW_LOAD_pbr(ubh, RES, PTR)
+#define BxW_LOAD_pbr_S(RES, PTR) \
+ BxW_LOAD_pbr(bh, RES, PTR)
+
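+/*
+ * buf is aligned to 1 << 16 and m0 is 1 << (16 - 3), so successive :brev
+ * post-increments visit the low address bits in bit-reversed order, i.e.
+ * byte offsets 0, 4, 2, 6, ... This is why the expected results below
+ * appear "out of order" with respect to buf[].
+ */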
+#define TEST_pbr(NAME, TYPE, SIGN, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ void *ptr = buf; \
+ init_buf(); \
+ BxW_LOAD_pbr_##SIGN(result, ptr); \
+ check(result, (RES1) | (EXT)); \
+ BxW_LOAD_pbr_##SIGN(result, ptr); \
+ check(result, (RES2) | (EXT)); \
+ BxW_LOAD_pbr_##SIGN(result, ptr); \
+ check(result, (RES3) | (EXT)); \
+ BxW_LOAD_pbr_##SIGN(result, ptr); \
+ check(result, (RES4) | (EXT)); \
+}
+
+TEST_pbr(loadbzw2_pbr, int, Z, 0x00000000,
+ 0x00020081, 0x00060085, 0x00040083, 0x00080087)
+TEST_pbr(loadbsw2_pbr, int, S, 0x0000ff00,
+ 0x00020081, 0x00060085, 0x00040083, 0x00080087)
+TEST_pbr(loadbzw4_pbr, long long, Z, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x0006008500040083LL, 0x000a008900080087LL)
+TEST_pbr(loadbsw4_pbr, long long, S, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x0006008500040083LL, 0x000a008900080087LL)
+
+/*
+ ****************************************************************************
+ * _pi addressing mode (addr ++ inc)
+ */
+#define BxW_LOAD_pi(SZ, RES, PTR, INC) \
+ __asm__( \
+ "%0 = mem" #SZ "(%1++#" #INC ")\n\t" \
+ : "=r"(RES), "+r"(PTR))
+#define BxW_LOAD_pi_Z(RES, PTR, INC) \
+ BxW_LOAD_pi(ubh, RES, PTR, INC)
+#define BxW_LOAD_pi_S(RES, PTR, INC) \
+ BxW_LOAD_pi(bh, RES, PTR, INC)
+
+#define TEST_pi(NAME, TYPE, SIGN, INC, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ void *ptr = buf; \
+ init_buf(); \
+ BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+ check(result, (RES1) | (EXT)); \
+ checkp(ptr, &buf[1 * (INC)]); \
+ BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+ check(result, (RES2) | (EXT)); \
+ checkp(ptr, &buf[2 * (INC)]); \
+ BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+ check(result, (RES3) | (EXT)); \
+ checkp(ptr, &buf[3 * (INC)]); \
+ BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+ check(result, (RES4) | (EXT)); \
+ checkp(ptr, &buf[4 * (INC)]); \
+}
+
+TEST_pi(loadbzw2_pi, int, Z, 2, 0x00000000,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_pi(loadbsw2_pi, int, S, 2, 0x0000ff00,
+ 0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_pi(loadbzw4_pi, long long, Z, 4, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_pi(loadbsw4_pi, long long, S, 4, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _pci addressing mode (addr ++ inc:circ)
+ */
+#define BxW_LOAD_pci(SZ, RES, PTR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %2\n\t" \
+ "%0 = mem" #SZ "(%1++#" #INC ":circ(m0))\n\t" \
+ : "=r"(RES), "+r"(PTR) \
+ : "r"(START), "r"(LEN) \
+ : "r4", "m0", "cs0")
+#define BxW_LOAD_pci_Z(RES, PTR, START, LEN, INC) \
+ BxW_LOAD_pci(ubh, RES, PTR, START, LEN, INC)
+#define BxW_LOAD_pci_S(RES, PTR, START, LEN, INC) \
+ BxW_LOAD_pci(bh, RES, PTR, START, LEN, INC)
+
+#define TEST_pci(NAME, TYPE, SIGN, LEN, INC, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ void *ptr = buf; \
+ init_buf(); \
+ BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES1) | (EXT)); \
+ checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \
+ BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES2) | (EXT)); \
+ checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \
+ BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES3) | (EXT)); \
+ checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \
+ BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES4) | (EXT)); \
+ checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \
+}
+
+TEST_pci(loadbzw2_pci, int, Z, 6, 2, 0x00000000,
+ 0x00020081, 0x00040083, 0x00060085, 0x00020081)
+TEST_pci(loadbsw2_pci, int, S, 6, 2, 0x0000ff00,
+ 0x00020081, 0x00040083, 0x00060085, 0x00020081)
+TEST_pci(loadbzw4_pci, long long, Z, 8, 4, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x0004008300020081LL, 0x0008008700060085LL)
+TEST_pci(loadbsw4_pci, long long, S, 8, 4, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x0004008300020081LL, 0x0008008700060085LL)
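+
+/*
+ * Worked example for loadbzw2_pci above: with LEN = 6 and INC = 2 the
+ * circular pointer visits byte offsets 0, 2, 4 and then wraps back to 0,
+ * so RES4 equals RES1 and the final pointer check expects
+ * &buf[(4 * INC) % LEN], i.e. &buf[2].
+ */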
+
+/*
+ ****************************************************************************
+ * _pcr addressing mode (addr ++ I:circ(modifier-reg))
+ */
+#define BxW_LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %2\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %3\n\t" \
+ "%0 = mem" #SZ "(%1++I:circ(m1))\n\t" \
+ : "=r"(RES), "+r"(PTR) \
+ : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \
+ "r"(START) \
+ : "r4", "m1", "cs1")
+#define BxW_LOAD_pcr_Z(RES, PTR, START, LEN, INC) \
+ BxW_LOAD_pcr(ubh, RES, PTR, START, LEN, INC)
+#define BxW_LOAD_pcr_S(RES, PTR, START, LEN, INC) \
+ BxW_LOAD_pcr(bh, RES, PTR, START, LEN, INC)
+
+#define TEST_pcr(NAME, TYPE, SIGN, SIZE, LEN, INC, \
+ EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+ TYPE result; \
+ void *ptr = buf; \
+ init_buf(); \
+ BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES1) | (EXT)); \
+ checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \
+ BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES2) | (EXT)); \
+ checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \
+ BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES3) | (EXT)); \
+ checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \
+ BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+ check(result, (RES4) | (EXT)); \
+ checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \
+}
+
+TEST_pcr(loadbzw2_pcr, int, Z, 2, 8, 2, 0x00000000,
+ 0x00020081, 0x00060085, 0x00020081, 0x00060085)
+TEST_pcr(loadbsw2_pcr, int, S, 2, 8, 2, 0x0000ff00,
+ 0x00020081, 0x00060085, 0x00020081, 0x00060085)
+TEST_pcr(loadbzw4_pcr, long long, Z, 4, 8, 1, 0x0000000000000000LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x0004008300020081LL, 0x0008008700060085LL)
+TEST_pcr(loadbsw4_pcr, long long, S, 4, 8, 1, 0x0000ff000000ff00LL,
+ 0x0004008300020081LL, 0x0008008700060085LL,
+ 0x0004008300020081LL, 0x0008008700060085LL)
+
+int main()
+{
+ test_loadbzw2_io();
+ test_loadbsw2_io();
+ test_loadbzw4_io();
+ test_loadbsw4_io();
+
+ test_loadbzw2_ur();
+ test_loadbsw2_ur();
+ test_loadbzw4_ur();
+ test_loadbsw4_ur();
+
+ test_loadbzw2_ap();
+ test_loadbsw2_ap();
+ test_loadbzw4_ap();
+ test_loadbsw4_ap();
+
+ test_loadbzw2_pr();
+ test_loadbsw2_pr();
+ test_loadbzw4_pr();
+ test_loadbsw4_pr();
+
+ test_loadbzw2_pbr();
+ test_loadbsw2_pbr();
+ test_loadbzw4_pbr();
+ test_loadbsw4_pbr();
+
+ test_loadbzw2_pi();
+ test_loadbsw2_pi();
+ test_loadbzw4_pi();
+ test_loadbsw4_pi();
+
+ test_loadbzw2_pci();
+ test_loadbsw2_pci();
+ test_loadbzw4_pci();
+ test_loadbsw4_pci();
+
+ test_loadbzw2_pcr();
+ test_loadbsw2_pcr();
+ test_loadbzw4_pcr();
+ test_loadbsw4_pcr();
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c
index 458759f..17c3919 100644
--- a/tests/tcg/hexagon/misc.c
+++ b/tests/tcg/hexagon/misc.c
@@ -231,6 +231,14 @@ static void check(int val, int expect)
}
}
+static void check64(long long val, long long expect)
+{
+ if (val != expect) {
+ printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect);
+ err++;
+ }
+}
+
uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
uint32_t array[10];
@@ -264,8 +272,36 @@ static long long creg_pair(int x, int y)
return retval;
}
+static long long decbin(long long x, long long y, int *pred)
+{
+ long long retval;
+ asm ("%0 = decbin(%2, %3)\n\t"
+ "%1 = p0\n\t"
+ : "=r"(retval), "=r"(*pred)
+ : "r"(x), "r"(y));
+ return retval;
+}
+
+/* Check that predicates are auto-and'ed in a packet */
+static int auto_and(void)
+{
+ int retval;
+ asm ("r5 = #1\n\t"
+ "{\n\t"
+ " p0 = cmp.eq(r1, #1)\n\t"
+ " p0 = cmp.eq(r1, #2)\n\t"
+ "}\n\t"
+ "%0 = p0\n\t"
+ : "=r"(retval)
+ :
+ : "r5", "p0");
+ return retval;
+}
+
int main()
{
+ long long res64;
+ int pred;
memcpy(array, init, sizeof(array));
S4_storerhnew_rr(array, 4, 0xffff);
@@ -375,6 +411,17 @@ int main()
res = test_clrtnew(2, 7);
check(res, 7);
+ res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred);
+ check64(res64, 0x357980003700010cLL);
+ check(pred, 0);
+
+ res64 = decbin(0xfLL, 0x1bLL, &pred);
+ check64(res64, 0x78000100LL);
+ check(pred, 1);
+
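+ /* the two p0 writes in the packet are and'ed; they cannot both be true */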
+ res = auto_and();
+ check(res, 0);
+
puts(err ? "FAIL" : "PASS");
return err;
}
diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c
new file mode 100644
index 0000000..52997b3
--- /dev/null
+++ b/tests/tcg/hexagon/multi_result.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+
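+/*
+ * Each instruction tested here writes both a register (or register pair)
+ * and a predicate. The inline-asm wrappers copy p0 into a scalar so the
+ * tests can check both outputs.
+ */
+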
+static int sfrecipa(int Rs, int Rt, int *pred_result)
+{
+ int result;
+ int predval;
+
+ asm volatile("%0,p0 = sfrecipa(%2, %3)\n\t"
+ "%1 = p0\n\t"
+ : "+r"(result), "=r"(predval)
+ : "r"(Rs), "r"(Rt)
+ : "p0");
+ *pred_result = predval;
+ return result;
+}
+
+static int sfinvsqrta(int Rs, int *pred_result)
+{
+ int result;
+ int predval;
+
+ asm volatile("%0,p0 = sfinvsqrta(%2)\n\t"
+ "%1 = p0\n\t"
+ : "+r"(result), "=r"(predval)
+ : "r"(Rs)
+ : "p0");
+ *pred_result = predval;
+ return result;
+}
+
+static long long vacsh(long long Rxx, long long Rss, long long Rtt,
+ int *pred_result, int *ovf_result)
+{
+ long long result = Rxx;
+ int predval;
+ int usr;
+
+ /*
+ * This instruction can set bit 0 (OVF/overflow) in usr
+ * Clear the bit first, then return that bit to the caller
+ */
+ asm volatile("r2 = usr\n\t"
+ "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */
+ "usr = r2\n\t"
+ "%0,p0 = vacsh(%3, %4)\n\t"
+ "%1 = p0\n\t"
+ "%2 = usr\n\t"
+ : "+r"(result), "=r"(predval), "=r"(usr)
+ : "r"(Rss), "r"(Rtt)
+ : "r2", "p0", "usr");
+ *pred_result = predval;
+ *ovf_result = (usr & 1);
+ return result;
+}
+
+static long long vminub(long long Rtt, long long Rss,
+ int *pred_result)
+{
+ long long result;
+ int predval;
+
+ asm volatile("%0,p0 = vminub(%2, %3)\n\t"
+ "%1 = p0\n\t"
+ : "=r"(result), "=r"(predval)
+ : "r"(Rtt), "r"(Rss)
+ : "p0");
+ *pred_result = predval;
+ return result;
+}
+
+static long long add_carry(long long Rss, long long Rtt,
+ int pred_in, int *pred_result)
+{
+ long long result;
+ int predval = pred_in;
+
+ asm volatile("p0 = %1\n\t"
+ "%0 = add(%2, %3, p0):carry\n\t"
+ "%1 = p0\n\t"
+ : "=r"(result), "+r"(predval)
+ : "r"(Rss), "r"(Rtt)
+ : "p0");
+ *pred_result = predval;
+ return result;
+}
+
+static long long sub_carry(long long Rss, long long Rtt,
+ int pred_in, int *pred_result)
+{
+ long long result;
+ int predval = pred_in;
+
+ asm volatile("p0 = !cmp.eq(%1, #0)\n\t"
+ "%0 = sub(%2, %3, p0):carry\n\t"
+ "%1 = p0\n\t"
+ : "=r"(result), "+r"(predval)
+ : "r"(Rss), "r"(Rtt)
+ : "p0");
+ *pred_result = predval;
+ return result;
+}
+
+int err;
+
+static void check_ll(long long val, long long expect)
+{
+ if (val != expect) {
+ printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect);
+ err++;
+ }
+}
+
+static void check(int val, int expect)
+{
+ if (val != expect) {
+ printf("ERROR: 0x%08x != 0x%08x\n", val, expect);
+ err++;
+ }
+}
+
+static void check_p(int val, int expect)
+{
+ if (val != expect) {
+ printf("ERROR: 0x%02x != 0x%02x\n", val, expect);
+ err++;
+ }
+}
+
+static void test_sfrecipa()
+{
+ int res;
+ int pred_result;
+
+ res = sfrecipa(0x04030201, 0x05060708, &pred_result);
+ check(res, 0x59f38001);
+ check_p(pred_result, 0x00);
+}
+
+static void test_sfinvsqrta()
+{
+ int res;
+ int pred_result;
+
+ res = sfinvsqrta(0x04030201, &pred_result);
+ check(res, 0x4d330000);
+ check_p(pred_result, 0xe0);
+
+ res = sfinvsqrta(0x0, &pred_result);
+ check(res, 0x3f800000);
+ check_p(pred_result, 0x0);
+}
+
+static void test_vacsh()
+{
+ long long res64;
+ int pred_result;
+ int ovf_result;
+
+ res64 = vacsh(0x0004000300020001LL,
+ 0x0001000200030004LL,
+ 0x0000000000000000LL, &pred_result, &ovf_result);
+ check_ll(res64, 0x0004000300030004LL);
+ check_p(pred_result, 0xf0);
+ check(ovf_result, 0);
+
+ res64 = vacsh(0x0004000300020001LL,
+ 0x0001000200030004LL,
+ 0x000affff000d0000LL, &pred_result, &ovf_result);
+ check_ll(res64, 0x000e0003000f0004LL);
+ check_p(pred_result, 0xcc);
+ check(ovf_result, 0);
+
+ res64 = vacsh(0x00047fff00020001LL,
+ 0x00017fff00030004LL,
+ 0x000a0fff000d0000LL, &pred_result, &ovf_result);
+ check_ll(res64, 0x000e7fff000f0004LL);
+ check_p(pred_result, 0xfc);
+ check(ovf_result, 1);
+
+ res64 = vacsh(0x0004000300020001LL,
+ 0x0001000200030009LL,
+ 0x000affff000d0001LL, &pred_result, &ovf_result);
+ check_ll(res64, 0x000e0003000f0008LL);
+ check_p(pred_result, 0xcc);
+ check(ovf_result, 0);
+}
+
+static void test_vminub()
+{
+ long long res64;
+ int pred_result;
+
+ res64 = vminub(0x0807060504030201LL,
+ 0x0102030405060708LL,
+ &pred_result);
+ check_ll(res64, 0x0102030404030201LL);
+ check_p(pred_result, 0xf0);
+
+ res64 = vminub(0x0802060405030701LL,
+ 0x0107030504060208LL,
+ &pred_result);
+ check_ll(res64, 0x0102030404030201LL);
+ check_p(pred_result, 0xaa);
+}
+
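+/*
+ * add/sub with carry: the carry-in comes from p0 and the carry-out is
+ * written back to p0 (reading p0 into a register gives 0xff when all
+ * predicate bits are set). For example, 0xffffffffffffffff + 0 with a
+ * carry-in of 1 wraps to 0 with a carry-out, matching the first check
+ * below.
+ */
+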
+static void test_add_carry()
+{
+ long long res64;
+ int pred_result;
+
+ res64 = add_carry(0x0000000000000000LL,
+ 0xffffffffffffffffLL,
+ 1, &pred_result);
+ check_ll(res64, 0x0000000000000000LL);
+ check_p(pred_result, 0xff);
+
+ res64 = add_carry(0x0000000100000000LL,
+ 0xffffffffffffffffLL,
+ 0, &pred_result);
+ check_ll(res64, 0x00000000ffffffffLL);
+ check_p(pred_result, 0xff);
+
+ res64 = add_carry(0x0000000100000000LL,
+ 0xffffffffffffffffLL,
+ 0, &pred_result);
+ check_ll(res64, 0x00000000ffffffffLL);
+ check_p(pred_result, 0xff);
+}
+
+static void test_sub_carry()
+{
+ long long res64;
+ int pred_result;
+
+ res64 = sub_carry(0x0000000000000000LL,
+ 0x0000000000000000LL,
+ 1, &pred_result);
+ check_ll(res64, 0x0000000000000000LL);
+ check_p(pred_result, 0xff);
+
+ res64 = sub_carry(0x0000000100000000LL,
+ 0x0000000000000000LL,
+ 0, &pred_result);
+ check_ll(res64, 0x00000000ffffffffLL);
+ check_p(pred_result, 0xff);
+
+ res64 = sub_carry(0x0000000100000000LL,
+ 0x0000000000000000LL,
+ 0, &pred_result);
+ check_ll(res64, 0x00000000ffffffffLL);
+ check_p(pred_result, 0xff);
+}
+
+int main()
+{
+ test_sfrecipa();
+ test_sfinvsqrta();
+ test_vacsh();
+ test_vminub();
+ test_add_carry();
+ test_sub_carry();
+
+ puts(err ? "FAIL" : "PASS");
+ return err;
+}