From a27c100c23d2b23a8d5342b8d8d75e238f6342ba Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Sun, 14 Mar 2021 23:53:04 -0500 Subject: target/hexagon: translation changes Change cpu_ldl_code to translator_ldl. Don't end the TB after every packet when HEX_DEBUG is on. Make gen_check_store_width a simple call. Reported-by: Richard Henderson < Signed-off-by: Taylor Simpson Message-Id: <1615783984-25918-1-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/translate.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index eeaad5f..2317508 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -88,8 +88,8 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t)); for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { - words[nwords] = cpu_ldl_code(env, - ctx->base.pc_next + nwords * sizeof(uint32_t)); + words[nwords] = + translator_ldl(env, ctx->base.pc_next + nwords * sizeof(uint32_t)); found_end = is_packet_end(words[nwords]); } if (!found_end) { @@ -292,20 +292,16 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt) tcg_temp_free(pval); } -#if HEX_DEBUG -static inline void gen_check_store_width(DisasContext *ctx, int slot_num) +static void gen_check_store_width(DisasContext *ctx, int slot_num) { +#if HEX_DEBUG TCGv slot = tcg_const_tl(slot_num); TCGv check = tcg_const_tl(ctx->store_width[slot_num]); gen_helper_debug_check_store_width(cpu_env, slot, check); tcg_temp_free(slot); tcg_temp_free(check); -} -#define HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num) \ - gen_check_store_width(ctx, slot_num) -#else -#define HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num) /* nothing */ #endif +} static bool slot_is_predicated(Packet *pkt, int slot_num) { @@ -355,25 +351,25 @@ void process_store(DisasContext *ctx, Packet *pkt, int slot_num) */ switch (ctx->store_width[slot_num]) { case 1: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st8(hex_store_val32[slot_num], hex_store_addr[slot_num], ctx->mem_idx); break; case 2: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st16(hex_store_val32[slot_num], hex_store_addr[slot_num], ctx->mem_idx); break; case 4: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st32(hex_store_val32[slot_num], hex_store_addr[slot_num], ctx->mem_idx); break; case 8: - HEX_DEBUG_GEN_CHECK_STORE_WIDTH(ctx, slot_num); + gen_check_store_width(ctx, slot_num); tcg_gen_qemu_st64(hex_store_val64[slot_num], hex_store_addr[slot_num], ctx->mem_idx); @@ -593,10 +589,6 @@ static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { ctx->base.is_jmp = DISAS_TOO_MANY; } -#if HEX_DEBUG - /* When debugging, only put one packet per TB */ - ctx->base.is_jmp = DISAS_TOO_MANY; -#endif } } -- cgit v1.1 From 4c82c2b433fab9814000e7794d9168044863ecfc Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Sun, 14 Mar 2021 23:53:57 -0500 Subject: target/hexagon: remove unnecessary checks in find_iclass_slots Reported-by: Richard Henderson < Signed-off-by: Taylor Simpson Message-Id: <1615784037-26129-1-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/iclass.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/target/hexagon/iclass.c b/target/hexagon/iclass.c index 378d8a6..6091286 100644 --- a/target/hexagon/iclass.c +++ b/target/hexagon/iclass.c @@ -53,10 +53,6 @@ SlotMask find_iclass_slots(Opcode opcode, int itype) (opcode == Y2_isync) || (opcode == J2_pause) || (opcode == J4_hintjumpr)) { return SLOTS_2; - } else if ((itype == ICLASS_V2LDST) && (GET_ATTRIB(opcode, A_STORE))) { - return SLOTS_01; - } else if ((itype == ICLASS_V2LDST) && (!GET_ATTRIB(opcode, A_STORE))) { - return SLOTS_01; } else if (GET_ATTRIB(opcode, A_CRSLOT23)) { return SLOTS_23; } else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) { -- cgit v1.1 From 1de468b3987e6a85bc0a046d444bf76659242dd1 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Sun, 14 Mar 2021 23:54:09 -0500 Subject: target/hexagon: Change DECODE_MAPPED_REG operand name to OPNUM Reported-by: Richard Henderson < Signed-off-by: Taylor Simpson Message-Id: <1615784049-26215-1-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/decode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index c9bacaa..1c9c074 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -48,8 +48,8 @@ enum { DEF_REGMAP(R_16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23) DEF_REGMAP(R__8, 8, 0, 2, 4, 6, 16, 18, 20, 22) -#define DECODE_MAPPED_REG(REGNO, NAME) \ - insn->regno[REGNO] = DECODE_REGISTER_##NAME[insn->regno[REGNO]]; +#define DECODE_MAPPED_REG(OPNUM, NAME) \ + insn->regno[OPNUM] = DECODE_REGISTER_##NAME[insn->regno[OPNUM]]; typedef struct { const struct DectreeTable *table_link; -- cgit v1.1 From d9099caf04bda396e084429d3dd3f6601e23b7ca Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Sun, 14 Mar 2021 23:55:15 -0500 Subject: target/hexagon: fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Taylor Simpson Message-Id: <1615784115-26559-1-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/op_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 2c6d718..d6b5c47 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -298,7 +298,7 @@ int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) } /* - * Hexagon FP operations return ~0 insteat of NaN + * Hexagon FP operations return ~0 instead of NaN * The hex_check_sfnan/hex_check_dfnan functions perform this check */ static float32 hex_check_sfnan(float32 x) -- cgit v1.1 From 5f261764cec291bb159ffecd291a1bfd6800215b Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Sun, 14 Mar 2021 23:55:00 -0500 Subject: target/hexagon: remove unnecessary semicolons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reported-by: Richard Henderson < Signed-off-by: Taylor Simpson Message-Id: <1615784100-26459-1-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index e044dea..a30048e 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -83,9 +83,9 @@ #define fGEN_TCG_L2_loadrub_pr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrub_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrb_pr(SHORTCODE) SHORTCODE -#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE; +#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadruh_pr(SHORTCODE) SHORTCODE -#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE; +#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrh_pr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrh_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadri_pr(SHORTCODE) SHORTCODE -- cgit v1.1 From d799f8ad08742e9c042e208d970febf87a2c4901 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:29 -0500 Subject: Hexagon (target/hexagon) TCG generation cleanup Simplify TCG generation of hex_reg_written Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-2-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/genptr.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 7481f4c..87f5d92 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -35,7 +35,6 @@ static inline TCGv gen_read_preg(TCGv pred, uint8_t num) static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot) { - TCGv one = tcg_const_tl(1); TCGv zero = tcg_const_tl(0); TCGv slot_mask = tcg_temp_new(); @@ -43,12 +42,17 @@ static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot) tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, val, hex_new_value[rnum]); #if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum], slot_mask, zero, - one, hex_reg_written[rnum]); + /* + * Do this so HELPER(debug_commit_end) will know + * + * Note that slot_mask indicates the value is not written + * (i.e., slot was cancelled), so we create a true/false value before + * or'ing with hex_reg_written[rnum]. + */ + tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); + tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); #endif - tcg_temp_free(one); tcg_temp_free(zero); tcg_temp_free(slot_mask); } -- cgit v1.1 From edf26ade436f69c170e7eec77d4b5d1d2db78d3f Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:30 -0500 Subject: Hexagon (target/hexagon) cleanup gen_log_predicated_reg_write_pair Similar to previous cleanup of gen_log_predicated_reg_write Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-3-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/genptr.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 87f5d92..07d970f 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -69,36 +69,35 @@ static inline void gen_log_reg_write(int rnum, TCGv val) static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, int slot) { TCGv val32 = tcg_temp_new(); - TCGv one = tcg_const_tl(1); TCGv zero = tcg_const_tl(0); TCGv slot_mask = tcg_temp_new(); tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); /* Low word */ tcg_gen_extrl_i64_i32(val32, val); - tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, - val32, hex_new_value[rnum]); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum], + tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, - one, hex_reg_written[rnum]); -#endif - + val32, hex_new_value[rnum]); /* High word */ tcg_gen_extrh_i64_i32(val32, val); tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1], slot_mask, zero, val32, hex_new_value[rnum + 1]); #if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movcond_tl(TCG_COND_EQ, hex_reg_written[rnum + 1], - slot_mask, zero, - one, hex_reg_written[rnum + 1]); + /* + * Do this so HELPER(debug_commit_end) will know + * + * Note that slot_mask indicates the value is not written + * (i.e., slot was cancelled), so we create a true/false value before + * or'ing with hex_reg_written[rnum]. + */ + tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); + tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); + tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1], + slot_mask); #endif tcg_temp_free(val32); - tcg_temp_free(one); tcg_temp_free(zero); tcg_temp_free(slot_mask); } -- cgit v1.1 From 2d27cebbf8994621e0a4bda9609d24982f5ba8c6 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:31 -0500 Subject: Hexagon (target/hexagon) remove unnecessary inline directives Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-4-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/cpu.c | 9 ++++----- target/hexagon/decode.c | 6 +++--- target/hexagon/fma_emu.c | 39 ++++++++++++++++++++------------------- target/hexagon/op_helper.c | 37 ++++++++++++++++++------------------- target/hexagon/translate.c | 2 +- 5 files changed, 46 insertions(+), 47 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index b0b3040..c2fe357 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -69,10 +69,9 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { * stacks at different locations. This is used to compensate so the diff is * cleaner. */ -static inline target_ulong adjust_stack_ptrs(CPUHexagonState *env, - target_ulong addr) +static target_ulong adjust_stack_ptrs(CPUHexagonState *env, target_ulong addr) { - HexagonCPU *cpu = container_of(env, HexagonCPU, env); + HexagonCPU *cpu = hexagon_env_get_cpu(env); target_ulong stack_adjust = cpu->lldb_stack_adjust; target_ulong stack_start = env->stack_start; target_ulong stack_size = 0x10000; @@ -88,7 +87,7 @@ static inline target_ulong adjust_stack_ptrs(CPUHexagonState *env, } /* HEX_REG_P3_0 (aka C4) is an alias for the predicate registers */ -static inline target_ulong read_p3_0(CPUHexagonState *env) +static target_ulong read_p3_0(CPUHexagonState *env) { int32_t control_reg = 0; int i; @@ -116,7 +115,7 @@ static void print_reg(FILE *f, CPUHexagonState *env, int regnum) static void hexagon_dump(CPUHexagonState *env, FILE *f) { - HexagonCPU *cpu = container_of(env, HexagonCPU, env); + HexagonCPU *cpu = hexagon_env_get_cpu(env); if (cpu->lldb_compat) { /* diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 1c9c074..65d97ce 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -354,7 +354,7 @@ static void decode_split_cmpjump(Packet *pkt) } } -static inline int decode_opcode_can_jump(int opcode) +static int decode_opcode_can_jump(int opcode) { if ((GET_ATTRIB(opcode, A_JUMP)) || (GET_ATTRIB(opcode, A_CALL)) || @@ -370,7 +370,7 @@ static inline int decode_opcode_can_jump(int opcode) return 0; } -static inline int decode_opcode_ends_loop(int opcode) +static int decode_opcode_ends_loop(int opcode) { return GET_ATTRIB(opcode, A_HWLOOP0_END) || GET_ATTRIB(opcode, A_HWLOOP1_END); @@ -764,7 +764,7 @@ static void decode_add_endloop_insn(Insn *insn, int loopnum) } } -static inline int decode_parsebits_is_loopend(uint32_t encoding32) +static int decode_parsebits_is_loopend(uint32_t encoding32) { uint32_t bits = parse_bits(encoding32); return bits == 0x2; diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c index 842d903..f324b83 100644 --- a/target/hexagon/fma_emu.c +++ b/target/hexagon/fma_emu.c @@ -64,7 +64,7 @@ typedef union { }; } Float; -static inline uint64_t float64_getmant(float64 f64) +static uint64_t float64_getmant(float64 f64) { Double a = { .i = f64 }; if (float64_is_normal(f64)) { @@ -91,7 +91,7 @@ int32_t float64_getexp(float64 f64) return -1; } -static inline uint64_t float32_getmant(float32 f32) +static uint64_t float32_getmant(float32 f32) { Float a = { .i = f32 }; if (float32_is_normal(f32)) { @@ -118,17 +118,17 @@ int32_t float32_getexp(float32 f32) return -1; } -static inline uint32_t int128_getw0(Int128 x) +static uint32_t int128_getw0(Int128 x) { return int128_getlo(x); } -static inline uint32_t int128_getw1(Int128 x) +static uint32_t int128_getw1(Int128 x) { return int128_getlo(x) >> 32; } -static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi) +static Int128 int128_mul_6464(uint64_t ai, uint64_t bi) { Int128 a, b; uint64_t pp0, pp1a, pp1b, pp1s, pp2; @@ -152,7 +152,7 @@ static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi) return int128_make128(ret_low, pp2 + (pp1s >> 32)); } -static inline Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow) +static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow) { Int128 ret = int128_sub(a, b); if (borrow != 0) { @@ -170,7 +170,7 @@ typedef struct { uint8_t sticky; } Accum; -static inline void accum_init(Accum *p) +static void accum_init(Accum *p) { p->mant = int128_zero(); p->exp = 0; @@ -180,7 +180,7 @@ static inline void accum_init(Accum *p) p->sticky = 0; } -static inline Accum accum_norm_left(Accum a) +static Accum accum_norm_left(Accum a) { a.exp--; a.mant = int128_lshift(a.mant, 1); @@ -190,6 +190,7 @@ static inline Accum accum_norm_left(Accum a) return a; } +/* This function is marked inline for performance reasons */ static inline Accum accum_norm_right(Accum a, int amt) { if (amt > 130) { @@ -226,7 +227,7 @@ static inline Accum accum_norm_right(Accum a, int amt) */ static Accum accum_add(Accum a, Accum b); -static inline Accum accum_sub(Accum a, Accum b, int negate) +static Accum accum_sub(Accum a, Accum b, int negate) { Accum ret; accum_init(&ret); @@ -329,7 +330,7 @@ static Accum accum_add(Accum a, Accum b) } /* Return an infinity with requested sign */ -static inline float64 infinite_float64(uint8_t sign) +static float64 infinite_float64(uint8_t sign) { if (sign) { return make_float64(DF_MINUS_INF); @@ -339,7 +340,7 @@ static inline float64 infinite_float64(uint8_t sign) } /* Return a maximum finite value with requested sign */ -static inline float64 maxfinite_float64(uint8_t sign) +static float64 maxfinite_float64(uint8_t sign) { if (sign) { return make_float64(DF_MINUS_MAXF); @@ -349,7 +350,7 @@ static inline float64 maxfinite_float64(uint8_t sign) } /* Return a zero value with requested sign */ -static inline float64 zero_float64(uint8_t sign) +static float64 zero_float64(uint8_t sign) { if (sign) { return make_float64(0x8000000000000000); @@ -369,7 +370,7 @@ float32 infinite_float32(uint8_t sign) } /* Return a maximum finite value with the requested sign */ -static inline float32 maxfinite_float32(uint8_t sign) +static float32 maxfinite_float32(uint8_t sign) { if (sign) { return make_float32(SF_MINUS_MAXF); @@ -379,7 +380,7 @@ static inline float32 maxfinite_float32(uint8_t sign) } /* Return a zero value with requested sign */ -static inline float32 zero_float32(uint8_t sign) +static float32 zero_float32(uint8_t sign) { if (sign) { return make_float32(0x80000000); @@ -389,7 +390,7 @@ static inline float32 zero_float32(uint8_t sign) } #define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \ -static inline SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \ +static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \ { \ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \ && ((a.guard | a.round | a.sticky) == 0)) { \ @@ -526,8 +527,8 @@ static bool is_inf_prod(float64 a, float64 b) (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a)))); } -static inline float64 special_fma(float64 a, float64 b, float64 c, - float_status *fp_status) +static float64 special_fma(float64 a, float64 b, float64 c, + float_status *fp_status) { float64 ret = make_float64(0); @@ -586,8 +587,8 @@ static inline float64 special_fma(float64 a, float64 b, float64 c, g_assert_not_reached(); } -static inline float32 special_fmaf(float32 a, float32 b, float32 c, - float_status *fp_status) +static float32 special_fmaf(float32 a, float32 b, float32 c, + float_status *fp_status) { float64 aa, bb, cc; aa = float32_to_float64(a, fp_status); diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index d6b5c47..5d35dfc 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -46,8 +46,8 @@ void QEMU_NORETURN HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) do_raise_exception_err(env, excp, 0); } -static inline void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val, uint32_t slot) +static void log_reg_write(CPUHexagonState *env, int rnum, + target_ulong val, uint32_t slot) { HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")", rnum, val, val); @@ -63,8 +63,7 @@ static inline void log_reg_write(CPUHexagonState *env, int rnum, #endif } -static inline void log_pred_write(CPUHexagonState *env, int pnum, - target_ulong val) +static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) { HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n", @@ -79,8 +78,8 @@ static inline void log_pred_write(CPUHexagonState *env, int pnum, } } -static inline void log_store32(CPUHexagonState *env, target_ulong addr, - target_ulong val, int width, int slot) +static void log_store32(CPUHexagonState *env, target_ulong addr, + target_ulong val, int width, int slot) { HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx ", %" PRId32 " [0x08%" PRIx32 "])\n", @@ -90,8 +89,8 @@ static inline void log_store32(CPUHexagonState *env, target_ulong addr, env->mem_log_stores[slot].data32 = val; } -static inline void log_store64(CPUHexagonState *env, target_ulong addr, - int64_t val, int width, int slot) +static void log_store64(CPUHexagonState *env, target_ulong addr, + int64_t val, int width, int slot) { HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx ", %" PRId64 " [0x016%" PRIx64 "])\n", @@ -101,7 +100,7 @@ static inline void log_store64(CPUHexagonState *env, target_ulong addr, env->mem_log_stores[slot].data64 = val; } -static inline void write_new_pc(CPUHexagonState *env, target_ulong addr) +static void write_new_pc(CPUHexagonState *env, target_ulong addr) { HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr); @@ -132,7 +131,7 @@ void HELPER(debug_start_packet)(CPUHexagonState *env) } #endif -static inline int32_t new_pred_value(CPUHexagonState *env, int pnum) +static int32_t new_pred_value(CPUHexagonState *env, int pnum) { return env->new_pred_value[pnum]; } @@ -332,8 +331,8 @@ static void check_noshuf(CPUHexagonState *env, uint32_t slot) } } -static inline uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint8_t retval; check_noshuf(env, slot); @@ -341,8 +340,8 @@ static inline uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, return retval; } -static inline uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint16_t retval; check_noshuf(env, slot); @@ -350,8 +349,8 @@ static inline uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, return retval; } -static inline uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint32_t retval; check_noshuf(env, slot); @@ -359,8 +358,8 @@ static inline uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, return retval; } -static inline uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr) +static uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, + target_ulong vaddr) { uint64_t retval; check_noshuf(env, slot); @@ -939,7 +938,7 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV, return RxV; } -static inline bool is_inf_prod(int32_t a, int32_t b) +static bool is_inf_prod(int32_t a, int32_t b) { return (float32_is_infinity(a) && float32_is_infinity(b)) || (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) || diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 2317508..f975d7a 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -585,7 +585,7 @@ static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) * The CPU log is used to compare against LLDB single stepping, * so end the TLB after every packet. */ - HexagonCPU *hex_cpu = container_of(env, HexagonCPU, env); + HexagonCPU *hex_cpu = hexagon_env_get_cpu(env); if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { ctx->base.is_jmp = DISAS_TOO_MANY; } -- cgit v1.1 From 7d9ab2021f98b93a5af10f594daad6472b530e4d Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:32 -0500 Subject: Hexagon (target/hexagon) use env_archcpu and env_cpu Remove hexagon_env_get_cpu and replace with env_archcpu Replace CPU(hexagon_env_get_cpu(env)) with env_cpu(env) Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-5-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- linux-user/hexagon/cpu_loop.c | 2 +- target/hexagon/cpu.c | 4 ++-- target/hexagon/cpu.h | 5 ----- target/hexagon/op_helper.c | 2 +- target/hexagon/translate.c | 2 +- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c index 9a68ca0..bc34f5d 100644 --- a/linux-user/hexagon/cpu_loop.c +++ b/linux-user/hexagon/cpu_loop.c @@ -25,7 +25,7 @@ void cpu_loop(CPUHexagonState *env) { - CPUState *cs = CPU(hexagon_env_get_cpu(env)); + CPUState *cs = env_cpu(env); int trapnr, signum, sigcode; target_ulong sigaddr; target_ulong syscallnum; diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index c2fe357..f044506 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -71,7 +71,7 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { */ static target_ulong adjust_stack_ptrs(CPUHexagonState *env, target_ulong addr) { - HexagonCPU *cpu = hexagon_env_get_cpu(env); + HexagonCPU *cpu = env_archcpu(env); target_ulong stack_adjust = cpu->lldb_stack_adjust; target_ulong stack_start = env->stack_start; target_ulong stack_size = 0x10000; @@ -115,7 +115,7 @@ static void print_reg(FILE *f, CPUHexagonState *env, int regnum) static void hexagon_dump(CPUHexagonState *env, FILE *f) { - HexagonCPU *cpu = hexagon_env_get_cpu(env); + HexagonCPU *cpu = env_archcpu(env); if (cpu->lldb_compat) { /* diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index e04eac5..2855dd3 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -127,11 +127,6 @@ typedef struct HexagonCPU { target_ulong lldb_stack_adjust; } HexagonCPU; -static inline HexagonCPU *hexagon_env_get_cpu(CPUHexagonState *env) -{ - return container_of(env, HexagonCPU, env); -} - #include "cpu_bits.h" #define cpu_signal_handler cpu_hexagon_signal_handler diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 5d35dfc..7ac8554 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -35,7 +35,7 @@ static void QEMU_NORETURN do_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc) { - CPUState *cs = CPU(hexagon_env_get_cpu(env)); + CPUState *cs = env_cpu(env); qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); cs->exception_index = exception; cpu_loop_exit_restore(cs, pc); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index f975d7a..e235fdb 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -585,7 +585,7 @@ static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu) * The CPU log is used to compare against LLDB single stepping, * so end the TLB after every packet. */ - HexagonCPU *hex_cpu = hexagon_env_get_cpu(env); + HexagonCPU *hex_cpu = env_archcpu(env); if (hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU)) { ctx->base.is_jmp = DISAS_TOO_MANY; } -- cgit v1.1 From 743debbc373ffcd8eb66dc388632c03f5e951bfc Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:33 -0500 Subject: Hexagon (target/hexagon) properly generate TB end for DISAS_NORETURN When exiting a TB, generate all the code before returning from hexagon_tr_translate_packet so that nothing needs to be done in hexagon_tr_tb_stop. Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-6-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/translate.c | 62 ++++++++++++++++++++++++---------------------- target/hexagon/translate.h | 3 --- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index e235fdb..9f2a531 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -54,16 +54,40 @@ static const char * const hexagon_prednames[] = { "p0", "p1", "p2", "p3" }; -void gen_exception(int excp) +static void gen_exception_raw(int excp) { TCGv_i32 helper_tmp = tcg_const_i32(excp); gen_helper_raise_exception(cpu_env, helper_tmp); tcg_temp_free_i32(helper_tmp); } -void gen_exception_debug(void) +static void gen_exec_counters(DisasContext *ctx) +{ + tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], + hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); + tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], + hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); +} + +static void gen_end_tb(DisasContext *ctx) { - gen_exception(EXCP_DEBUG); + gen_exec_counters(ctx); + tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); + if (ctx->base.singlestep_enabled) { + gen_exception_raw(EXCP_DEBUG); + } else { + tcg_gen_exit_tb(NULL, 0); + } + ctx->base.is_jmp = DISAS_NORETURN; +} + +static void gen_exception_end_tb(DisasContext *ctx, int excp) +{ + gen_exec_counters(ctx); + tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); + gen_exception_raw(excp); + ctx->base.is_jmp = DISAS_NORETURN; + } #if HEX_DEBUG @@ -225,8 +249,7 @@ static void gen_insn(CPUHexagonState *env, DisasContext *ctx, mark_implicit_writes(ctx, insn); insn->generate(env, ctx, insn, pkt); } else { - gen_exception(HEX_EXCP_INVALID_OPCODE); - ctx->base.is_jmp = DISAS_NORETURN; + gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); } } @@ -447,14 +470,6 @@ static void update_exec_counters(DisasContext *ctx, Packet *pkt) ctx->num_insns += num_real_insns; } -static void gen_exec_counters(DisasContext *ctx) -{ - tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], - hex_gpr[HEX_REG_QEMU_PKT_CNT], ctx->num_packets); - tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_INSN_CNT], - hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); -} - static void gen_commit_packet(DisasContext *ctx, Packet *pkt) { gen_reg_writes(ctx); @@ -478,7 +493,7 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) #endif if (pkt->pkt_has_cof) { - ctx->base.is_jmp = DISAS_NORETURN; + gen_end_tb(ctx); } } @@ -491,8 +506,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) nwords = read_packet_words(env, ctx, words); if (!nwords) { - gen_exception(HEX_EXCP_INVALID_PACKET); - ctx->base.is_jmp = DISAS_NORETURN; + gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); return; } @@ -505,8 +519,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) gen_commit_packet(ctx, &pkt); ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; } else { - gen_exception(HEX_EXCP_INVALID_PACKET); - ctx->base.is_jmp = DISAS_NORETURN; + gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET); } } @@ -536,9 +549,7 @@ static bool hexagon_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, { DisasContext *ctx = container_of(dcbase, DisasContext, base); - tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); - ctx->base.is_jmp = DISAS_NORETURN; - gen_exception_debug(); + gen_exception_end_tb(ctx, EXCP_DEBUG); /* * The address covered by the breakpoint must be included in * [tb->pc, tb->pc + tb->size) in order to for it to be @@ -601,19 +612,12 @@ static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_exec_counters(ctx); tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next); if (ctx->base.singlestep_enabled) { - gen_exception_debug(); + gen_exception_raw(EXCP_DEBUG); } else { tcg_gen_exit_tb(NULL, 0); } break; case DISAS_NORETURN: - gen_exec_counters(ctx); - tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); - if (ctx->base.singlestep_enabled) { - gen_exception_debug(); - } else { - tcg_gen_exit_tb(NULL, 0); - } break; default: g_assert_not_reached(); diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 938f7fb..12506c8 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -86,8 +86,5 @@ extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; -void gen_exception(int excp); -void gen_exception_debug(void); - void process_store(DisasContext *ctx, Packet *pkt, int slot_num); #endif -- cgit v1.1 From 6c677c60ae34bd2c7936781ee8969e41b1dac81e Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:34 -0500 Subject: Hexagon (target/hexagon) decide if pred has been written at TCG gen time Multiple writes to the same preg are and'ed together. Rather than generating a runtime check, we can determine at TCG generation time if the predicate has previously been written in the packet. Test added to tests/tcg/hexagon/misc.c Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-7-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg_funcs.py | 2 +- target/hexagon/genptr.c | 22 +++++++++++++++------- target/hexagon/translate.c | 9 +++++++-- target/hexagon/translate.h | 2 ++ tests/tcg/hexagon/misc.c | 19 +++++++++++++++++++ 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index db9f663..7ceb25b 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -316,7 +316,7 @@ def genptr_dst_write(f, tag, regtype, regid): print("Bad register parse: ", regtype, regid) elif (regtype == "P"): if (regid in {"d", "e", "x"}): - f.write(" gen_log_pred_write(%s%sN, %s%sV);\n" % \ + f.write(" gen_log_pred_write(ctx, %s%sN, %s%sV);\n" % \ (regtype, regid, regtype, regid)) f.write(" ctx_log_pred_write(ctx, %s%sN);\n" % \ (regtype, regid)) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 07d970f..6b74344 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -119,20 +119,28 @@ static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) #endif } -static inline void gen_log_pred_write(int pnum, TCGv val) +static inline void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { TCGv zero = tcg_const_tl(0); TCGv base_val = tcg_temp_new(); TCGv and_val = tcg_temp_new(); TCGv pred_written = tcg_temp_new(); - /* Multiple writes to the same preg are and'ed together */ tcg_gen_andi_tl(base_val, val, 0xff); - tcg_gen_and_tl(and_val, base_val, hex_new_pred_value[pnum]); - tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pnum); - tcg_gen_movcond_tl(TCG_COND_NE, hex_new_pred_value[pnum], - pred_written, zero, - and_val, base_val); + + /* + * Section 6.1.3 of the Hexagon V67 Programmer's Reference Manual + * + * Multiple writes to the same preg are and'ed together + * If this is the first predicate write in the packet, do a + * straight assignment. Otherwise, do an and. + */ + if (!test_bit(pnum, ctx->pregs_written)) { + tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val); + } else { + tcg_gen_and_tl(hex_new_pred_value[pnum], + hex_new_pred_value[pnum], base_val); + } tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); tcg_temp_free(zero); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 9f2a531..49ec8b7 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -172,6 +172,7 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt) ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; + bitmap_zero(ctx->pregs_written, NUM_PREGS); for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; } @@ -226,7 +227,7 @@ static void mark_implicit_pred_write(DisasContext *ctx, Insn *insn, } } -static void mark_implicit_writes(DisasContext *ctx, Insn *insn) +static void mark_implicit_reg_writes(DisasContext *ctx, Insn *insn) { mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_FP, HEX_REG_FP); mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SP, HEX_REG_SP); @@ -235,7 +236,10 @@ static void mark_implicit_writes(DisasContext *ctx, Insn *insn) mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA0, HEX_REG_SA0); mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_LC1, HEX_REG_LC1); mark_implicit_reg_write(ctx, insn, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); +} +static void mark_implicit_pred_writes(DisasContext *ctx, Insn *insn) +{ mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P0, 0); mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P1, 1); mark_implicit_pred_write(ctx, insn, A_IMPLICIT_WRITES_P2, 2); @@ -246,8 +250,9 @@ static void gen_insn(CPUHexagonState *env, DisasContext *ctx, Insn *insn, Packet *pkt) { if (insn->generate) { - mark_implicit_writes(ctx, insn); + mark_implicit_reg_writes(ctx, insn); insn->generate(env, ctx, insn, pkt); + mark_implicit_pred_writes(ctx, insn); } else { gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE); } diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 12506c8..0ecfbd7 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -34,6 +34,7 @@ typedef struct DisasContext { DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; + DECLARE_BITMAP(pregs_written, NUM_PREGS); uint8_t store_width[STORES_MAX]; uint8_t s1_store_processed; } DisasContext; @@ -60,6 +61,7 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) { ctx->preg_log[ctx->preg_log_idx] = pnum; ctx->preg_log_idx++; + set_bit(pnum, ctx->pregs_written); } static inline bool is_preloaded(DisasContext *ctx, int num) diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index 458759f..e5d78b4 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -264,6 +264,22 @@ static long long creg_pair(int x, int y) return retval; } +/* Check that predicates are auto-and'ed in a packet */ +static int auto_and(void) +{ + int retval; + asm ("r5 = #1\n\t" + "{\n\t" + " p0 = cmp.eq(r1, #1)\n\t" + " p0 = cmp.eq(r1, #2)\n\t" + "}\n\t" + "%0 = p0\n\t" + : "=r"(retval) + : + : "r5", "p0"); + return retval; +} + int main() { @@ -375,6 +391,9 @@ int main() res = test_clrtnew(2, 7); check(res, 7); + res = auto_and(); + check(res, 0); + puts(err ? "FAIL" : "PASS"); return err; } -- cgit v1.1 From 92cfa25fd2cf65749a93507e132225066bc19ed5 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:35 -0500 Subject: Hexagon (target/hexagon) change variables from int to bool when appropriate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-8-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/cpu_bits.h | 2 +- target/hexagon/decode.c | 80 +++++++++++++++++++++++----------------------- target/hexagon/insn.h | 21 ++++++------ target/hexagon/op_helper.c | 8 ++--- target/hexagon/translate.c | 6 ++-- target/hexagon/translate.h | 2 +- 6 files changed, 60 insertions(+), 59 deletions(-) diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index 96af834..96fef71 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -47,7 +47,7 @@ static inline uint32_t iclass_bits(uint32_t encoding) return iclass; } -static inline int is_packet_end(uint32_t endocing) +static inline bool is_packet_end(uint32_t endocing) { uint32_t bits = parse_bits(endocing); return ((bits == 0x3) || (bits == 0x0)); diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 65d97ce..dffe1d1 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -340,8 +340,8 @@ static void decode_split_cmpjump(Packet *pkt) if (GET_ATTRIB(pkt->insn[i].opcode, A_NEWCMPJUMP)) { last = pkt->num_insns; pkt->insn[last] = pkt->insn[i]; /* copy the instruction */ - pkt->insn[last].part1 = 1; /* last instruction does the CMP */ - pkt->insn[i].part1 = 0; /* existing instruction does the JUMP */ + pkt->insn[last].part1 = true; /* last insn does the CMP */ + pkt->insn[i].part1 = false; /* existing insn does the JUMP */ pkt->num_insns++; } } @@ -354,7 +354,7 @@ static void decode_split_cmpjump(Packet *pkt) } } -static int decode_opcode_can_jump(int opcode) +static bool decode_opcode_can_jump(int opcode) { if ((GET_ATTRIB(opcode, A_JUMP)) || (GET_ATTRIB(opcode, A_CALL)) || @@ -362,15 +362,15 @@ static int decode_opcode_can_jump(int opcode) (opcode == J2_pause)) { /* Exception to A_JUMP attribute */ if (opcode == J4_hintjumpr) { - return 0; + return false; } - return 1; + return true; } - return 0; + return false; } -static int decode_opcode_ends_loop(int opcode) +static bool decode_opcode_ends_loop(int opcode) { return GET_ATTRIB(opcode, A_HWLOOP0_END) || GET_ATTRIB(opcode, A_HWLOOP1_END); @@ -383,9 +383,9 @@ static void decode_set_insn_attr_fields(Packet *pkt) int numinsns = pkt->num_insns; uint16_t opcode; - pkt->pkt_has_cof = 0; - pkt->pkt_has_endloop = 0; - pkt->pkt_has_dczeroa = 0; + pkt->pkt_has_cof = false; + pkt->pkt_has_endloop = false; + pkt->pkt_has_dczeroa = false; for (i = 0; i < numinsns; i++) { opcode = pkt->insn[i].opcode; @@ -394,14 +394,14 @@ static void decode_set_insn_attr_fields(Packet *pkt) } if (GET_ATTRIB(opcode, A_DCZEROA)) { - pkt->pkt_has_dczeroa = 1; + pkt->pkt_has_dczeroa = true; } if (GET_ATTRIB(opcode, A_STORE)) { if (pkt->insn[i].slot == 0) { - pkt->pkt_has_store_s0 = 1; + pkt->pkt_has_store_s0 = true; } else { - pkt->pkt_has_store_s1 = 1; + pkt->pkt_has_store_s1 = true; } } @@ -422,9 +422,9 @@ static void decode_set_insn_attr_fields(Packet *pkt) */ static void decode_shuffle_for_execution(Packet *packet) { - int changed = 0; + bool changed = false; int i; - int flag; /* flag means we've seen a non-memory instruction */ + bool flag; /* flag means we've seen a non-memory instruction */ int n_mems; int last_insn = packet->num_insns - 1; @@ -437,7 +437,7 @@ static void decode_shuffle_for_execution(Packet *packet) } do { - changed = 0; + changed = false; /* * Stores go last, must not reorder. * Cannot shuffle stores past loads, either. @@ -445,13 +445,13 @@ static void decode_shuffle_for_execution(Packet *packet) * then a store, shuffle the store to the front. Don't shuffle * stores wrt each other or a load. */ - for (flag = n_mems = 0, i = last_insn; i >= 0; i--) { + for (flag = false, n_mems = 0, i = last_insn; i >= 0; i--) { int opcode = packet->insn[i].opcode; if (flag && GET_ATTRIB(opcode, A_STORE)) { decode_send_insn_to(packet, i, last_insn - n_mems); n_mems++; - changed = 1; + changed = true; } else if (GET_ATTRIB(opcode, A_STORE)) { n_mems++; } else if (GET_ATTRIB(opcode, A_LOAD)) { @@ -466,7 +466,7 @@ static void decode_shuffle_for_execution(Packet *packet) * a .new value */ } else { - flag = 1; + flag = true; } } @@ -474,7 +474,7 @@ static void decode_shuffle_for_execution(Packet *packet) continue; } /* Compares go first, may be reordered wrt each other */ - for (flag = 0, i = 0; i < last_insn + 1; i++) { + for (flag = false, i = 0; i < last_insn + 1; i++) { int opcode = packet->insn[i].opcode; if ((strstr(opcode_wregs[opcode], "Pd4") || @@ -483,7 +483,7 @@ static void decode_shuffle_for_execution(Packet *packet) /* This should be a compare (not a store conditional) */ if (flag) { decode_send_insn_to(packet, i, 0); - changed = 1; + changed = true; continue; } } else if (GET_ATTRIB(opcode, A_IMPLICIT_WRITES_P3) && @@ -495,18 +495,18 @@ static void decode_shuffle_for_execution(Packet *packet) */ if (flag) { decode_send_insn_to(packet, i, 0); - changed = 1; + changed = true; continue; } } else if (GET_ATTRIB(opcode, A_IMPLICIT_WRITES_P0) && !GET_ATTRIB(opcode, A_NEWCMPJUMP)) { if (flag) { decode_send_insn_to(packet, i, 0); - changed = 1; + changed = true; continue; } } else { - flag = 1; + flag = true; } } if (changed) { @@ -543,7 +543,7 @@ static void decode_apply_extenders(Packet *packet) int i; for (i = 0; i < packet->num_insns; i++) { if (GET_ATTRIB(packet->insn[i].opcode, A_IT_EXTENDER)) { - packet->insn[i + 1].extension_valid = 1; + packet->insn[i + 1].extension_valid = true; apply_extender(packet, i + 1, packet->insn[i].immed[0]); } } @@ -764,7 +764,7 @@ static void decode_add_endloop_insn(Insn *insn, int loopnum) } } -static int decode_parsebits_is_loopend(uint32_t encoding32) +static bool decode_parsebits_is_loopend(uint32_t encoding32) { uint32_t bits = parse_bits(encoding32); return bits == 0x2; @@ -775,8 +775,11 @@ decode_set_slot_number(Packet *pkt) { int slot; int i; - int hit_mem_insn = 0; - int hit_duplex = 0; + bool hit_mem_insn = false; + bool hit_duplex = false; + bool slot0_found = false; + bool slot1_found = false; + int slot1_iidx = 0; /* * The slots are encoded in reverse order @@ -801,7 +804,7 @@ decode_set_slot_number(Packet *pkt) if ((GET_ATTRIB(pkt->insn[i].opcode, A_MEMLIKE) || GET_ATTRIB(pkt->insn[i].opcode, A_MEMLIKE_PACKET_RULES)) && !hit_mem_insn) { - hit_mem_insn = 1; + hit_mem_insn = true; pkt->insn[i].slot = 0; continue; } @@ -818,7 +821,7 @@ decode_set_slot_number(Packet *pkt) for (i = pkt->num_insns - 1; i >= 0; i--) { /* First subinsn always goes to slot 0 */ if (GET_ATTRIB(pkt->insn[i].opcode, A_SUBINSN) && !hit_duplex) { - hit_duplex = 1; + hit_duplex = true; pkt->insn[i].slot = 0; continue; } @@ -830,13 +833,10 @@ decode_set_slot_number(Packet *pkt) } /* Fix the exceptions - slot 1 is never empty, always aligns to slot 0 */ - int slot0_found = 0; - int slot1_found = 0; - int slot1_iidx = 0; for (i = pkt->num_insns - 1; i >= 0; i--) { /* Is slot0 used? */ if (pkt->insn[i].slot == 0) { - int is_endloop = (pkt->insn[i].opcode == J2_endloop01); + bool is_endloop = (pkt->insn[i].opcode == J2_endloop01); is_endloop |= (pkt->insn[i].opcode == J2_endloop0); is_endloop |= (pkt->insn[i].opcode == J2_endloop1); @@ -845,17 +845,17 @@ decode_set_slot_number(Packet *pkt) * slot0 for endloop */ if (!is_endloop) { - slot0_found = 1; + slot0_found = true; } } /* Is slot1 used? */ if (pkt->insn[i].slot == 1) { - slot1_found = 1; + slot1_found = true; slot1_iidx = i; } } /* Is slot0 empty and slot1 used? */ - if ((slot0_found == 0) && (slot1_found == 1)) { + if ((!slot0_found) && slot1_found) { /* Then push it to slot0 */ pkt->insn[slot1_iidx].slot = 0; } @@ -873,7 +873,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, { int num_insns = 0; int words_read = 0; - int end_of_packet = 0; + bool end_of_packet = false; int new_insns = 0; uint32_t encoding32; @@ -890,7 +890,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, * decode works */ if (pkt->insn[num_insns].opcode == A4_ext) { - pkt->insn[num_insns + 1].extension_valid = 1; + pkt->insn[num_insns + 1].extension_valid = true; } num_insns += new_insns; words_read++; @@ -913,7 +913,7 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, decode_add_endloop_insn(&pkt->insn[pkt->num_insns++], 0); } if (words_read >= 3) { - uint32_t has_loop0, has_loop1; + bool has_loop0, has_loop1; has_loop0 = decode_parsebits_is_loopend(words[0]); has_loop1 = decode_parsebits_is_loopend(words[1]); if (has_loop0 && has_loop1) { diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h index 5756a1d..2e34591 100644 --- a/target/hexagon/insn.h +++ b/target/hexagon/insn.h @@ -40,14 +40,15 @@ struct Instruction { uint32_t iclass:6; uint32_t slot:3; - uint32_t part1:1; /* + uint32_t which_extended:1; /* If has an extender, which immediate */ + uint32_t new_value_producer_slot:4; + + bool part1; /* * cmp-jumps are split into two insns. * set for the compare and clear for the jump */ - uint32_t extension_valid:1; /* Has a constant extender attached */ - uint32_t which_extended:1; /* If has an extender, which immediate */ - uint32_t is_endloop:1; /* This is an end of loop */ - uint32_t new_value_producer_slot:4; + bool extension_valid; /* Has a constant extender attached */ + bool is_endloop; /* This is an end of loop */ int32_t immed[IMMEDS_MAX]; /* immediate field */ }; @@ -58,13 +59,13 @@ struct Packet { uint16_t encod_pkt_size_in_bytes; /* Pre-decodes about COF */ - uint32_t pkt_has_cof:1; /* Has any change-of-flow */ - uint32_t pkt_has_endloop:1; + bool pkt_has_cof; /* Has any change-of-flow */ + bool pkt_has_endloop; - uint32_t pkt_has_dczeroa:1; + bool pkt_has_dczeroa; - uint32_t pkt_has_store_s0:1; - uint32_t pkt_has_store_s1:1; + bool pkt_has_store_s0; + bool pkt_has_store_s1; Insn insn[INSTRUCTIONS_MAX]; }; diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 7ac8554..1d91fa2 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -948,8 +948,8 @@ static bool is_inf_prod(int32_t a, int32_t b) float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV, float32 RsV, float32 RtV) { - int infinp; - int infminusinf; + bool infinp; + bool infminusinf; float32 tmp; arch_fpop_start(env); @@ -982,8 +982,8 @@ float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV, float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV, float32 RsV, float32 RtV) { - int infinp; - int infminusinf; + bool infinp; + bool infminusinf; float32 tmp; arch_fpop_start(env); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 49ec8b7..0468422 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -177,7 +177,7 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt) ctx->store_width[i] = 0; } tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); - ctx->s1_store_processed = 0; + ctx->s1_store_processed = false; #if HEX_DEBUG /* Handy place to set a breakpoint before the packet executes */ @@ -210,7 +210,7 @@ static void mark_implicit_reg_write(DisasContext *ctx, Insn *insn, int attrib, int rnum) { if (GET_ATTRIB(insn->opcode, attrib)) { - int is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC); + bool is_predicated = GET_ATTRIB(insn->opcode, A_CONDEXEC); if (is_predicated && !is_preloaded(ctx, rnum)) { tcg_gen_mov_tl(hex_new_value[rnum], hex_gpr[rnum]); } @@ -354,7 +354,7 @@ void process_store(DisasContext *ctx, Packet *pkt, int slot_num) if (slot_num == 1 && ctx->s1_store_processed) { return; } - ctx->s1_store_processed = 1; + ctx->s1_store_processed = true; if (is_predicated) { TCGv cancelled = tcg_temp_new(); diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 0ecfbd7..97b12a7 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -36,7 +36,7 @@ typedef struct DisasContext { int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); uint8_t store_width[STORES_MAX]; - uint8_t s1_store_processed; + bool s1_store_processed; } DisasContext; static inline void ctx_log_reg_write(DisasContext *ctx, int rnum) -- cgit v1.1 From 85511161f7cd1f52a177413e2128b1a05b71163e Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:36 -0500 Subject: Hexagon (target/hexagon) remove unused carry_from_add64 function Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-9-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/arch.c | 13 ------------- target/hexagon/arch.h | 1 - target/hexagon/macros.h | 2 -- 3 files changed, 16 deletions(-) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 09de124..699e2cf 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -76,19 +76,6 @@ uint64_t deinterleave(uint64_t src) return myeven | (myodd << 32); } -uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c) -{ - uint64_t tmpa, tmpb, tmpc; - tmpa = fGETUWORD(0, a); - tmpb = fGETUWORD(0, b); - tmpc = tmpa + tmpb + c; - tmpa = fGETUWORD(1, a); - tmpb = fGETUWORD(1, b); - tmpc = tmpa + tmpb + fGETUWORD(1, tmpc); - tmpc = fGETUWORD(1, tmpc); - return tmpc; -} - int32_t conv_round(int32_t a, int n) { int64_t val; diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h index 1f7f036..6e0b0d9 100644 --- a/target/hexagon/arch.h +++ b/target/hexagon/arch.h @@ -22,7 +22,6 @@ uint64_t interleave(uint32_t odd, uint32_t even); uint64_t deinterleave(uint64_t src); -uint32_t carry_from_add64(uint64_t a, uint64_t b, uint32_t c); int32_t conv_round(int32_t a, int n); void arch_fpop_start(CPUHexagonState *env); void arch_fpop_end(CPUHexagonState *env); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index cfcb817..8cb211d 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -341,8 +341,6 @@ static inline void gen_logical_not(TCGv dest, TCGv src) #define fWRITE_LC0(VAL) WRITE_RREG(HEX_REG_LC0, VAL) #define fWRITE_LC1(VAL) WRITE_RREG(HEX_REG_LC1, VAL) -#define fCARRY_FROM_ADD(A, B, C) carry_from_add64(A, B, C) - #define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1) #define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL)) #define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG)) -- cgit v1.1 From 8c36752435da380ddf2733d499c4be2cdb8c1b6f Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:37 -0500 Subject: Hexagon (target/hexagon) change type of softfloat_roundingmodes Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-10-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/arch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 699e2cf..bb51f19 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -95,7 +95,7 @@ int32_t conv_round(int32_t a, int n) /* Floating Point Stuff */ -static const int softfloat_roundingmodes[] = { +static const FloatRoundMode softfloat_roundingmodes[] = { float_round_nearest_even, float_round_to_zero, float_round_down, -- cgit v1.1 From c0336c87b773614eebd23714e3a866bfcd78e9f2 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:38 -0500 Subject: Hexagon (target/hexagon) use softfloat default NaN and tininess Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-11-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- fpu/softfloat-specialize.c.inc | 3 +++ target/hexagon/cpu.c | 5 +++++ target/hexagon/op_helper.c | 47 ------------------------------------------ 3 files changed, 8 insertions(+), 47 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index c2f87ad..9ea318f 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -145,6 +145,9 @@ static FloatParts parts_default_nan(float_status *status) #elif defined(TARGET_HPPA) /* snan_bit_is_one, set msb-1. */ frac = 1ULL << (DECOMPOSED_BINARY_POINT - 2); +#elif defined(TARGET_HEXAGON) + sign = 1; + frac = ~0ULL; #else /* This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V, * S390, SH4, TriCore, and Xtensa. I cannot find documentation diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index f044506..ff44fd6 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "qapi/error.h" #include "hw/qdev-properties.h" +#include "fpu/softfloat-helpers.h" static void hexagon_v67_cpu_init(Object *obj) { @@ -205,8 +206,12 @@ static void hexagon_cpu_reset(DeviceState *dev) CPUState *cs = CPU(dev); HexagonCPU *cpu = HEXAGON_CPU(cs); HexagonCPUClass *mcc = HEXAGON_CPU_GET_CLASS(cpu); + CPUHexagonState *env = &cpu->env; mcc->parent_reset(dev); + + set_default_nan_mode(1, &env->fp_status); + set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); } static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 1d91fa2..478421d 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -297,26 +297,6 @@ int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) } /* - * Hexagon FP operations return ~0 instead of NaN - * The hex_check_sfnan/hex_check_dfnan functions perform this check - */ -static float32 hex_check_sfnan(float32 x) -{ - if (float32_is_any_nan(x)) { - return make_float32(0xFFFFFFFFU); - } - return x; -} - -static float64 hex_check_dfnan(float64 x) -{ - if (float64_is_any_nan(x)) { - return make_float64(0xFFFFFFFFFFFFFFFFULL); - } - return x; -} - -/* * mem_noshuf * Section 5.5 of the Hexagon V67 Programmer's Reference Manual * @@ -373,7 +353,6 @@ float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV) float64 out_f64; arch_fpop_start(env); out_f64 = float32_to_float64(RsV, &env->fp_status); - out_f64 = hex_check_dfnan(out_f64); arch_fpop_end(env); return out_f64; } @@ -383,7 +362,6 @@ float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV) float32 out_f32; arch_fpop_start(env); out_f32 = float64_to_float32(RssV, &env->fp_status); - out_f32 = hex_check_sfnan(out_f32); arch_fpop_end(env); return out_f32; } @@ -393,7 +371,6 @@ float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV) float32 RdV; arch_fpop_start(env); RdV = uint32_to_float32(RsV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -403,7 +380,6 @@ float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV) float64 RddV; arch_fpop_start(env); RddV = uint32_to_float64(RsV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -413,7 +389,6 @@ float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV) float32 RdV; arch_fpop_start(env); RdV = int32_to_float32(RsV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -423,7 +398,6 @@ float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV) float64 RddV; arch_fpop_start(env); RddV = int32_to_float64(RsV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -433,7 +407,6 @@ float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV) float32 RdV; arch_fpop_start(env); RdV = uint64_to_float32(RssV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -443,7 +416,6 @@ float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV) float64 RddV; arch_fpop_start(env); RddV = uint64_to_float64(RssV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -453,7 +425,6 @@ float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV) float32 RdV; arch_fpop_start(env); RdV = int64_to_float32(RssV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -463,7 +434,6 @@ float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV) float64 RddV; arch_fpop_start(env); RddV = int64_to_float64(RssV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -625,7 +595,6 @@ float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_add(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -635,7 +604,6 @@ float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_sub(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -687,7 +655,6 @@ float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_maxnum(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -697,7 +664,6 @@ float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = float32_minnum(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -764,7 +730,6 @@ float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV) float64 RddV; arch_fpop_start(env); RddV = float64_add(RssV, RttV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -774,7 +739,6 @@ float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV) float64 RddV; arch_fpop_start(env); RddV = float64_sub(RssV, RttV, &env->fp_status); - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -787,7 +751,6 @@ float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV) if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { float_raise(float_flag_invalid, &env->fp_status); } - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -800,7 +763,6 @@ float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV) if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { float_raise(float_flag_invalid, &env->fp_status); } - RddV = hex_check_dfnan(RddV); arch_fpop_end(env); return RddV; } @@ -876,7 +838,6 @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV) float32 RdV; arch_fpop_start(env); RdV = internal_mpyf(RsV, RtV, &env->fp_status); - RdV = hex_check_sfnan(RdV); arch_fpop_end(env); return RdV; } @@ -886,7 +847,6 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV, { arch_fpop_start(env); RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status); - RxV = hex_check_sfnan(RxV); arch_fpop_end(env); return RxV; } @@ -918,7 +878,6 @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV, RxV = check_nan(RxV, RsV, &env->fp_status); RxV = check_nan(RxV, RtV, &env->fp_status); tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status); - tmp = hex_check_sfnan(tmp); if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) { RxV = tmp; } @@ -933,7 +892,6 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV, arch_fpop_start(env); neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status); RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status); - RxV = hex_check_sfnan(RxV); arch_fpop_end(env); return RxV; } @@ -964,7 +922,6 @@ float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV, RxV = check_nan(RxV, RsV, &env->fp_status); RxV = check_nan(RxV, RtV, &env->fp_status); tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status); - tmp = hex_check_sfnan(tmp); if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) { RxV = tmp; } @@ -999,7 +956,6 @@ float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV, RxV = check_nan(RxV, RtV, &env->fp_status); float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status); tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status); - tmp = hex_check_sfnan(tmp); if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) { RxV = tmp; } @@ -1023,13 +979,11 @@ float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV) float64_is_normal(RttV)) { RddV = float64_mul(RssV, make_float64(0x4330000000000000), &env->fp_status); - RddV = hex_check_dfnan(RddV); } else if (float64_is_denormal(RttV) && (float64_getexp(RssV) >= 512) && float64_is_normal(RssV)) { RddV = float64_mul(RssV, make_float64(0x3cb0000000000000), &env->fp_status); - RddV = hex_check_dfnan(RddV); } else { RddV = RssV; } @@ -1042,7 +996,6 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV, { arch_fpop_start(env); RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status); - RxxV = hex_check_dfnan(RxxV); arch_fpop_end(env); return RxxV; } -- cgit v1.1 From 1cb532fe45991b70cf47c414e0bc4fe63f571a8a Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:39 -0500 Subject: Hexagon (target/hexagon) replace float32_mul_pow2 with float32_scalbn Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-12-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/arch.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index bb51f19..40b6e3d 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -143,12 +143,6 @@ void arch_fpop_end(CPUHexagonState *env) } } -static float32 float32_mul_pow2(float32 a, uint32_t p, float_status *fp_status) -{ - float32 b = make_float32((SF_BIAS + p) << SF_MANTBITS); - return float32_mul(a, b, fp_status); -} - int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, float_status *fp_status) { @@ -217,22 +211,22 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { /* Near quotient underflow / inexact Q */ PeV = 0x80; - RtV = float32_mul_pow2(RtV, -64, fp_status); - RsV = float32_mul_pow2(RsV, 64, fp_status); + RtV = float32_scalbn(RtV, -64, fp_status); + RsV = float32_scalbn(RsV, 64, fp_status); } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) { /* Near quotient overflow */ PeV = 0x40; - RtV = float32_mul_pow2(RtV, 32, fp_status); - RsV = float32_mul_pow2(RsV, -32, fp_status); + RtV = float32_scalbn(RtV, 32, fp_status); + RsV = float32_scalbn(RsV, -32, fp_status); } else if (n_exp <= SF_MANTBITS + 2) { - RtV = float32_mul_pow2(RtV, 64, fp_status); - RsV = float32_mul_pow2(RsV, 64, fp_status); + RtV = float32_scalbn(RtV, 64, fp_status); + RsV = float32_scalbn(RsV, 64, fp_status); } else if (d_exp <= 1) { - RtV = float32_mul_pow2(RtV, 32, fp_status); - RsV = float32_mul_pow2(RsV, 32, fp_status); + RtV = float32_scalbn(RtV, 32, fp_status); + RsV = float32_scalbn(RsV, 32, fp_status); } else if (d_exp > 252) { - RtV = float32_mul_pow2(RtV, -32, fp_status); - RsV = float32_mul_pow2(RsV, -32, fp_status); + RtV = float32_scalbn(RtV, -32, fp_status); + RsV = float32_scalbn(RsV, -32, fp_status); } RdV = 0; ret = 1; @@ -274,7 +268,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, /* Basic checks passed */ r_exp = float32_getexp(RsV); if (r_exp <= 24) { - RsV = float32_mul_pow2(RsV, 64, fp_status); + RsV = float32_scalbn(RsV, 64, fp_status); PeV = 0xe0; } RdV = 0; -- cgit v1.1 From b3f37abdd3f4919c81ea42f9f729875544623df2 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:40 -0500 Subject: Hexagon (target/hexagon) use softfloat for float-to-int conversions Use the proper return for helpers that convert to unsigned Remove target/hexagon/conv_emu.[ch] Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-13-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/conv_emu.c | 177 -------------------------------------------- target/hexagon/conv_emu.h | 31 -------- target/hexagon/fma_emu.c | 1 - target/hexagon/helper.h | 16 ++-- target/hexagon/meson.build | 1 - target/hexagon/op_helper.c | 169 ++++++++++++++++++++++++++++++++---------- tests/tcg/hexagon/fpstuff.c | 145 ++++++++++++++++++++++++++++++++++++ 7 files changed, 281 insertions(+), 259 deletions(-) delete mode 100644 target/hexagon/conv_emu.c delete mode 100644 target/hexagon/conv_emu.h diff --git a/target/hexagon/conv_emu.c b/target/hexagon/conv_emu.c deleted file mode 100644 index 3985b10..0000000 --- a/target/hexagon/conv_emu.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include "qemu/osdep.h" -#include "qemu/host-utils.h" -#include "fpu/softfloat.h" -#include "macros.h" -#include "conv_emu.h" - -#define LL_MAX_POS 0x7fffffffffffffffULL -#define MAX_POS 0x7fffffffU - -static uint64_t conv_f64_to_8u_n(float64 in, int will_negate, - float_status *fp_status) -{ - uint8_t sign = float64_is_neg(in); - if (float64_is_infinity(in)) { - float_raise(float_flag_invalid, fp_status); - if (float64_is_neg(in)) { - return 0ULL; - } else { - return ~0ULL; - } - } - if (float64_is_any_nan(in)) { - float_raise(float_flag_invalid, fp_status); - return ~0ULL; - } - if (float64_is_zero(in)) { - return 0; - } - if (sign) { - float_raise(float_flag_invalid, fp_status); - return 0; - } - if (float64_lt(in, float64_half, fp_status)) { - /* Near zero, captures large fracshifts, denorms, etc */ - float_raise(float_flag_inexact, fp_status); - switch (get_float_rounding_mode(fp_status)) { - case float_round_down: - if (will_negate) { - return 1; - } else { - return 0; - } - case float_round_up: - if (!will_negate) { - return 1; - } else { - return 0; - } - default: - return 0; /* nearest or towards zero */ - } - } - return float64_to_uint64(in, fp_status); -} - -static void clr_float_exception_flags(uint8_t flag, float_status *fp_status) -{ - uint8_t flags = fp_status->float_exception_flags; - flags &= ~flag; - set_float_exception_flags(flags, fp_status); -} - -static uint32_t conv_df_to_4u_n(float64 fp64, int will_negate, - float_status *fp_status) -{ - uint64_t tmp; - tmp = conv_f64_to_8u_n(fp64, will_negate, fp_status); - if (tmp > 0x00000000ffffffffULL) { - clr_float_exception_flags(float_flag_inexact, fp_status); - float_raise(float_flag_invalid, fp_status); - return ~0U; - } - return (uint32_t)tmp; -} - -uint64_t conv_df_to_8u(float64 in, float_status *fp_status) -{ - return conv_f64_to_8u_n(in, 0, fp_status); -} - -uint32_t conv_df_to_4u(float64 in, float_status *fp_status) -{ - return conv_df_to_4u_n(in, 0, fp_status); -} - -int64_t conv_df_to_8s(float64 in, float_status *fp_status) -{ - uint8_t sign = float64_is_neg(in); - uint64_t tmp; - if (float64_is_any_nan(in)) { - float_raise(float_flag_invalid, fp_status); - return -1; - } - if (sign) { - float64 minus_fp64 = float64_abs(in); - tmp = conv_f64_to_8u_n(minus_fp64, 1, fp_status); - } else { - tmp = conv_f64_to_8u_n(in, 0, fp_status); - } - if (tmp > (LL_MAX_POS + sign)) { - clr_float_exception_flags(float_flag_inexact, fp_status); - float_raise(float_flag_invalid, fp_status); - tmp = (LL_MAX_POS + sign); - } - if (sign) { - return -tmp; - } else { - return tmp; - } -} - -int32_t conv_df_to_4s(float64 in, float_status *fp_status) -{ - uint8_t sign = float64_is_neg(in); - uint64_t tmp; - if (float64_is_any_nan(in)) { - float_raise(float_flag_invalid, fp_status); - return -1; - } - if (sign) { - float64 minus_fp64 = float64_abs(in); - tmp = conv_f64_to_8u_n(minus_fp64, 1, fp_status); - } else { - tmp = conv_f64_to_8u_n(in, 0, fp_status); - } - if (tmp > (MAX_POS + sign)) { - clr_float_exception_flags(float_flag_inexact, fp_status); - float_raise(float_flag_invalid, fp_status); - tmp = (MAX_POS + sign); - } - if (sign) { - return -tmp; - } else { - return tmp; - } -} - -uint64_t conv_sf_to_8u(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_8u(fp64, fp_status); -} - -uint32_t conv_sf_to_4u(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_4u(fp64, fp_status); -} - -int64_t conv_sf_to_8s(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_8s(fp64, fp_status); -} - -int32_t conv_sf_to_4s(float32 in, float_status *fp_status) -{ - float64 fp64 = float32_to_float64(in, fp_status); - return conv_df_to_4s(fp64, fp_status); -} diff --git a/target/hexagon/conv_emu.h b/target/hexagon/conv_emu.h deleted file mode 100644 index cade9de..0000000 --- a/target/hexagon/conv_emu.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef HEXAGON_CONV_EMU_H -#define HEXAGON_CONV_EMU_H - -uint64_t conv_sf_to_8u(float32 in, float_status *fp_status); -uint32_t conv_sf_to_4u(float32 in, float_status *fp_status); -int64_t conv_sf_to_8s(float32 in, float_status *fp_status); -int32_t conv_sf_to_4s(float32 in, float_status *fp_status); - -uint64_t conv_df_to_8u(float64 in, float_status *fp_status); -uint32_t conv_df_to_4u(float64 in, float_status *fp_status); -int64_t conv_df_to_8s(float64 in, float_status *fp_status); -int32_t conv_df_to_4s(float64 in, float_status *fp_status); - -#endif diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c index f324b83..d3b45d4 100644 --- a/target/hexagon/fma_emu.c +++ b/target/hexagon/fma_emu.c @@ -19,7 +19,6 @@ #include "qemu/int128.h" #include "fpu/softfloat.h" #include "macros.h" -#include "conv_emu.h" #include "fma_emu.h" #define DF_INF_EXP 0x7ff diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index a5f340c..715c246 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -38,21 +38,21 @@ DEF_HELPER_2(conv_ud2sf, f32, env, s64) DEF_HELPER_2(conv_ud2df, f64, env, s64) DEF_HELPER_2(conv_d2sf, f32, env, s64) DEF_HELPER_2(conv_d2df, f64, env, s64) -DEF_HELPER_2(conv_sf2uw, s32, env, f32) +DEF_HELPER_2(conv_sf2uw, i32, env, f32) DEF_HELPER_2(conv_sf2w, s32, env, f32) -DEF_HELPER_2(conv_sf2ud, s64, env, f32) +DEF_HELPER_2(conv_sf2ud, i64, env, f32) DEF_HELPER_2(conv_sf2d, s64, env, f32) -DEF_HELPER_2(conv_df2uw, s32, env, f64) +DEF_HELPER_2(conv_df2uw, i32, env, f64) DEF_HELPER_2(conv_df2w, s32, env, f64) -DEF_HELPER_2(conv_df2ud, s64, env, f64) +DEF_HELPER_2(conv_df2ud, i64, env, f64) DEF_HELPER_2(conv_df2d, s64, env, f64) -DEF_HELPER_2(conv_sf2uw_chop, s32, env, f32) +DEF_HELPER_2(conv_sf2uw_chop, i32, env, f32) DEF_HELPER_2(conv_sf2w_chop, s32, env, f32) -DEF_HELPER_2(conv_sf2ud_chop, s64, env, f32) +DEF_HELPER_2(conv_sf2ud_chop, i64, env, f32) DEF_HELPER_2(conv_sf2d_chop, s64, env, f32) -DEF_HELPER_2(conv_df2uw_chop, s32, env, f64) +DEF_HELPER_2(conv_df2uw_chop, i32, env, f64) DEF_HELPER_2(conv_df2w_chop, s32, env, f64) -DEF_HELPER_2(conv_df2ud_chop, s64, env, f64) +DEF_HELPER_2(conv_df2ud_chop, i64, env, f64) DEF_HELPER_2(conv_df2d_chop, s64, env, f64) DEF_HELPER_3(sfadd, f32, env, f32, f32) DEF_HELPER_3(sfsub, f32, env, f32, f32) diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index bb0b4fb..6fd9360 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -173,7 +173,6 @@ hexagon_ss.add(files( 'printinsn.c', 'arch.c', 'fma_emu.c', - 'conv_emu.c', )) target_arch += {'hexagon': hexagon_ss} diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 478421d..b70c5d6 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -25,7 +25,6 @@ #include "arch.h" #include "hex_arch_types.h" #include "fma_emu.h" -#include "conv_emu.h" #define SF_BIAS 127 #define SF_MANTBITS 23 @@ -438,11 +437,17 @@ float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV) return RddV; } -int32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV) +uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - RdV = conv_sf_to_4u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float32_to_uint32(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -451,16 +456,28 @@ int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV) { int32_t RdV; arch_fpop_start(env); - RdV = conv_sf_to_4s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float32_to_int32(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV) +uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - RddV = conv_sf_to_8u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float32_to_uint64(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -469,16 +486,28 @@ int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV) { int64_t RddV; arch_fpop_start(env); - RddV = conv_sf_to_8s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float32_to_int64(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV) +uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - RdV = conv_df_to_4u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float64_to_uint32(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -487,16 +516,28 @@ int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV) { int32_t RdV; arch_fpop_start(env); - RdV = conv_df_to_4s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float64_to_int32(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV) +uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - RddV = conv_df_to_8u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float64_to_uint64(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -505,17 +546,28 @@ int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV) { int64_t RddV; arch_fpop_start(env); - RddV = conv_df_to_8s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float64_to_int64(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV) +uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_sf_to_4u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -524,18 +576,28 @@ int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV) { int32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_sf_to_4s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV) +uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_sf_to_8u(RsV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -544,18 +606,28 @@ int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV) { int64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_sf_to_8s(RsV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float32_is_any_nan(RsV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status); + } arch_fpop_end(env); return RddV; } -int32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV) +uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV) { - int32_t RdV; + uint32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_df_to_4u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float32_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = 0; + } else { + RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } @@ -564,18 +636,28 @@ int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV) { int32_t RdV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RdV = conv_df_to_4s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RdV = -1; + } else { + RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RdV; } -int64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV) +uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV) { - int64_t RddV; + uint64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_df_to_8u(RssV, &env->fp_status); + /* Hexagon checks the sign before rounding */ + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = 0; + } else { + RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } @@ -584,8 +666,13 @@ int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV) { int64_t RddV; arch_fpop_start(env); - set_float_rounding_mode(float_round_to_zero, &env->fp_status); - RddV = conv_df_to_8s(RssV, &env->fp_status); + /* Hexagon returns -1 for NaN */ + if (float64_is_any_nan(RssV)) { + float_raise(float_flag_invalid, &env->fp_status); + RddV = -1; + } else { + RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status); + } arch_fpop_end(env); return RddV; } diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index e4f1a0e..6b60f92 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -37,10 +37,12 @@ const int SF_NaN = 0x7fc00000; const int SF_NaN_special = 0x7f800001; const int SF_ANY = 0x3f800000; const int SF_HEX_NAN = 0xffffffff; +const int SF_small_neg = 0xab98fba8; const long long DF_NaN = 0x7ff8000000000000ULL; const long long DF_ANY = 0x3f80000000000000ULL; const long long DF_HEX_NAN = 0xffffffffffffffffULL; +const long long DF_small_neg = 0xbd731f7500000000ULL; int err; @@ -358,12 +360,155 @@ static void check_canonical_NaN(void) check_fpstatus(usr, 0); } +static void check_float2int_convs() +{ + int res32; + long long res64; + int usr; + + /* + * Check that the various forms of float-to-unsigned + * check sign before rounding + */ + asm(CLEAR_FPSTATUS + "%0 = convert_sf2uw(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2uw(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2ud(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2ud(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2uw(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2uw(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2ud(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2ud(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + /* + * Check that the various forms of float-to-signed return -1 for NaN + */ + asm(CLEAR_FPSTATUS + "%0 = convert_sf2w(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2w(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2d(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2d(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2w(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2w(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2d(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2d(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); +} + int main() { check_compare_exception(); check_sfminmax(); check_dfminmax(); check_canonical_NaN(); + check_float2int_convs(); puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; -- cgit v1.1 From 9fe33c0e7048b979d39ee579962d94871ea42e0a Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:41 -0500 Subject: Hexagon (target/hexagon) cleanup ternary operators in semantics Change (cond ? (res = x) : (res = y)) to res = (cond ? x : y) This makes the semnatics easier to for idef-parser to deal with The following instructions are impacted C2_any8 C2_all8 C2_mux C2_muxii C2_muxir C2_muxri Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-14-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/imported/compare.idef | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target/hexagon/imported/compare.idef b/target/hexagon/imported/compare.idef index 3551467..abd016f 100644 --- a/target/hexagon/imported/compare.idef +++ b/target/hexagon/imported/compare.idef @@ -198,11 +198,11 @@ Q6INSN(C4_or_orn,"Pd4=or(Ps4,or(Pt4,!Pu4))",ATTRIBS(A_CRSLOT23), Q6INSN(C2_any8,"Pd4=any8(Ps4)",ATTRIBS(A_CRSLOT23), "Logical ANY of low 8 predicate bits", -{ PsV ? (PdV=0xff) : (PdV=0x00); }) +{ PdV = (PsV ? 0xff : 0x00); }) Q6INSN(C2_all8,"Pd4=all8(Ps4)",ATTRIBS(A_CRSLOT23), "Logical ALL of low 8 predicate bits", -{ (PsV==0xff) ? (PdV=0xff) : (PdV=0x00); }) +{ PdV = (PsV == 0xff ? 0xff : 0x00); }) Q6INSN(C2_vitpack,"Rd32=vitpack(Ps4,Pt4)",ATTRIBS(), "Pack the odd and even bits of two predicate registers", @@ -212,7 +212,7 @@ Q6INSN(C2_vitpack,"Rd32=vitpack(Ps4,Pt4)",ATTRIBS(), Q6INSN(C2_mux,"Rd32=mux(Pu4,Rs32,Rt32)",ATTRIBS(), "Scalar MUX", -{ (fLSBOLD(PuV)) ? (RdV=RsV):(RdV=RtV); }) +{ RdV = (fLSBOLD(PuV) ? RsV : RtV); }) Q6INSN(C2_cmovenewit,"if (Pu4.new) Rd32=#s12",ATTRIBS(A_ARCHV2), @@ -269,18 +269,18 @@ Q6INSN(C2_ccombinewf,"if (!Pu4) Rdd32=combine(Rs32,Rt32)",ATTRIBS(A_ARCHV2), Q6INSN(C2_muxii,"Rd32=mux(Pu4,#s8,#S8)",ATTRIBS(A_ARCHV2), "Scalar MUX immediates", -{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=siV):(RdV=SiV); }) +{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? siV : SiV); }) Q6INSN(C2_muxir,"Rd32=mux(Pu4,Rs32,#s8)",ATTRIBS(A_ARCHV2), "Scalar MUX register immediate", -{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=RsV):(RdV=siV); }) +{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? RsV : siV); }) Q6INSN(C2_muxri,"Rd32=mux(Pu4,#s8,Rs32)",ATTRIBS(A_ARCHV2), "Scalar MUX register immediate", -{ fIMMEXT(siV); (fLSBOLD(PuV)) ? (RdV=siV):(RdV=RsV); }) +{ fIMMEXT(siV); RdV = (fLSBOLD(PuV) ? siV : RsV); }) -- cgit v1.1 From 80be682844ddfeffa21576ce0cb61d06bf6b87f8 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:42 -0500 Subject: Hexagon (target/hexagon) cleanup reg_field_info definition Include size in declaration Remove {0, 0} entry Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-15-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/reg_fields.c | 3 +-- target/hexagon/reg_fields.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/target/hexagon/reg_fields.c b/target/hexagon/reg_fields.c index bdcab79..6713203 100644 --- a/target/hexagon/reg_fields.c +++ b/target/hexagon/reg_fields.c @@ -18,10 +18,9 @@ #include "qemu/osdep.h" #include "reg_fields.h" -const RegField reg_field_info[] = { +const RegField reg_field_info[NUM_REG_FIELDS] = { #define DEF_REG_FIELD(TAG, START, WIDTH) \ { START, WIDTH }, #include "reg_fields_def.h.inc" - { 0, 0 } #undef DEF_REG_FIELD }; diff --git a/target/hexagon/reg_fields.h b/target/hexagon/reg_fields.h index d3c86c94..9e2ad5d 100644 --- a/target/hexagon/reg_fields.h +++ b/target/hexagon/reg_fields.h @@ -23,8 +23,6 @@ typedef struct { int width; } RegField; -extern const RegField reg_field_info[]; - enum { #define DEF_REG_FIELD(TAG, START, WIDTH) \ TAG, @@ -33,4 +31,6 @@ enum { #undef DEF_REG_FIELD }; +extern const RegField reg_field_info[NUM_REG_FIELDS]; + #endif -- cgit v1.1 From a33872eb533c5b4d2eb7658fa07e6a281bfd609b Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:43 -0500 Subject: Hexagon (target/hexagon) move QEMU_GENERATE to only be on during macros.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-16-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/genptr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 6b74344..b87e264 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -15,7 +15,6 @@ * along with this program; if not, see . */ -#define QEMU_GENERATE #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" @@ -24,7 +23,9 @@ #include "insn.h" #include "opcodes.h" #include "translate.h" +#define QEMU_GENERATE /* Used internally by macros.h */ #include "macros.h" +#undef QEMU_GENERATE #include "gen_tcg.h" static inline TCGv gen_read_preg(TCGv pred, uint8_t num) -- cgit v1.1 From 85580a65577898288a29d849160601895979c661 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:44 -0500 Subject: Hexagon (target/hexagon) compile all debug code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change #if HEX_DEBUG to if (HEX_DEBUG) so the debug code doesn't bit rot Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-17-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/genptr.c | 72 ++++++++++++++++++++++---------------------- target/hexagon/helper.h | 2 -- target/hexagon/internal.h | 11 +++---- target/hexagon/op_helper.c | 14 +++------ target/hexagon/translate.c | 74 ++++++++++++++++++++++------------------------ target/hexagon/translate.h | 2 -- 6 files changed, 81 insertions(+), 94 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index b87e264..24d5758 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -42,17 +42,17 @@ static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot) tcg_gen_andi_tl(slot_mask, hex_slot_cancelled, 1 << slot); tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum], slot_mask, zero, val, hex_new_value[rnum]); -#if HEX_DEBUG - /* - * Do this so HELPER(debug_commit_end) will know - * - * Note that slot_mask indicates the value is not written - * (i.e., slot was cancelled), so we create a true/false value before - * or'ing with hex_reg_written[rnum]. - */ - tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); - tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); -#endif + if (HEX_DEBUG) { + /* + * Do this so HELPER(debug_commit_end) will know + * + * Note that slot_mask indicates the value is not written + * (i.e., slot was cancelled), so we create a true/false value before + * or'ing with hex_reg_written[rnum]. + */ + tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); + tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); + } tcg_temp_free(zero); tcg_temp_free(slot_mask); @@ -61,10 +61,10 @@ static inline void gen_log_predicated_reg_write(int rnum, TCGv val, int slot) static inline void gen_log_reg_write(int rnum, TCGv val) { tcg_gen_mov_tl(hex_new_value[rnum], val); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_movi_tl(hex_reg_written[rnum], 1); + } } static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, int slot) @@ -84,19 +84,19 @@ static void gen_log_predicated_reg_write_pair(int rnum, TCGv_i64 val, int slot) tcg_gen_movcond_tl(TCG_COND_EQ, hex_new_value[rnum + 1], slot_mask, zero, val32, hex_new_value[rnum + 1]); -#if HEX_DEBUG - /* - * Do this so HELPER(debug_commit_end) will know - * - * Note that slot_mask indicates the value is not written - * (i.e., slot was cancelled), so we create a true/false value before - * or'ing with hex_reg_written[rnum]. - */ - tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); - tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); - tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1], - slot_mask); -#endif + if (HEX_DEBUG) { + /* + * Do this so HELPER(debug_commit_end) will know + * + * Note that slot_mask indicates the value is not written + * (i.e., slot was cancelled), so we create a true/false value before + * or'ing with hex_reg_written[rnum]. + */ + tcg_gen_setcond_tl(TCG_COND_EQ, slot_mask, slot_mask, zero); + tcg_gen_or_tl(hex_reg_written[rnum], hex_reg_written[rnum], slot_mask); + tcg_gen_or_tl(hex_reg_written[rnum + 1], hex_reg_written[rnum + 1], + slot_mask); + } tcg_temp_free(val32); tcg_temp_free(zero); @@ -107,17 +107,17 @@ static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) { /* Low word */ tcg_gen_extrl_i64_i32(hex_new_value[rnum], val); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_movi_tl(hex_reg_written[rnum], 1); + } /* High word */ tcg_gen_extrh_i64_i32(hex_new_value[rnum + 1], val); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); + } } static inline void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 715c246..efe6069 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -19,11 +19,9 @@ #include "helper_protos_generated.h.inc" DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) -#if HEX_DEBUG DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) -#endif DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 2da85c8..6b20aff 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -22,11 +22,12 @@ * Change HEX_DEBUG to 1 to turn on debugging output */ #define HEX_DEBUG 0 -#if HEX_DEBUG -#define HEX_DEBUG_LOG(...) qemu_log(__VA_ARGS__) -#else -#define HEX_DEBUG_LOG(...) do { } while (0) -#endif +#define HEX_DEBUG_LOG(...) \ + do { \ + if (HEX_DEBUG) { \ + qemu_log(__VA_ARGS__); \ + } \ + } while (0) int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index b70c5d6..33b6713 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -56,10 +56,10 @@ static void log_reg_write(CPUHexagonState *env, int rnum, HEX_DEBUG_LOG("\n"); env->new_value[rnum] = val; -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - env->reg_written[rnum] = 1; -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + env->reg_written[rnum] = 1; + } } static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) @@ -117,7 +117,6 @@ static void write_new_pc(CPUHexagonState *env, target_ulong addr) } } -#if HEX_DEBUG /* Handy place to set a breakpoint */ void HELPER(debug_start_packet)(CPUHexagonState *env) { @@ -128,14 +127,12 @@ void HELPER(debug_start_packet)(CPUHexagonState *env) env->reg_written[i] = 0; } } -#endif static int32_t new_pred_value(CPUHexagonState *env, int pnum) { return env->new_pred_value[pnum]; } -#if HEX_DEBUG /* Checks for bookkeeping errors between disassembly context and runtime */ void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check) { @@ -145,7 +142,6 @@ void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check) g_assert_not_reached(); } } -#endif void HELPER(commit_store)(CPUHexagonState *env, int slot_num) { @@ -171,7 +167,6 @@ void HELPER(commit_store)(CPUHexagonState *env, int slot_num) } } -#if HEX_DEBUG static void print_store(CPUHexagonState *env, int slot) { if (!(env->slot_cancelled & (1 << slot))) { @@ -255,7 +250,6 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) env->gpr[HEX_REG_QEMU_INSN_CNT]); } -#endif static int32_t fcircadd_v4(int32_t RxV, int32_t offset, int32_t M, int32_t CS) { diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 0468422..9a37644 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -35,9 +35,7 @@ TCGv hex_this_PC; TCGv hex_slot_cancelled; TCGv hex_branch_taken; TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; -#if HEX_DEBUG TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -#endif TCGv hex_new_pred_value[NUM_PREGS]; TCGv hex_pred_written; TCGv hex_store_addr[STORES_MAX]; @@ -90,7 +88,6 @@ static void gen_exception_end_tb(DisasContext *ctx, int excp) } -#if HEX_DEBUG #define PACKET_BUFFER_LEN 1028 static void print_pkt(Packet *pkt) { @@ -99,10 +96,12 @@ static void print_pkt(Packet *pkt) HEX_DEBUG_LOG("%s", buf->str); g_string_free(buf, true); } -#define HEX_DEBUG_PRINT_PKT(pkt) print_pkt(pkt) -#else -#define HEX_DEBUG_PRINT_PKT(pkt) /* nothing */ -#endif +#define HEX_DEBUG_PRINT_PKT(pkt) \ + do { \ + if (HEX_DEBUG) { \ + print_pkt(pkt); \ + } \ + } while (0) static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, uint32_t words[]) @@ -179,11 +178,11 @@ static void gen_start_packet(DisasContext *ctx, Packet *pkt) tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); ctx->s1_store_processed = false; -#if HEX_DEBUG - /* Handy place to set a breakpoint before the packet executes */ - gen_helper_debug_start_packet(cpu_env); - tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); -#endif + if (HEX_DEBUG) { + /* Handy place to set a breakpoint before the packet executes */ + gen_helper_debug_start_packet(cpu_env); + tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); + } /* Initialize the runtime state for packet semantics */ if (need_pc(pkt)) { @@ -308,10 +307,11 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt) for (i = 0; i < ctx->preg_log_idx; i++) { int pred_num = ctx->preg_log[i]; tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); -#if HEX_DEBUG - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pred_num); -#endif + if (HEX_DEBUG) { + /* Do this so HELPER(debug_commit_end) will know */ + tcg_gen_ori_tl(hex_pred_written, hex_pred_written, + 1 << pred_num); + } } } @@ -322,13 +322,13 @@ static void gen_pred_writes(DisasContext *ctx, Packet *pkt) static void gen_check_store_width(DisasContext *ctx, int slot_num) { -#if HEX_DEBUG - TCGv slot = tcg_const_tl(slot_num); - TCGv check = tcg_const_tl(ctx->store_width[slot_num]); - gen_helper_debug_check_store_width(cpu_env, slot, check); - tcg_temp_free(slot); - tcg_temp_free(check); -#endif + if (HEX_DEBUG) { + TCGv slot = tcg_const_tl(slot_num); + TCGv check = tcg_const_tl(ctx->store_width[slot_num]); + gen_helper_debug_check_store_width(cpu_env, slot, check); + tcg_temp_free(slot); + tcg_temp_free(check); + } } static bool slot_is_predicated(Packet *pkt, int slot_num) @@ -482,8 +482,7 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) process_store_log(ctx, pkt); process_dczeroa(ctx, pkt); update_exec_counters(ctx, pkt); -#if HEX_DEBUG - { + if (HEX_DEBUG) { TCGv has_st0 = tcg_const_tl(pkt->pkt_has_store_s0 && !pkt->pkt_has_dczeroa); TCGv has_st1 = @@ -495,7 +494,6 @@ static void gen_commit_packet(DisasContext *ctx, Packet *pkt) tcg_temp_free(has_st0); tcg_temp_free(has_st1); } -#endif if (pkt->pkt_has_cof) { gen_end_tb(ctx); @@ -655,9 +653,7 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns) #define NAME_LEN 64 static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -#if HEX_DEBUG static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -#endif static char new_pred_value_names[NUM_PREGS][NAME_LEN]; static char store_addr_names[STORES_MAX][NAME_LEN]; static char store_width_names[STORES_MAX][NAME_LEN]; @@ -670,11 +666,11 @@ void hexagon_translate_init(void) opcode_init(); -#if HEX_DEBUG - if (!qemu_logfile) { - qemu_set_log(qemu_loglevel); + if (HEX_DEBUG) { + if (!qemu_logfile) { + qemu_set_log(qemu_loglevel); + } } -#endif for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { hex_gpr[i] = tcg_global_mem_new(cpu_env, @@ -686,13 +682,13 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, new_value[i]), new_value_names[i]); -#if HEX_DEBUG - snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", - hexagon_regnames[i]); - hex_reg_written[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, reg_written[i]), - reg_written_names[i]); -#endif + if (HEX_DEBUG) { + snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", + hexagon_regnames[i]); + hex_reg_written[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, reg_written[i]), + reg_written_names[i]); + } } for (i = 0; i < NUM_PREGS; i++) { hex_pred[i] = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 97b12a7..703fd13 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -41,11 +41,9 @@ typedef struct DisasContext { static inline void ctx_log_reg_write(DisasContext *ctx, int rnum) { -#if HEX_DEBUG if (test_bit(rnum, ctx->regs_written)) { HEX_DEBUG_LOG("WARNING: Multiple writes to r%d\n", rnum); } -#endif ctx->reg_log[ctx->reg_log_idx] = rnum; ctx->reg_log_idx++; set_bit(rnum, ctx->regs_written); -- cgit v1.1 From d934c16d8a1e0fb82fd4abfa54dcb5217430577c Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:45 -0500 Subject: Hexagon (target/hexagon) add F2_sfrecipa instruction Rd32,Pe4 = sfrecipa(Rs32, Rt32) Recripocal approx Test cases in tests/tcg/hexagon/multi_result.c FP exception tests added to tests/tcg/hexagon/fpstuff.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-18-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/arch.c | 26 +++++++++-- target/hexagon/arch.h | 2 + target/hexagon/gen_tcg.h | 21 +++++++++ target/hexagon/helper.h | 1 + target/hexagon/imported/encode_pp.def | 1 + target/hexagon/imported/float.idef | 16 +++++++ target/hexagon/op_helper.c | 37 ++++++++++++++++ tests/tcg/hexagon/Makefile.target | 1 + tests/tcg/hexagon/fpstuff.c | 82 +++++++++++++++++++++++++++++++++++ tests/tcg/hexagon/multi_result.c | 68 +++++++++++++++++++++++++++++ 10 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 tests/tcg/hexagon/multi_result.c diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 40b6e3d..46edf45 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -181,12 +181,13 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, /* or put Inf in num fixup? */ uint8_t RsV_sign = float32_is_neg(RsV); uint8_t RtV_sign = float32_is_neg(RtV); + /* Check that RsV is NOT infinite before we overwrite it */ + if (!float32_is_infinity(RsV)) { + float_raise(float_flag_divbyzero, fp_status); + } RsV = infinite_float32(RsV_sign ^ RtV_sign); RtV = float32_one; RdV = float32_one; - if (float32_is_infinity(RsV)) { - float_raise(float_flag_divbyzero, fp_status); - } } else if (float32_is_infinity(RtV)) { RsV = make_float32(0x80000000 & (RsV ^ RtV)); RtV = float32_one; @@ -279,3 +280,22 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, *adjust = PeV; return ret; } + +const uint8_t recip_lookup_table[128] = { + 0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4, + 0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9, + 0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1, + 0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b, + 0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087, + 0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075, + 0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065, + 0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056, + 0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049, + 0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c, + 0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030, + 0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025, + 0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b, + 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012, + 0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, + 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000, +}; diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h index 6e0b0d9..b6634e9 100644 --- a/target/hexagon/arch.h +++ b/target/hexagon/arch.h @@ -30,4 +30,6 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, float_status *fp_status); +extern const uint8_t recip_lookup_table[128]; + #endif diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index a30048e..428a670 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -195,6 +195,27 @@ #define fGEN_TCG_S4_stored_locked(SHORTCODE) \ do { SHORTCODE; READ_PREG(PdV, PdN); } while (0) +/* + * Mathematical operations with more than one definition require + * special handling + */ + +/* + * Approximate reciprocal + * r3,p1 = sfrecipa(r0, r1) + * + * The helper packs the 2 32-bit results into a 64-bit value, + * so unpack them into the proper results. + */ +#define fGEN_TCG_F2_sfrecipa(SHORTCODE) \ + do { \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + gen_helper_sfrecipa(tmp, cpu_env, RsV, RtV); \ + tcg_gen_extrh_i64_i32(RdV, tmp); \ + tcg_gen_extrl_i64_i32(PeV, tmp); \ + tcg_temp_free_i64(tmp); \ + } while (0) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index efe6069..b377293 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -24,6 +24,7 @@ DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) +DEF_HELPER_3(sfrecipa, i64, env, f32, f32) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index c21cb73..b01b4d7 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1028,6 +1028,7 @@ MPY_ENC(F2_sfmin, "1011","ddddd","0","0","0","1","01") MPY_ENC(F2_sfmpy, "1011","ddddd","0","0","1","0","00") MPY_ENC(F2_sffixupn, "1011","ddddd","0","0","1","1","00") MPY_ENC(F2_sffixupd, "1011","ddddd","0","0","1","1","01") +MPY_ENC(F2_sfrecipa, "1011","ddddd","1","1","1","1","ee") DEF_FIELDROW_DESC32(ICLASS_M" 1100 -------- PP------ --------","[#12] Rd=(Rs,Rt)") DEF_FIELD32(ICLASS_M" 1100 -------- PP------ --!-----",Mc_tH,"Rt is High") /*Rt high */ diff --git a/target/hexagon/imported/float.idef b/target/hexagon/imported/float.idef index 76cecfe..eb54158 100644 --- a/target/hexagon/imported/float.idef +++ b/target/hexagon/imported/float.idef @@ -146,6 +146,22 @@ Q6INSN(F2_sfimm_n,"Rd32=sfmake(#u10):neg",ATTRIBS(), }) +Q6INSN(F2_sfrecipa,"Rd32,Pe4=sfrecipa(Rs32,Rt32)",ATTRIBS(), +"Reciprocal Approximation for Division", +{ + fHIDE(int idx;) + fHIDE(int adjust;) + fHIDE(int mant;) + fHIDE(int exp;) + if (fSF_RECIP_COMMON(RsV,RtV,RdV,adjust)) { + PeV = adjust; + idx = (RtV >> 16) & 0x7f; + mant = (fSF_RECIP_LOOKUP(idx) << 15) | 1; + exp = fSF_BIAS() - (fSF_GETEXP(RtV) - fSF_BIAS()) - 1; + RdV = fMAKESF(fGETBIT(31,RtV),exp,mant); + } +}) + Q6INSN(F2_sffixupn,"Rd32=sffixupn(Rs32,Rt32)",ATTRIBS(), "Fix Up Numerator", { diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 33b6713..75861e2 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -289,6 +289,43 @@ int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) return new_ptr; } +static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant) +{ + return make_float32( + ((sign & 1) << 31) | + ((exp & 0xff) << SF_MANTBITS) | + (mant & ((1 << SF_MANTBITS) - 1))); +} + +/* + * sfrecipa, sfinvsqrta have two 32-bit results + * r0,p0=sfrecipa(r1,r2) + * r0,p0=sfinvsqrta(r1) + * + * Since helpers can only return a single value, we pack the two results + * into a 64-bit value. + */ +uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV) +{ + int32_t PeV = 0; + float32 RdV; + int idx; + int adjust; + int mant; + int exp; + + arch_fpop_start(env); + if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) { + PeV = adjust; + idx = (RtV >> 16) & 0x7f; + mant = (recip_lookup_table[idx] << 15) | 1; + exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1; + RdV = build_float32(extract32(RtV, 31, 1), exp, mant); + } + arch_fpop_end(env); + return ((uint64_t)RdV << 32) | PeV; +} + /* * mem_noshuf * Section 5.5 of the Hexagon V67 Programmer's Reference Manual diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 616af69..18218ad 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -39,6 +39,7 @@ HEX_TESTS = first HEX_TESTS += misc HEX_TESTS += preg_alias HEX_TESTS += dual_stores +HEX_TESTS += multi_result HEX_TESTS += mem_noshuf HEX_TESTS += atomics HEX_TESTS += fpstuff diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 6b60f92..8e3ba78 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -250,6 +250,87 @@ static void check_dfminmax(void) check_fpstatus(usr, FPINVF); } +static void check_recip_exception(void) +{ + int result; + int usr; + + /* + * Check that sfrecipa doesn't set status bits when + * a NaN with bit 22 non-zero is passed + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %2)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + /* + * Check that sfrecipa doesn't set status bits when + * a NaN with bit 22 zero is passed + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN_special), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN_special) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %2)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN_special) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + /* + * Check that sfrecipa properly sets divid-by-zero + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(0x885dc960), "r"(0x80000000) + : "r2", "p0", "usr"); + check32(result, 0x3f800000); + check_fpstatus(usr, FPDBZF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(0x7f800000), "r"(SF_ZERO) + : "r2", "p0", "usr"); + check32(result, 0x3f800000); + check_fpstatus(usr, 0); +} + static void check_canonical_NaN(void) { int sf_result; @@ -507,6 +588,7 @@ int main() check_compare_exception(); check_sfminmax(); check_dfminmax(); + check_recip_exception(); check_canonical_NaN(); check_float2int_convs(); diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c new file mode 100644 index 0000000..cb7dd31 --- /dev/null +++ b/tests/tcg/hexagon/multi_result.c @@ -0,0 +1,68 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +static int sfrecipa(int Rs, int Rt, int *pred_result) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(Rs), "r"(Rt) + : "p0"); + *pred_result = predval; + return result; +} + +int err; + +static void check(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%08x != 0x%08x\n", val, expect); + err++; + } +} + +static void check_p(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%02x != 0x%02x\n", val, expect); + err++; + } +} + +static void test_sfrecipa() +{ + int res; + int pred_result; + + res = sfrecipa(0x04030201, 0x05060708, &pred_result); + check(res, 0x59f38001); + check_p(pred_result, 0x00); +} + +int main() +{ + test_sfrecipa(); + + puts(err ? "FAIL" : "PASS"); + return err; +} -- cgit v1.1 From dd8705bdf529d2c694ec3a4d4a2c18bb770d5c6c Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:46 -0500 Subject: Hexagon (target/hexagon) add F2_sfinvsqrta Rd32,Pe4 = sfinvsqrta(Rs32) Square root approx The helper packs the 2 32-bit results into a 64-bit value, and the fGEN_TCG override unpacks them into the proper results. Test cases in tests/tcg/hexagon/multi_result.c FP exception tests added to tests/tcg/hexagon/fpstuff.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-19-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/arch.c | 21 ++++++++++++++++++++- target/hexagon/arch.h | 2 ++ target/hexagon/gen_tcg.h | 16 ++++++++++++++++ target/hexagon/helper.h | 1 + target/hexagon/imported/encode_pp.def | 1 + target/hexagon/imported/float.idef | 16 ++++++++++++++++ target/hexagon/op_helper.c | 21 +++++++++++++++++++++ tests/tcg/hexagon/fpstuff.c | 15 +++++++++++++++ tests/tcg/hexagon/multi_result.c | 29 +++++++++++++++++++++++++++++ 9 files changed, 121 insertions(+), 1 deletion(-) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 46edf45..dee852e 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -247,7 +247,7 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, int r_exp; int ret = 0; RsV = *Rs; - if (float32_is_infinity(RsV)) { + if (float32_is_any_nan(RsV)) { if (extract32(RsV, 22, 1) == 0) { float_raise(float_flag_invalid, fp_status); } @@ -299,3 +299,22 @@ const uint8_t recip_lookup_table[128] = { 0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000, }; + +const uint8_t invsqrt_lookup_table[128] = { + 0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057, + 0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045, + 0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036, + 0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028, + 0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d, + 0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012, + 0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009, + 0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001, + 0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4, + 0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb, + 0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6, + 0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3, + 0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093, + 0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084, + 0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077, + 0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b, +}; diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h index b6634e9..3e0c334 100644 --- a/target/hexagon/arch.h +++ b/target/hexagon/arch.h @@ -32,4 +32,6 @@ int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust, extern const uint8_t recip_lookup_table[128]; +extern const uint8_t invsqrt_lookup_table[128]; + #endif diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 428a670..d78e7b8 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -216,6 +216,22 @@ tcg_temp_free_i64(tmp); \ } while (0) +/* + * Approximation of the reciprocal square root + * r1,p0 = sfinvsqrta(r0) + * + * The helper packs the 2 32-bit results into a 64-bit value, + * so unpack them into the proper results. + */ +#define fGEN_TCG_F2_sfinvsqrta(SHORTCODE) \ + do { \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + gen_helper_sfinvsqrta(tmp, cpu_env, RsV); \ + tcg_gen_extrh_i64_i32(RdV, tmp); \ + tcg_gen_extrl_i64_i32(PeV, tmp); \ + tcg_temp_free_i64(tmp); \ + } while (0) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index b377293..cb7508f 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -25,6 +25,7 @@ DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) +DEF_HELPER_2(sfinvsqrta, i64, env, f32) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index b01b4d7..18fe45d 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1642,6 +1642,7 @@ SH2_RR_ENC(F2_conv_sf2w, "1011","100","-","000","ddddd") SH2_RR_ENC(F2_conv_sf2uw_chop, "1011","011","-","001","ddddd") SH2_RR_ENC(F2_conv_sf2w_chop, "1011","100","-","001","ddddd") SH2_RR_ENC(F2_sffixupr, "1011","101","-","000","ddddd") +SH2_RR_ENC(F2_sfinvsqrta, "1011","111","-","0ee","ddddd") DEF_FIELDROW_DESC32(ICLASS_S2op" 1100 -------- PP------ --------","[#12] Rd=(Rs,#u6)") diff --git a/target/hexagon/imported/float.idef b/target/hexagon/imported/float.idef index eb54158..3e75bc4 100644 --- a/target/hexagon/imported/float.idef +++ b/target/hexagon/imported/float.idef @@ -178,6 +178,22 @@ Q6INSN(F2_sffixupd,"Rd32=sffixupd(Rs32,Rt32)",ATTRIBS(), RdV = RtV; }) +Q6INSN(F2_sfinvsqrta,"Rd32,Pe4=sfinvsqrta(Rs32)",ATTRIBS(), +"Reciprocal Square Root Approximation", +{ + fHIDE(int idx;) + fHIDE(int adjust;) + fHIDE(int mant;) + fHIDE(int exp;) + if (fSF_INVSQRT_COMMON(RsV,RdV,adjust)) { + PeV = adjust; + idx = (RsV >> 17) & 0x7f; + mant = (fSF_INVSQRT_LOOKUP(idx) << 15); + exp = fSF_BIAS() - ((fSF_GETEXP(RsV) - fSF_BIAS()) >> 1) - 1; + RdV = fMAKESF(fGETBIT(31,RsV),exp,mant); + } +}) + Q6INSN(F2_sffixupr,"Rd32=sffixupr(Rs32)",ATTRIBS(), "Fix Up Radicand", { diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 75861e2..a25fb98 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -326,6 +326,27 @@ uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV) return ((uint64_t)RdV << 32) | PeV; } +uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) +{ + int PeV = 0; + float32 RdV; + int idx; + int adjust; + int mant; + int exp; + + arch_fpop_start(env); + if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) { + PeV = adjust; + idx = (RsV >> 17) & 0x7f; + mant = (invsqrt_lookup_table[idx] << 15); + exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1; + RdV = build_float32(extract32(RsV, 31, 1), exp, mant); + } + arch_fpop_end(env); + return ((uint64_t)RdV << 32) | PeV; +} + /* * mem_noshuf * Section 5.5 of the Hexagon V67 Programmer's Reference Manual diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 8e3ba78..0dff429 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -441,6 +441,20 @@ static void check_canonical_NaN(void) check_fpstatus(usr, 0); } +static void check_invsqrta(void) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfinvsqrta(%2)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(0x7f800000) + : "p0"); + check32(result, 0xff800000); + check32(predval, 0x0); +} + static void check_float2int_convs() { int res32; @@ -590,6 +604,7 @@ int main() check_dfminmax(); check_recip_exception(); check_canonical_NaN(); + check_invsqrta(); check_float2int_convs(); puts(err ? "FAIL" : "PASS"); diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c index cb7dd31..67aa462 100644 --- a/tests/tcg/hexagon/multi_result.c +++ b/tests/tcg/hexagon/multi_result.c @@ -31,6 +31,20 @@ static int sfrecipa(int Rs, int Rt, int *pred_result) return result; } +static int sfinvsqrta(int Rs, int *pred_result) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfinvsqrta(%2)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(Rs) + : "p0"); + *pred_result = predval; + return result; +} + int err; static void check(int val, int expect) @@ -59,9 +73,24 @@ static void test_sfrecipa() check_p(pred_result, 0x00); } +static void test_sfinvsqrta() +{ + int res; + int pred_result; + + res = sfinvsqrta(0x04030201, &pred_result); + check(res, 0x4d330000); + check_p(pred_result, 0xe0); + + res = sfinvsqrta(0x0, &pred_result); + check(res, 0x3f800000); + check_p(pred_result, 0x0); +} + int main() { test_sfrecipa(); + test_sfinvsqrta(); puts(err ? "FAIL" : "PASS"); return err; -- cgit v1.1 From da74cd2dced1ca36ffa864e70cf5ee8d3a8c1fab Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:47 -0500 Subject: Hexagon (target/hexagon) add A5_ACS (vacsh) Rxx32,Pe4 = vacsh(Rss32, Rtt32) Add compare and select elements of two vectors Test cases in tests/tcg/hexagon/multi_result.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-20-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 5 +++ target/hexagon/helper.h | 2 + target/hexagon/imported/alu.idef | 19 ++++++++++ target/hexagon/imported/encode_pp.def | 1 + target/hexagon/op_helper.c | 33 +++++++++++++++++ tests/tcg/hexagon/multi_result.c | 69 +++++++++++++++++++++++++++++++++++ 6 files changed, 129 insertions(+) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index d78e7b8..93310c5 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -199,6 +199,11 @@ * Mathematical operations with more than one definition require * special handling */ +#define fGEN_TCG_A5_ACS(SHORTCODE) \ + do { \ + gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ + } while (0) /* * Approximate reciprocal diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index cb7508f..3824ae0 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -26,6 +26,8 @@ DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) +DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) +DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) diff --git a/target/hexagon/imported/alu.idef b/target/hexagon/imported/alu.idef index 45cc529..e8cc52c 100644 --- a/target/hexagon/imported/alu.idef +++ b/target/hexagon/imported/alu.idef @@ -1240,6 +1240,25 @@ MINMAX(uw,WORD,UWORD,2) #undef VMINORMAX3 +Q6INSN(A5_ACS,"Rxx32,Pe4=vacsh(Rss32,Rtt32)",ATTRIBS(), +"Add Compare and Select elements of two vectors, record the maximums and the decisions ", +{ + fHIDE(int i;) + fHIDE(int xv;) + fHIDE(int sv;) + fHIDE(int tv;) + for (i = 0; i < 4; i++) { + xv = (int) fGETHALF(i,RxxV); + sv = (int) fGETHALF(i,RssV); + tv = (int) fGETHALF(i,RttV); + xv = xv + tv; //assumes 17bit datapath + sv = sv - tv; //assumes 17bit datapath + fSETBIT(i*2, PeV, (xv > sv)); + fSETBIT(i*2+1,PeV, (xv > sv)); + fSETHALF(i, RxxV, fSATH(fMAX(xv,sv))); + } +}) + /**********************************************/ /* Vector Min/Max */ /**********************************************/ diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 18fe45d..87e0426 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1017,6 +1017,7 @@ MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") +MPY_ENC(A5_ACS, "1010","xxxxx","0","1","0","1","ee") /* */ diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index a25fb98..f9fb655 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -347,6 +347,39 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) return ((uint64_t)RdV << 32) | PeV; } +int64_t HELPER(vacsh_val)(CPUHexagonState *env, + int64_t RxxV, int64_t RssV, int64_t RttV) +{ + for (int i = 0; i < 4; i++) { + int xv = sextract64(RxxV, i * 16, 16); + int sv = sextract64(RssV, i * 16, 16); + int tv = sextract64(RttV, i * 16, 16); + int max; + xv = xv + tv; + sv = sv - tv; + max = xv > sv ? xv : sv; + /* Note that fSATH can set the OVF bit in usr */ + RxxV = deposit64(RxxV, i * 16, 16, fSATH(max)); + } + return RxxV; +} + +int32_t HELPER(vacsh_pred)(CPUHexagonState *env, + int64_t RxxV, int64_t RssV, int64_t RttV) +{ + int32_t PeV = 0; + for (int i = 0; i < 4; i++) { + int xv = sextract64(RxxV, i * 16, 16); + int sv = sextract64(RssV, i * 16, 16); + int tv = sextract64(RttV, i * 16, 16); + xv = xv + tv; + sv = sv - tv; + PeV = deposit32(PeV, i * 2, 1, (xv > sv)); + PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv)); + } + return PeV; +} + /* * mem_noshuf * Section 5.5 of the Hexagon V67 Programmer's Reference Manual diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c index 67aa462..c21148f 100644 --- a/tests/tcg/hexagon/multi_result.c +++ b/tests/tcg/hexagon/multi_result.c @@ -45,8 +45,41 @@ static int sfinvsqrta(int Rs, int *pred_result) return result; } +static long long vacsh(long long Rxx, long long Rss, long long Rtt, + int *pred_result, int *ovf_result) +{ + long long result = Rxx; + int predval; + int usr; + + /* + * This instruction can set bit 0 (OVF/overflow) in usr + * Clear the bit first, then return that bit to the caller + */ + asm volatile("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "usr = r2\n\t" + "%0,p0 = vacsh(%3, %4)\n\t" + "%1 = p0\n\t" + "%2 = usr\n\t" + : "+r"(result), "=r"(predval), "=r"(usr) + : "r"(Rss), "r"(Rtt) + : "r2", "p0", "usr"); + *pred_result = predval; + *ovf_result = (usr & 1); + return result; +} + int err; +static void check_ll(long long val, long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); + err++; + } +} + static void check(int val, int expect) { if (val != expect) { @@ -87,10 +120,46 @@ static void test_sfinvsqrta() check_p(pred_result, 0x0); } +static void test_vacsh() +{ + long long res64; + int pred_result; + int ovf_result; + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030004LL, + 0x0000000000000000LL, &pred_result, &ovf_result); + check_ll(res64, 0x0004000300030004LL); + check_p(pred_result, 0xf0); + check(ovf_result, 0); + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030004LL, + 0x000affff000d0000LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e0003000f0004LL); + check_p(pred_result, 0xcc); + check(ovf_result, 0); + + res64 = vacsh(0x00047fff00020001LL, + 0x00017fff00030004LL, + 0x000a0fff000d0000LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e7fff000f0004LL); + check_p(pred_result, 0xfc); + check(ovf_result, 1); + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030009LL, + 0x000affff000d0001LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e0003000f0008LL); + check_p(pred_result, 0xcc); + check(ovf_result, 0); +} + int main() { test_sfrecipa(); test_sfinvsqrta(); + test_vacsh(); puts(err ? "FAIL" : "PASS"); return err; -- cgit v1.1 From 0a65d286936a5fd0ac459a0a047e527ce55731e3 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:48 -0500 Subject: Hexagon (target/hexagon) add A6_vminub_RdP Rdd32,Pe4 = vminub(Rtt32, Rss32) Vector min of bytes Test cases in tests/tcg/hexagon/multi_result.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-21-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 27 +++++++++++++++++++++++++++ target/hexagon/genptr.c | 22 ++++++++++++++++++++++ target/hexagon/imported/alu.idef | 10 ++++++++++ target/hexagon/imported/encode_pp.def | 1 + tests/tcg/hexagon/multi_result.c | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 94 insertions(+) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 93310c5..aea0c55 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -237,6 +237,33 @@ tcg_temp_free_i64(tmp); \ } while (0) +/* + * Compare each of the 8 unsigned bytes + * The minimum is placed in each byte of the destination. + * Each bit of the predicate is set true if the bit from the first operand + * is greater than the bit from the second operand. + * r5:4,p1 = vminub(r1:0, r3:2) + */ +#define fGEN_TCG_A6_vminub_RdP(SHORTCODE) \ + do { \ + TCGv left = tcg_temp_new(); \ + TCGv right = tcg_temp_new(); \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_movi_tl(PeV, 0); \ + tcg_gen_movi_i64(RddV, 0); \ + for (int i = 0; i < 8; i++) { \ + gen_get_byte_i64(left, i, RttV, false); \ + gen_get_byte_i64(right, i, RssV, false); \ + tcg_gen_setcond_tl(TCG_COND_GT, tmp, left, right); \ + tcg_gen_deposit_tl(PeV, PeV, tmp, i, 1); \ + tcg_gen_umin_tl(tmp, left, right); \ + gen_set_byte_i64(i, RddV, tmp); \ + } \ + tcg_temp_free(left); \ + tcg_temp_free(right); \ + tcg_temp_free(tmp); \ + } while (0) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 24d5758..9dbebc6 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -266,6 +266,28 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, } } +static TCGv gen_get_byte_i64(TCGv result, int N, TCGv_i64 src, bool sign) +{ + TCGv_i64 res64 = tcg_temp_new_i64(); + if (sign) { + tcg_gen_sextract_i64(res64, src, N * 8, 8); + } else { + tcg_gen_extract_i64(res64, src, N * 8, 8); + } + tcg_gen_extrl_i64_i32(result, res64); + tcg_temp_free_i64(res64); + + return result; +} + +static void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src) +{ + TCGv_i64 src64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(src64, src); + tcg_gen_deposit_i64(result, result, src64, N * 8, 8); + tcg_temp_free_i64(src64); +} + static inline void gen_load_locked4u(TCGv dest, TCGv vaddr, int mem_index) { tcg_gen_qemu_ld32u(dest, vaddr, mem_index); diff --git a/target/hexagon/imported/alu.idef b/target/hexagon/imported/alu.idef index e8cc52c..f0c9bb4 100644 --- a/target/hexagon/imported/alu.idef +++ b/target/hexagon/imported/alu.idef @@ -1259,6 +1259,16 @@ Q6INSN(A5_ACS,"Rxx32,Pe4=vacsh(Rss32,Rtt32)",ATTRIBS(), } }) +Q6INSN(A6_vminub_RdP,"Rdd32,Pe4=vminub(Rtt32,Rss32)",ATTRIBS(), +"Vector minimum of bytes, records minimum and decision vector", +{ + fHIDE(int i;) + for (i = 0; i < 8; i++) { + fSETBIT(i, PeV, (fGETUBYTE(i,RttV) > fGETUBYTE(i,RssV))); + fSETBYTE(i,RddV,fMIN(fGETUBYTE(i,RttV),fGETUBYTE(i,RssV))); + } +}) + /**********************************************/ /* Vector Min/Max */ /**********************************************/ diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 87e0426..4619398 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1018,6 +1018,7 @@ MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") MPY_ENC(A5_ACS, "1010","xxxxx","0","1","0","1","ee") +MPY_ENC(A6_vminub_RdP, "1010","ddddd","0","1","1","1","ee") /* */ diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c index c21148f..95d99a0 100644 --- a/tests/tcg/hexagon/multi_result.c +++ b/tests/tcg/hexagon/multi_result.c @@ -70,6 +70,21 @@ static long long vacsh(long long Rxx, long long Rss, long long Rtt, return result; } +static long long vminub(long long Rtt, long long Rss, + int *pred_result) +{ + long long result; + int predval; + + asm volatile("%0,p0 = vminub(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(result), "=r"(predval) + : "r"(Rtt), "r"(Rss) + : "p0"); + *pred_result = predval; + return result; +} + int err; static void check_ll(long long val, long long expect) @@ -155,11 +170,30 @@ static void test_vacsh() check(ovf_result, 0); } +static void test_vminub() +{ + long long res64; + int pred_result; + + res64 = vminub(0x0807060504030201LL, + 0x0102030405060708LL, + &pred_result); + check_ll(res64, 0x0102030404030201LL); + check_p(pred_result, 0xf0); + + res64 = vminub(0x0802060405030701LL, + 0x0107030504060208LL, + &pred_result); + check_ll(res64, 0x0102030404030201LL); + check_p(pred_result, 0xaa); +} + int main() { test_sfrecipa(); test_sfinvsqrta(); test_vacsh(); + test_vminub(); puts(err ? "FAIL" : "PASS"); return err; -- cgit v1.1 From 57d352ac298b27617a53783305af2554025060d9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:49 -0500 Subject: Hexagon (target/hexagon) add A4_addp_c/A4_subp_c Rdd32 = add(Rss32, Rtt32, Px4):carry Add with carry Rdd32 = sub(Rss32, Rtt32, Px4):carry Sub with carry Test cases in tests/tcg/hexagon/multi_result.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-22-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 37 ++++++++++++++++ target/hexagon/genptr.c | 11 +++++ target/hexagon/imported/alu.idef | 15 +++++++ target/hexagon/imported/encode_pp.def | 2 + tests/tcg/hexagon/multi_result.c | 82 +++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index aea0c55..6bc578d 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -238,6 +238,43 @@ } while (0) /* + * Add or subtract with carry. + * Predicate register is used as an extra input and output. + * r5:4 = add(r1:0, r3:2, p1):carry + */ +#define fGEN_TCG_A4_addp_c(SHORTCODE) \ + do { \ + TCGv_i64 carry = tcg_temp_new_i64(); \ + TCGv_i64 zero = tcg_const_i64(0); \ + tcg_gen_extu_i32_i64(carry, PxV); \ + tcg_gen_andi_i64(carry, carry, 1); \ + tcg_gen_add2_i64(RddV, carry, RssV, zero, carry, zero); \ + tcg_gen_add2_i64(RddV, carry, RddV, carry, RttV, zero); \ + tcg_gen_extrl_i64_i32(PxV, carry); \ + gen_8bitsof(PxV, PxV); \ + tcg_temp_free_i64(carry); \ + tcg_temp_free_i64(zero); \ + } while (0) + +/* r5:4 = sub(r1:0, r3:2, p1):carry */ +#define fGEN_TCG_A4_subp_c(SHORTCODE) \ + do { \ + TCGv_i64 carry = tcg_temp_new_i64(); \ + TCGv_i64 zero = tcg_const_i64(0); \ + TCGv_i64 not_RttV = tcg_temp_new_i64(); \ + tcg_gen_extu_i32_i64(carry, PxV); \ + tcg_gen_andi_i64(carry, carry, 1); \ + tcg_gen_not_i64(not_RttV, RttV); \ + tcg_gen_add2_i64(RddV, carry, RssV, zero, carry, zero); \ + tcg_gen_add2_i64(RddV, carry, RddV, carry, not_RttV, zero); \ + tcg_gen_extrl_i64_i32(PxV, carry); \ + gen_8bitsof(PxV, PxV); \ + tcg_temp_free_i64(carry); \ + tcg_temp_free_i64(zero); \ + tcg_temp_free_i64(not_RttV); \ + } while (0) + +/* * Compare each of the 8 unsigned bytes * The minimum is placed in each byte of the destination. * Each bit of the predicate is set true if the bit from the first operand diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 9dbebc6..333f7d7 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -361,5 +361,16 @@ static inline void gen_store_conditional8(CPUHexagonState *env, tcg_gen_movi_tl(hex_llsc_addr, ~0); } +static TCGv gen_8bitsof(TCGv result, TCGv value) +{ + TCGv zero = tcg_const_tl(0); + TCGv ones = tcg_const_tl(0xff); + tcg_gen_movcond_tl(TCG_COND_NE, result, value, zero, ones, zero); + tcg_temp_free(zero); + tcg_temp_free(ones); + + return result; +} + #include "tcg_funcs_generated.c.inc" #include "tcg_func_table_generated.c.inc" diff --git a/target/hexagon/imported/alu.idef b/target/hexagon/imported/alu.idef index f0c9bb4..58477ae 100644 --- a/target/hexagon/imported/alu.idef +++ b/target/hexagon/imported/alu.idef @@ -153,6 +153,21 @@ Q6INSN(A2_subp,"Rdd32=sub(Rtt32,Rss32)",ATTRIBS(), "Sub", { RddV=RttV-RssV;}) +/* 64-bit with carry */ + +Q6INSN(A4_addp_c,"Rdd32=add(Rss32,Rtt32,Px4):carry",ATTRIBS(),"Add with Carry", +{ + RddV = RssV + RttV + fLSBOLD(PxV); + PxV = f8BITSOF(fCARRY_FROM_ADD(RssV,RttV,fLSBOLD(PxV))); +}) + +Q6INSN(A4_subp_c,"Rdd32=sub(Rss32,Rtt32,Px4):carry",ATTRIBS(),"Sub with Carry", +{ + RddV = RssV + ~RttV + fLSBOLD(PxV); + PxV = f8BITSOF(fCARRY_FROM_ADD(RssV,~RttV,fLSBOLD(PxV))); +}) + + /* NEG and ABS */ Q6INSN(A2_negsat,"Rd32=neg(Rs32):sat",ATTRIBS(), diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 4619398..514c240 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1749,6 +1749,8 @@ SH_RRR_ENC(S4_extractp_rp, "0001","11-","-","10-","ddddd") DEF_FIELDROW_DESC32(ICLASS_S3op" 0010 -------- PP------ --------","[#2] Rdd=(Rss,Rtt,Pu)") SH_RRR_ENC(S2_valignrb, "0010","0--","-","-uu","ddddd") SH_RRR_ENC(S2_vsplicerb, "0010","100","-","-uu","ddddd") +SH_RRR_ENC(A4_addp_c, "0010","110","-","-xx","ddddd") +SH_RRR_ENC(A4_subp_c, "0010","111","-","-xx","ddddd") DEF_FIELDROW_DESC32(ICLASS_S3op" 0011 -------- PP------ --------","[#3] Rdd=(Rss,Rt)") diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c index 95d99a0..52997b3 100644 --- a/tests/tcg/hexagon/multi_result.c +++ b/tests/tcg/hexagon/multi_result.c @@ -85,6 +85,38 @@ static long long vminub(long long Rtt, long long Rss, return result; } +static long long add_carry(long long Rss, long long Rtt, + int pred_in, int *pred_result) +{ + long long result; + int predval = pred_in; + + asm volatile("p0 = %1\n\t" + "%0 = add(%2, %3, p0):carry\n\t" + "%1 = p0\n\t" + : "=r"(result), "+r"(predval) + : "r"(Rss), "r"(Rtt) + : "p0"); + *pred_result = predval; + return result; +} + +static long long sub_carry(long long Rss, long long Rtt, + int pred_in, int *pred_result) +{ + long long result; + int predval = pred_in; + + asm volatile("p0 = !cmp.eq(%1, #0)\n\t" + "%0 = sub(%2, %3, p0):carry\n\t" + "%1 = p0\n\t" + : "=r"(result), "+r"(predval) + : "r"(Rss), "r"(Rtt) + : "p0"); + *pred_result = predval; + return result; +} + int err; static void check_ll(long long val, long long expect) @@ -188,12 +220,62 @@ static void test_vminub() check_p(pred_result, 0xaa); } +static void test_add_carry() +{ + long long res64; + int pred_result; + + res64 = add_carry(0x0000000000000000LL, + 0xffffffffffffffffLL, + 1, &pred_result); + check_ll(res64, 0x0000000000000000LL); + check_p(pred_result, 0xff); + + res64 = add_carry(0x0000000100000000LL, + 0xffffffffffffffffLL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); + + res64 = add_carry(0x0000000100000000LL, + 0xffffffffffffffffLL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); +} + +static void test_sub_carry() +{ + long long res64; + int pred_result; + + res64 = sub_carry(0x0000000000000000LL, + 0x0000000000000000LL, + 1, &pred_result); + check_ll(res64, 0x0000000000000000LL); + check_p(pred_result, 0xff); + + res64 = sub_carry(0x0000000100000000LL, + 0x0000000000000000LL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); + + res64 = sub_carry(0x0000000100000000LL, + 0x0000000000000000LL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); +} + int main() { test_sfrecipa(); test_sfinvsqrta(); test_vacsh(); test_vminub(); + test_add_carry(); + test_sub_carry(); puts(err ? "FAIL" : "PASS"); return err; -- cgit v1.1 From 46ef47e2a77d1a34996964760b4a0d2b19476f25 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:50 -0500 Subject: Hexagon (target/hexagon) circular addressing The following instructions are added L2_loadrub_pci Rd32 = memub(Rx32++#s4:0:circ(Mu2)) L2_loadrb_pci Rd32 = memb(Rx32++#s4:0:circ(Mu2)) L2_loadruh_pci Rd32 = memuh(Rx32++#s4:1:circ(Mu2)) L2_loadrh_pci Rd32 = memh(Rx32++#s4:1:circ(Mu2)) L2_loadri_pci Rd32 = memw(Rx32++#s4:2:circ(Mu2)) L2_loadrd_pci Rdd32 = memd(Rx32++#s4:3:circ(Mu2)) S2_storerb_pci memb(Rx32++#s4:0:circ(Mu2)) = Rt32 S2_storerh_pci memh(Rx32++#s4:1:circ(Mu2)) = Rt32 S2_storerf_pci memh(Rx32++#s4:1:circ(Mu2)) = Rt.H32 S2_storeri_pci memw(Rx32++#s4:2:circ(Mu2)) = Rt32 S2_storerd_pci memd(Rx32++#s4:3:circ(Mu2)) = Rtt32 S2_storerbnew_pci memb(Rx32++#s4:0:circ(Mu2)) = Nt8.new S2_storerhnew_pci memw(Rx32++#s4:1:circ(Mu2)) = Nt8.new S2_storerinew_pci memw(Rx32++#s4:2:circ(Mu2)) = Nt8.new L2_loadrub_pcr Rd32 = memub(Rx32++I:circ(Mu2)) L2_loadrb_pcr Rd32 = memb(Rx32++I:circ(Mu2)) L2_loadruh_pcr Rd32 = memuh(Rx32++I:circ(Mu2)) L2_loadrh_pcr Rd32 = memh(Rx32++I:circ(Mu2)) L2_loadri_pcr Rd32 = memw(Rx32++I:circ(Mu2)) L2_loadrd_pcr Rdd32 = memd(Rx32++I:circ(Mu2)) S2_storerb_pcr memb(Rx32++I:circ(Mu2)) = Rt32 S2_storerh_pcr memh(Rx32++I:circ(Mu2)) = Rt32 S2_storerf_pcr memh(Rx32++I:circ(Mu2)) = Rt32.H32 S2_storeri_pcr memw(Rx32++I:circ(Mu2)) = Rt32 S2_storerd_pcr memd(Rx32++I:circ(Mu2)) = Rtt32 S2_storerbnew_pcr memb(Rx32++I:circ(Mu2)) = Nt8.new S2_storerhnew_pcr memh(Rx32++I:circ(Mu2)) = Nt8.new S2_storerinew_pcr memw(Rx32++I:circ(Mu2)) = Nt8.new Test cases in tests/tcg/hexagon/circ.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-23-git-send-email-tsimpson@quicinc.com> [rth: Squash <1619667142-29636-1-git-send-email-tsimpson@quicinc.com> removing gen_read_reg and gen_set_byte to avoid clang Werror.] Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 112 +++++++- target/hexagon/genptr.c | 89 +++++++ target/hexagon/imported/encode_pp.def | 10 + target/hexagon/imported/ldst.idef | 4 + target/hexagon/imported/macros.def | 26 ++ target/hexagon/macros.h | 92 +++++++ target/hexagon/op_helper.c | 36 +-- tests/tcg/hexagon/Makefile.target | 2 + tests/tcg/hexagon/circ.c | 486 ++++++++++++++++++++++++++++++++++ 9 files changed, 834 insertions(+), 23 deletions(-) create mode 100644 tests/tcg/hexagon/circ.c diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 6bc578d..25c228c 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -38,6 +38,8 @@ * _ap absolute set r0 = memw(r1=##variable) * _pr post increment register r0 = memw(r1++m1) * _pi post increment immediate r0 = memb(r1++#1) + * _pci post increment circular immediate r0 = memw(r1++#4:circ(m0)) + * _pcr post increment circular register r0 = memw(r1++I:circ(m0)) */ /* Macros for complex addressing modes */ @@ -56,7 +58,22 @@ fEA_REG(RxV); \ fPM_I(RxV, siV); \ } while (0) - +#define GET_EA_pci \ + do { \ + TCGv tcgv_siV = tcg_const_tl(siV); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_helper_fcircadd(RxV, RxV, tcgv_siV, MuV, \ + hex_gpr[HEX_REG_CS0 + MuN]); \ + tcg_temp_free(tcgv_siV); \ + } while (0) +#define GET_EA_pcr(SHIFT) \ + do { \ + TCGv ireg = tcg_temp_new(); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_read_ireg(ireg, MuV, (SHIFT)); \ + gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ + tcg_temp_free(ireg); \ + } while (0) /* Instructions with multiple definitions */ #define fGEN_TCG_LOAD_AP(RES, SIZE, SIGN) \ @@ -80,6 +97,36 @@ #define fGEN_TCG_L4_loadrd_ap(SHORTCODE) \ fGEN_TCG_LOAD_AP(RddV, 8, u) +#define fGEN_TCG_L2_loadrub_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrb_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadruh_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrh_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadri_pci(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrd_pci(SHORTCODE) SHORTCODE + +#define fGEN_TCG_LOAD_pcr(SHIFT, LOAD) \ + do { \ + TCGv ireg = tcg_temp_new(); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_read_ireg(ireg, MuV, SHIFT); \ + gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ + LOAD; \ + tcg_temp_free(ireg); \ + } while (0) + +#define fGEN_TCG_L2_loadrub_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, u, EA, RdV)) +#define fGEN_TCG_L2_loadrb_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, s, EA, RdV)) +#define fGEN_TCG_L2_loadruh_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, u, EA, RdV)) +#define fGEN_TCG_L2_loadrh_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, s, EA, RdV)) +#define fGEN_TCG_L2_loadri_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(2, fLOAD(1, 4, u, EA, RdV)) +#define fGEN_TCG_L2_loadrd_pcr(SHORTCODE) \ + fGEN_TCG_LOAD_pcr(3, fLOAD(1, 8, u, EA, RddV)) + #define fGEN_TCG_L2_loadrub_pr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrub_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrb_pr(SHORTCODE) SHORTCODE @@ -195,6 +242,69 @@ #define fGEN_TCG_S4_stored_locked(SHORTCODE) \ do { SHORTCODE; READ_PREG(PdV, PdN); } while (0) +#define fGEN_TCG_STORE(SHORTCODE) \ + do { \ + TCGv HALF = tcg_temp_new(); \ + TCGv BYTE = tcg_temp_new(); \ + SHORTCODE; \ + tcg_temp_free(HALF); \ + tcg_temp_free(BYTE); \ + } while (0) + +#define fGEN_TCG_STORE_pcr(SHIFT, STORE) \ + do { \ + TCGv ireg = tcg_temp_new(); \ + TCGv HALF = tcg_temp_new(); \ + TCGv BYTE = tcg_temp_new(); \ + tcg_gen_mov_tl(EA, RxV); \ + gen_read_ireg(ireg, MuV, SHIFT); \ + gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \ + STORE; \ + tcg_temp_free(ireg); \ + tcg_temp_free(HALF); \ + tcg_temp_free(BYTE); \ + } while (0) + +#define fGEN_TCG_S2_storerb_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerb_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, RtV))) + +#define fGEN_TCG_S2_storerh_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerh_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, RtV))) + +#define fGEN_TCG_S2_storerf_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerf_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(1, RtV))) + +#define fGEN_TCG_S2_storeri_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storeri_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, RtV)) + +#define fGEN_TCG_S2_storerd_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerd_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(3, fSTORE(1, 8, EA, RttV)) + +#define fGEN_TCG_S2_storerbnew_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerbnew_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, NtN))) + +#define fGEN_TCG_S2_storerhnew_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerhnew_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, NtN))) + +#define fGEN_TCG_S2_storerinew_pci(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) +#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \ + fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN)) + /* * Mathematical operations with more than one definition require * special handling diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 333f7d7..c6928d6 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -266,6 +266,16 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, } } +static TCGv gen_get_byte(TCGv result, int N, TCGv src, bool sign) +{ + if (sign) { + tcg_gen_sextract_tl(result, src, N * 8, 8); + } else { + tcg_gen_extract_tl(result, src, N * 8, 8); + } + return result; +} + static TCGv gen_get_byte_i64(TCGv result, int N, TCGv_i64 src, bool sign) { TCGv_i64 res64 = tcg_temp_new_i64(); @@ -280,6 +290,16 @@ static TCGv gen_get_byte_i64(TCGv result, int N, TCGv_i64 src, bool sign) return result; } +static inline TCGv gen_get_half(TCGv result, int N, TCGv src, bool sign) +{ + if (sign) { + tcg_gen_sextract_tl(result, src, N * 16, 16); + } else { + tcg_gen_extract_tl(result, src, N * 16, 16); + } + return result; +} + static void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src) { TCGv_i64 src64 = tcg_temp_new_i64(); @@ -361,6 +381,75 @@ static inline void gen_store_conditional8(CPUHexagonState *env, tcg_gen_movi_tl(hex_llsc_addr, ~0); } +static inline void gen_store32(TCGv vaddr, TCGv src, int width, int slot) +{ + tcg_gen_mov_tl(hex_store_addr[slot], vaddr); + tcg_gen_movi_tl(hex_store_width[slot], width); + tcg_gen_mov_tl(hex_store_val32[slot], src); +} + +static inline void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, + DisasContext *ctx, int slot) +{ + gen_store32(vaddr, src, 1, slot); + ctx->store_width[slot] = 1; +} + +static inline void gen_store1i(TCGv_env cpu_env, TCGv vaddr, int32_t src, + DisasContext *ctx, int slot) +{ + TCGv tmp = tcg_const_tl(src); + gen_store1(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free(tmp); +} + +static inline void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src, + DisasContext *ctx, int slot) +{ + gen_store32(vaddr, src, 2, slot); + ctx->store_width[slot] = 2; +} + +static inline void gen_store2i(TCGv_env cpu_env, TCGv vaddr, int32_t src, + DisasContext *ctx, int slot) +{ + TCGv tmp = tcg_const_tl(src); + gen_store2(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free(tmp); +} + +static inline void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src, + DisasContext *ctx, int slot) +{ + gen_store32(vaddr, src, 4, slot); + ctx->store_width[slot] = 4; +} + +static inline void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, + DisasContext *ctx, int slot) +{ + TCGv tmp = tcg_const_tl(src); + gen_store4(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free(tmp); +} + +static inline void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src, + DisasContext *ctx, int slot) +{ + tcg_gen_mov_tl(hex_store_addr[slot], vaddr); + tcg_gen_movi_tl(hex_store_width[slot], 8); + tcg_gen_mov_i64(hex_store_val64[slot], src); + ctx->store_width[slot] = 8; +} + +static inline void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, + DisasContext *ctx, int slot) +{ + TCGv_i64 tmp = tcg_const_i64(src); + gen_store8(cpu_env, vaddr, tmp, ctx, slot); + tcg_temp_free_i64(tmp); +} + static TCGv gen_8bitsof(TCGv result, TCGv value) { TCGv zero = tcg_const_tl(0); diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 514c240..68b435e 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -294,6 +294,7 @@ DEF_CLASS32(ICLASS_LD" ---- -------- PP------ --------",LD) DEF_CLASS32(ICLASS_LD" 0--- -------- PP------ --------",LD_ADDR_ROFFSET) +DEF_CLASS32(ICLASS_LD" 100- -------- PP----0- --------",LD_ADDR_POST_CIRC_IMMED) DEF_CLASS32(ICLASS_LD" 101- -------- PP00---- --------",LD_ADDR_POST_IMMED) DEF_CLASS32(ICLASS_LD" 101- -------- PP01---- --------",LD_ADDR_ABS_UPDATE_V4) DEF_CLASS32(ICLASS_LD" 101- -------- PP1----- --------",LD_ADDR_POST_IMMED_PRED_V2) @@ -308,18 +309,23 @@ DEF_FIELD32(ICLASS_LD" ---- --!----- PP------ --------",LD_UN,"Unsigned") #define STD_LD_ENC(TAG,OPC) \ DEF_ENC32(L2_load##TAG##_io, ICLASS_LD" 0 ii "OPC" sssss PPiiiiii iiiddddd")\ +DEF_ENC32(L2_load##TAG##_pci, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--0i iiiddddd")\ DEF_ENC32(L2_load##TAG##_pi, ICLASS_LD" 1 01 "OPC" xxxxx PP00---i iiiddddd")\ DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIddddd")\ DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--ddddd")\ DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIddddd")\ +DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--ddddd")\ #define STD_LDX_ENC(TAG,OPC) \ DEF_ENC32(L2_load##TAG##_io, ICLASS_LD" 0 ii "OPC" sssss PPiiiiii iiiyyyyy")\ +DEF_ENC32(L2_load##TAG##_pci, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--0i iiiyyyyy")\ DEF_ENC32(L2_load##TAG##_pi, ICLASS_LD" 1 01 "OPC" xxxxx PP00---i iiiyyyyy")\ DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIyyyyy")\ DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--yyyyy")\ DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIyyyyy")\ +DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--yyyyy")\ +DEF_ENC32(L2_load##TAG##_pbr, ICLASS_LD" 1 11 "OPC" xxxxx PPu0---- 0--yyyyy") #define STD_PLD_ENC(TAG,OPC) \ @@ -351,6 +357,7 @@ STD_PLD_ENC(rd, "1 110") /* note dest reg field LSB=0, 1 is reserved */ DEF_CLASS32( ICLASS_LD" 0--0 000----- PP------ --------",LD_MISC) DEF_ANTICLASS32(ICLASS_LD" 0--0 000----- PP------ --------",LD_ADDR_ROFFSET) +DEF_ANTICLASS32(ICLASS_LD" 1000 000----- PP------ --------",LD_ADDR_POST_CIRC_IMMED) DEF_ANTICLASS32(ICLASS_LD" 1010 000----- PP------ --------",LD_ADDR_POST_IMMED) DEF_ANTICLASS32(ICLASS_LD" 1100 000----- PP------ --------",LD_ADDR_POST_REG) DEF_ANTICLASS32(ICLASS_LD" 1110 000----- PP------ --------",LD_ADDR_POST_REG) @@ -397,6 +404,7 @@ DEF_FIELD32(ICLASS_ST" ---! !!------ PP------ --------",ST_Type,"Type") DEF_FIELD32(ICLASS_ST" ---- --!----- PP------ --------",ST_UN,"Unsigned") DEF_CLASS32(ICLASS_ST" 0--1 -------- PP------ --------",ST_ADDR_ROFFSET) +DEF_CLASS32(ICLASS_ST" 1001 -------- PP------ ------0-",ST_ADDR_POST_CIRC_IMMED) DEF_CLASS32(ICLASS_ST" 1011 -------- PP0----- 0-----0-",ST_ADDR_POST_IMMED) DEF_CLASS32(ICLASS_ST" 1011 -------- PP0----- 1-------",ST_ADDR_ABS_UPDATE_V4) DEF_CLASS32(ICLASS_ST" 1011 -------- PP1----- --------",ST_ADDR_POST_IMMED_PRED_V2) @@ -411,10 +419,12 @@ DEF_CLASS32(ICLASS_ST" 0--0 0------- PP------ --------",ST_MISC_CACHEOP) #define STD_ST_ENC(TAG,OPC,SRC) \ DEF_ENC32(S2_store##TAG##_io, ICLASS_ST" 0 ii "OPC" sssss PPi"SRC" iiiiiiii")\ +DEF_ENC32(S2_store##TAG##_pci, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0iiii-0-")\ DEF_ENC32(S2_store##TAG##_pi, ICLASS_ST" 1 01 "OPC" xxxxx PP0"SRC" 0iiii-0-")\ DEF_ENC32(S4_store##TAG##_ap, ICLASS_ST" 1 01 "OPC" eeeee PP0"SRC" 1-IIIIII")\ DEF_ENC32(S2_store##TAG##_pr, ICLASS_ST" 1 10 "OPC" xxxxx PPu"SRC" 0-------")\ DEF_ENC32(S4_store##TAG##_ur, ICLASS_ST" 1 10 "OPC" uuuuu PPi"SRC" 1iIIIIII")\ +DEF_ENC32(S2_store##TAG##_pcr, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0-----1-")\ #define STD_PST_ENC(TAG,OPC,SRC) \ diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 78a2ea4..6ce0635 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -26,6 +26,8 @@ Q6INSN(L4_##TAG##_ur, OPER"(Rt32<<#u2+#U6)", ATTRIB,DESCR,{fMUST_IM Q6INSN(L4_##TAG##_ap, OPER"(Re32=#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IMM(UiV); SEMANTICS; ReV=UiV; })\ Q6INSN(L2_##TAG##_pr, OPER"(Rx32++Mu2)", ATTRIB,DESCR,{fEA_REG(RxV); fPM_M(RxV,MuV); SEMANTICS;})\ Q6INSN(L2_##TAG##_pi, OPER"(Rx32++#s4:"SHFT")", ATTRIB,DESCR,{fEA_REG(RxV); fPM_I(RxV,siV); SEMANTICS;})\ +Q6INSN(L2_##TAG##_pci, OPER"(Rx32++#s4:"SHFT":circ(Mu2))",ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRI(RxV,siV,MuV); SEMANTICS;})\ +Q6INSN(L2_##TAG##_pcr, OPER"(Rx32++I:circ(Mu2))", ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRR(RxV,fREAD_IREG(MuV)<> 21) | ((VAL >> 17) & 0x7f))) +#endif + #define fREAD_LR() (READ_REG(HEX_REG_LR)) #define fWRITE_LR(A) WRITE_RREG(HEX_REG_LR, A) @@ -418,6 +483,13 @@ static inline void gen_logical_not(TCGv dest, TCGv src) #define fEA_REG(REG) tcg_gen_mov_tl(EA, REG) #define fPM_I(REG, IMM) tcg_gen_addi_tl(REG, REG, IMM) #define fPM_M(REG, MVAL) tcg_gen_add_tl(REG, REG, MVAL) +#define fPM_CIRI(REG, IMM, MVAL) \ + do { \ + TCGv tcgv_siV = tcg_const_tl(siV); \ + gen_helper_fcircadd(REG, REG, tcgv_siV, MuV, \ + hex_gpr[HEX_REG_CS0 + MuN]); \ + tcg_temp_free(tcgv_siV); \ + } while (0) #else #define fEA_IMM(IMM) do { EA = (IMM); } while (0) #define fEA_REG(REG) do { EA = (REG); } while (0) @@ -494,23 +566,43 @@ static inline void gen_logical_not(TCGv dest, TCGv src) gen_load_locked##SIZE##SIGN(DST, EA, ctx->mem_idx); #endif +#ifdef QEMU_GENERATE +#define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, insn->slot) +#else #define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, slot) +#endif #ifdef QEMU_GENERATE #define fSTORE_LOCKED(NUM, SIZE, EA, SRC, PRED) \ gen_store_conditional##SIZE(env, ctx, PdN, PRED, EA, SRC); #endif +#ifdef QEMU_GENERATE +#define GETBYTE_FUNC(X) \ + __builtin_choose_expr(TYPE_TCGV(X), \ + gen_get_byte, \ + __builtin_choose_expr(TYPE_TCGV_I64(X), \ + gen_get_byte_i64, (void)0)) +#define fGETBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, true) +#define fGETUBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, false) +#else #define fGETBYTE(N, SRC) ((int8_t)((SRC >> ((N) * 8)) & 0xff)) #define fGETUBYTE(N, SRC) ((uint8_t)((SRC >> ((N) * 8)) & 0xff)) +#endif #define fSETBYTE(N, DST, VAL) \ do { \ DST = (DST & ~(0x0ffLL << ((N) * 8))) | \ (((uint64_t)((VAL) & 0x0ffLL)) << ((N) * 8)); \ } while (0) + +#ifdef QEMU_GENERATE +#define fGETHALF(N, SRC) gen_get_half(HALF, N, SRC, true) +#define fGETUHALF(N, SRC) gen_get_half(HALF, N, SRC, false) +#else #define fGETHALF(N, SRC) ((int16_t)((SRC >> ((N) * 16)) & 0xffff)) #define fGETUHALF(N, SRC) ((uint16_t)((SRC >> ((N) * 16)) & 0xffff)) +#endif #define fSETHALF(N, DST, VAL) \ do { \ DST = (DST & ~(0x0ffffLL << ((N) * 16))) | \ diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index f9fb655..2319b93 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -251,33 +251,25 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) } -static int32_t fcircadd_v4(int32_t RxV, int32_t offset, int32_t M, int32_t CS) -{ - int32_t length = M & 0x0001ffff; - uint32_t new_ptr = RxV + offset; - uint32_t start_addr = CS; - uint32_t end_addr = start_addr + length; - - if (new_ptr >= end_addr) { - new_ptr -= length; - } else if (new_ptr < start_addr) { - new_ptr += length; - } - - return new_ptr; -} - int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) { - int32_t K_const = (M >> 24) & 0xf; - int32_t length = M & 0x1ffff; - int32_t mask = (1 << (K_const + 2)) - 1; + int32_t K_const = sextract32(M, 24, 4); + int32_t length = sextract32(M, 0, 17); uint32_t new_ptr = RxV + offset; - uint32_t start_addr = RxV & (~mask); - uint32_t end_addr = start_addr | length; + uint32_t start_addr; + uint32_t end_addr; if (K_const == 0 && length >= 4) { - return fcircadd_v4(RxV, offset, M, CS); + start_addr = CS; + end_addr = start_addr + length; + } else { + /* + * Versions v3 and earlier used the K value to specify a power-of-2 size + * 2^(K+2) that is greater than the buffer length + */ + int32_t mask = (1 << (K_const + 2)) - 1; + start_addr = RxV & (~mask); + end_addr = start_addr | length; } if (new_ptr >= end_addr) { diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 18218ad..15c7091 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -28,6 +28,7 @@ endif CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal +CFLAGS += -fno-unroll-loops HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon VPATH += $(HEX_SRC) @@ -41,6 +42,7 @@ HEX_TESTS += preg_alias HEX_TESTS += dual_stores HEX_TESTS += multi_result HEX_TESTS += mem_noshuf +HEX_TESTS += circ HEX_TESTS += atomics HEX_TESTS += fpstuff diff --git a/tests/tcg/hexagon/circ.c b/tests/tcg/hexagon/circ.c new file mode 100644 index 0000000..67a1aa3 --- /dev/null +++ b/tests/tcg/hexagon/circ.c @@ -0,0 +1,486 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +#define DEBUG 0 +#define DEBUG_PRINTF(...) \ + do { \ + if (DEBUG) { \ + printf(__VA_ARGS__); \ + } \ + } while (0) + + +#define NBYTES (1 << 8) +#define NHALFS (NBYTES / sizeof(short)) +#define NWORDS (NBYTES / sizeof(int)) +#define NDOBLS (NBYTES / sizeof(long long)) + +long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0}; +int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0}; +short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0}; +unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0}; + +/* + * We use the C preporcessor to deal with the combinations of types + */ + +#define INIT(BUF, N) \ + void init_##BUF(void) \ + { \ + int i; \ + for (i = 0; i < N; i++) { \ + BUF[i] = i; \ + } \ + } \ + +INIT(bbuf, NBYTES) +INIT(hbuf, NHALFS) +INIT(wbuf, NWORDS) +INIT(dbuf, NDOBLS) + +/* + * Macros for performing circular load + * RES result + * ADDR address + * START start address of buffer + * LEN length of buffer (in bytes) + * INC address increment (in bytes for IMM, elements for REG) + */ +#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC) + +/* + * The mreg has the following pieces + * mreg[31:28] increment[10:7] + * mreg[27:24] K value (used Hexagon v3 and earlier) + * mreg[23:17] increment[6:0] + * mreg[16:0] circular buffer length + */ +static int build_mreg(int inc, int K, int len) +{ + return ((inc & 0x780) << 21) | + ((K & 0xf) << 24) | + ((inc & 0x7f) << 17) | + (len & 0x1ffff); +} + +#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC) + +/* + * Macros for performing circular store + * VAL value to store + * ADDR address + * START start address of buffer + * LEN length of buffer (in bytes) + * INC address increment (in bytes for IMM, elements for REG) + */ +#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %1\n\t" \ + "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(START), "r"(VAL), "r"(LEN) \ + : "r4", "m0", "cs0", "memory") +#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %1\n\t" \ + "{\n\t" \ + " r5 = %2\n\t" \ + " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(START), "r"(VAL), "r"(LEN) \ + : "r4", "r5", "m0", "cs0", "memory") +#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %1\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %2\n\t" \ + "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START), \ + "r"(VAL) \ + : "r4", "m1", "cs1", "memory") +#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %1\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %2\n\t" \ + "{\n\t" \ + " r5 = %3\n\t" \ + " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START), \ + "r"(VAL) \ + : "r4", "r5", "m1", "cs1", "memory") +#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC) + + +int err; + +/* We'll test increments +1 and -1 */ +void check_load(int i, long long result, int inc, int size) +{ + int expect = (i * inc); + while (expect >= size) { + expect -= size; + } + while (expect < 0) { + expect += size; + } + if (result != expect) { + printf("ERROR(%d): %lld != %d\n", i, result, expect); + err++; + } +} + +#define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \ +void circ_test_load_imm_##SZ(void) \ +{ \ + TYPE *p = (TYPE *)BUF; \ + int size = 10; \ + int i; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \ + } \ + p = (TYPE *)BUF; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \ + } \ +} + +TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d) +TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d) +TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d) +TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d) +TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, d) +TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld) + +#define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \ +void circ_test_load_reg_##SZ(void) \ +{ \ + TYPE *p = (TYPE *)BUF; \ + int size = 13; \ + int i; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, 1, size); \ + } \ + p = (TYPE *)BUF; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, -1, size); \ + } \ +} + +TEST_LOAD_REG(b, char, bbuf, NBYTES, d) +TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d) +TEST_LOAD_REG(h, short, hbuf, NHALFS, d) +TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d) +TEST_LOAD_REG(w, int, wbuf, NWORDS, d) +TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld) + +/* The circular stores will wrap around somewhere inside the buffer */ +#define CIRC_VAL(SZ, TYPE, BUFSIZE) \ +TYPE circ_val_##SZ(int i, int inc, int size) \ +{ \ + int mod = BUFSIZE % size; \ + int elem = i * inc; \ + if (elem < 0) { \ + if (-elem <= size - mod) { \ + return (elem + BUFSIZE - mod); \ + } else { \ + return (elem + BUFSIZE + size - mod); \ + } \ + } else if (elem < mod) {\ + return (elem + BUFSIZE - mod); \ + } else { \ + return (elem + BUFSIZE - size - mod); \ + } \ +} + +CIRC_VAL(b, unsigned char, NBYTES) +CIRC_VAL(h, short, NHALFS) +CIRC_VAL(w, int, NWORDS) +CIRC_VAL(d, long long, NDOBLS) + +/* + * Circular stores should only write to the first "size" elements of the buffer + * the remainder of the elements should have BUF[i] == i + */ +#define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \ +void check_store_##SZ(int inc, int size) \ +{ \ + int i; \ + for (i = 0; i < size; i++) { \ + DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \ + i, BUF[i], circ_val_##SZ(i, inc, size)); \ + if (BUF[i] != circ_val_##SZ(i, inc, size)) { \ + printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \ + i, BUF[i], circ_val_##SZ(i, inc, size)); \ + err++; \ + } \ + } \ + for (i = size; i < BUFSIZE; i++) { \ + if (BUF[i] != i) { \ + printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \ + err++; \ + } \ + } \ +} + +CHECK_STORE(b, bbuf, NBYTES, x) +CHECK_STORE(h, hbuf, NHALFS, x) +CHECK_STORE(w, wbuf, NWORDS, x) +CHECK_STORE(d, dbuf, NDOBLS, llx) + +#define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \ +void circ_test_store_imm_##SZ(void) \ +{ \ + unsigned int size = 27; \ + TYPE *p = BUF; \ + TYPE val = 0; \ + int i; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \ + val++; \ + } \ + check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \ + p = BUF; \ + val = 0; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \ + -(INC)); \ + val++; \ + } \ + check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \ +} + +CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1) +CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2) +CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2) +CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4) +CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8) +CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1) +CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2) +CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4) + +#define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \ +void circ_test_store_reg_##SZ(void) \ +{ \ + TYPE *p = BUF; \ + unsigned int size = 19; \ + TYPE val = 0; \ + int i; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \ + val++; \ + } \ + check_store_##CHK(1, size); \ + p = BUF; \ + val = 0; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \ + val++; \ + } \ + check_store_##CHK(-1, size); \ +} + +CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0) +CIRC_TEST_STORE_REG(h, h, short, hbuf, NHALFS, 0) +CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16) +CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0) +CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0) +CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0) +CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0) +CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0) + +/* Test the old scheme used in Hexagon V3 */ +static void circ_test_v3(void) +{ + int *p = wbuf; + int size = 15; + int K = 4; /* 64 bytes */ + int element; + int i; + + init_wbuf(); + + for (i = 0; i < NWORDS; i++) { + __asm__( + "r4 = %2\n\t" + "m1 = r4\n\t" + "%0 = memw(%1++I:circ(M1))\n\t" + : "=r"(element), "+r"(p) + : "r"(build_mreg(1, K, size * sizeof(int))) + : "r4", "m1"); + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element); + check_load(i, element, 1, size); + } +} + +int main() +{ + init_bbuf(); + init_hbuf(); + init_wbuf(); + init_dbuf(); + + DEBUG_PRINTF("NBYTES = %d\n", NBYTES); + DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf); + DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf); + DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf); + DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf); + + circ_test_load_imm_b(); + circ_test_load_imm_ub(); + circ_test_load_imm_h(); + circ_test_load_imm_uh(); + circ_test_load_imm_w(); + circ_test_load_imm_d(); + + circ_test_load_reg_b(); + circ_test_load_reg_ub(); + circ_test_load_reg_h(); + circ_test_load_reg_uh(); + circ_test_load_reg_w(); + circ_test_load_reg_d(); + + circ_test_store_imm_b(); + circ_test_store_imm_h(); + circ_test_store_imm_f(); + circ_test_store_imm_w(); + circ_test_store_imm_d(); + circ_test_store_imm_bnew(); + circ_test_store_imm_hnew(); + circ_test_store_imm_wnew(); + + circ_test_store_reg_b(); + circ_test_store_reg_h(); + circ_test_store_reg_f(); + circ_test_store_reg_w(); + circ_test_store_reg_d(); + circ_test_store_reg_bnew(); + circ_test_store_reg_hnew(); + circ_test_store_reg_wnew(); + + circ_test_v3(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} -- cgit v1.1 From af7f1821273c45a6101735023736882ec0399e86 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:51 -0500 Subject: Hexagon (target/hexagon) bit reverse (brev) addressing The following instructions are added L2_loadrub_pbr Rd32 = memub(Rx32++Mu2:brev) L2_loadrb_pbr Rd32 = memb(Rx32++Mu2:brev) L2_loadruh_pbr Rd32 = memuh(Rx32++Mu2:brev) L2_loadrh_pbr Rd32 = memh(Rx32++Mu2:brev) L2_loadri_pbr Rd32 = memw(Rx32++Mu2:brev) L2_loadrd_pbr Rdd32 = memd(Rx32++Mu2:brev) S2_storerb_pbr memb(Rx32++Mu2:brev).=.Rt32 S2_storerh_pbr memh(Rx32++Mu2:brev).=.Rt32 S2_storerf_pbr memh(Rx32++Mu2:brev).=.Rt.H32 S2_storeri_pbr memw(Rx32++Mu2:brev).=.Rt32 S2_storerd_pbr memd(Rx32++Mu2:brev).=.Rt32 S2_storerinew_pbr memw(Rx32++Mu2:brev).=.Nt8.new S2_storerbnew_pbr memw(Rx32++Mu2:brev).=.Nt8.new S2_storerhnew_pbr memw(Rx32++Mu2:brev).=.Nt8.new Test cases in tests/tcg/hexagon/brev.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-24-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 28 +++++ target/hexagon/helper.h | 1 + target/hexagon/imported/encode_pp.def | 4 + target/hexagon/imported/ldst.idef | 2 + target/hexagon/imported/macros.def | 6 ++ target/hexagon/macros.h | 1 + target/hexagon/op_helper.c | 8 ++ tests/tcg/hexagon/Makefile.target | 1 + tests/tcg/hexagon/brev.c | 190 ++++++++++++++++++++++++++++++++++ 9 files changed, 241 insertions(+) create mode 100644 tests/tcg/hexagon/brev.c diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 25c228c..8f0ec01 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -37,6 +37,7 @@ * _sp stack pointer relative r0 = memw(r29+#12) * _ap absolute set r0 = memw(r1=##variable) * _pr post increment register r0 = memw(r1++m1) + * _pbr post increment bit reverse r0 = memw(r1++m1:brev) * _pi post increment immediate r0 = memb(r1++#1) * _pci post increment circular immediate r0 = memw(r1++#4:circ(m0)) * _pcr post increment circular register r0 = memw(r1++I:circ(m0)) @@ -53,6 +54,11 @@ fEA_REG(RxV); \ fPM_M(RxV, MuV); \ } while (0) +#define GET_EA_pbr \ + do { \ + gen_helper_fbrev(EA, RxV); \ + tcg_gen_add_tl(RxV, RxV, MuV); \ + } while (0) #define GET_EA_pi \ do { \ fEA_REG(RxV); \ @@ -128,16 +134,22 @@ fGEN_TCG_LOAD_pcr(3, fLOAD(1, 8, u, EA, RddV)) #define fGEN_TCG_L2_loadrub_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrub_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrub_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrb_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrb_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrb_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadruh_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadruh_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadruh_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrh_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrh_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrh_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadri_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadri_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadri_pi(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrd_pr(SHORTCODE) SHORTCODE +#define fGEN_TCG_L2_loadrd_pbr(SHORTCODE) SHORTCODE #define fGEN_TCG_L2_loadrd_pi(SHORTCODE) SHORTCODE /* @@ -265,41 +277,57 @@ tcg_temp_free(BYTE); \ } while (0) +#define fGEN_TCG_S2_storerb_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerb_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerb_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, RtV))) +#define fGEN_TCG_S2_storerh_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerh_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerh_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, RtV))) +#define fGEN_TCG_S2_storerf_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerf_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerf_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(1, RtV))) +#define fGEN_TCG_S2_storeri_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storeri_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storeri_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, RtV)) +#define fGEN_TCG_S2_storerd_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerd_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerd_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(3, fSTORE(1, 8, EA, RttV)) +#define fGEN_TCG_S2_storerbnew_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerbnew_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerbnew_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, NtN))) +#define fGEN_TCG_S2_storerhnew_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerhnew_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerhnew_pcr(SHORTCODE) \ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, NtN))) +#define fGEN_TCG_S2_storerinew_pbr(SHORTCODE) \ + fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerinew_pci(SHORTCODE) \ fGEN_TCG_STORE(SHORTCODE) #define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \ diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 3824ae0..ca201fb 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -24,6 +24,7 @@ DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) +DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 68b435e..4464926 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -301,6 +301,7 @@ DEF_CLASS32(ICLASS_LD" 101- -------- PP1----- --------",LD_ADDR_POST_IMMED_PRED_ DEF_CLASS32(ICLASS_LD" 110- -------- PP-0---- 0-------",LD_ADDR_POST_REG) DEF_CLASS32(ICLASS_LD" 110- -------- PP-1---- --------",LD_ADDR_ABS_PLUS_REG_V4) DEF_CLASS32(ICLASS_LD" 100- -------- PP----1- --------",LD_ADDR_POST_CREG_V2) +DEF_CLASS32(ICLASS_LD" 111- -------- PP------ 0-------",LD_ADDR_POST_BREV_REG) DEF_CLASS32(ICLASS_LD" 111- -------- PP------ 1-------",LD_ADDR_PRED_ABS_V4) DEF_FIELD32(ICLASS_LD" !!!- -------- PP------ --------",LD_Amode,"Amode") @@ -315,6 +316,7 @@ DEF_ENC32(L4_load##TAG##_ap, ICLASS_LD" 1 01 "OPC" eeeee PP01IIII -IIddddd" DEF_ENC32(L2_load##TAG##_pr, ICLASS_LD" 1 10 "OPC" xxxxx PPu0---- 0--ddddd")\ DEF_ENC32(L4_load##TAG##_ur, ICLASS_LD" 1 10 "OPC" ttttt PPi1IIII iIIddddd")\ DEF_ENC32(L2_load##TAG##_pcr, ICLASS_LD" 1 00 "OPC" xxxxx PPu0--1- 0--ddddd")\ +DEF_ENC32(L2_load##TAG##_pbr, ICLASS_LD" 1 11 "OPC" xxxxx PPu0---- 0--ddddd") #define STD_LDX_ENC(TAG,OPC) \ @@ -412,6 +414,7 @@ DEF_CLASS32(ICLASS_ST" 1111 -------- PP------ 1-------",ST_ADDR_PRED_ABS_V4) DEF_CLASS32(ICLASS_ST" 1101 -------- PP------ 0-------",ST_ADDR_POST_REG) DEF_CLASS32(ICLASS_ST" 1101 -------- PP------ 1-------",ST_ADDR_ABS_PLUS_REG_V4) DEF_CLASS32(ICLASS_ST" 1001 -------- PP------ ------1-",ST_ADDR_POST_CREG_V2) +DEF_CLASS32(ICLASS_ST" 1111 -------- PP------ 0-------",ST_ADDR_POST_BREV_REG) DEF_CLASS32(ICLASS_ST" 0--0 1------- PP------ --------",ST_MISC_STORELIKE) DEF_CLASS32(ICLASS_ST" 1--0 0------- PP------ --------",ST_MISC_BUSOP) DEF_CLASS32(ICLASS_ST" 0--0 0------- PP------ --------",ST_MISC_CACHEOP) @@ -425,6 +428,7 @@ DEF_ENC32(S4_store##TAG##_ap, ICLASS_ST" 1 01 "OPC" eeeee PP0"SRC" 1-IIIIII DEF_ENC32(S2_store##TAG##_pr, ICLASS_ST" 1 10 "OPC" xxxxx PPu"SRC" 0-------")\ DEF_ENC32(S4_store##TAG##_ur, ICLASS_ST" 1 10 "OPC" uuuuu PPi"SRC" 1iIIIIII")\ DEF_ENC32(S2_store##TAG##_pcr, ICLASS_ST" 1 00 "OPC" xxxxx PPu"SRC" 0-----1-")\ +DEF_ENC32(S2_store##TAG##_pbr, ICLASS_ST" 1 11 "OPC" xxxxx PPu"SRC" 0-------") #define STD_PST_ENC(TAG,OPC,SRC) \ diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 6ce0635..fe7e018 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -25,6 +25,7 @@ Q6INSN(L2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")", ATTRIB,DESCR,{fIMMEXT( Q6INSN(L4_##TAG##_ur, OPER"(Rt32<<#u2+#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IRs(UiV,RtV,uiV); SEMANTICS;})\ Q6INSN(L4_##TAG##_ap, OPER"(Re32=#U6)", ATTRIB,DESCR,{fMUST_IMMEXT(UiV); fEA_IMM(UiV); SEMANTICS; ReV=UiV; })\ Q6INSN(L2_##TAG##_pr, OPER"(Rx32++Mu2)", ATTRIB,DESCR,{fEA_REG(RxV); fPM_M(RxV,MuV); SEMANTICS;})\ +Q6INSN(L2_##TAG##_pbr, OPER"(Rx32++Mu2:brev)", ATTRIB,DESCR,{fEA_BREVR(RxV); fPM_M(RxV,MuV); SEMANTICS;})\ Q6INSN(L2_##TAG##_pi, OPER"(Rx32++#s4:"SHFT")", ATTRIB,DESCR,{fEA_REG(RxV); fPM_I(RxV,siV); SEMANTICS;})\ Q6INSN(L2_##TAG##_pci, OPER"(Rx32++#s4:"SHFT":circ(Mu2))",ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRI(RxV,siV,MuV); SEMANTICS;})\ Q6INSN(L2_##TAG##_pcr, OPER"(Rx32++I:circ(Mu2))", ATTRIB,DESCR,{fEA_REG(RxV); fPM_CIRR(RxV,fREAD_IREG(MuV)<>= 1; + } + return result; +} + +int sext8(int x) +{ + return (x << 24) >> 24; +} + +void check(int i, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR(%d): 0x%04llx != 0x%04llx\n", i, result, expect); + err++; + } +} + +#define TEST_BREV_LOAD(SZ, TYPE, BUF, SHIFT, EXP) \ + do { \ + p = BUF; \ + for (i = 0; i < SIZE; i++) { \ + TYPE result; \ + BREV_LOAD_##SZ(result, p, 1 << (SHIFT - NBITS)); \ + check(i, result, EXP); \ + } \ + } while (0) + +#define TEST_BREV_STORE(SZ, TYPE, BUF, VAL, SHIFT) \ + do { \ + p = BUF; \ + memset(BUF, 0xff, sizeof(BUF)); \ + for (i = 0; i < SIZE; i++) { \ + BREV_STORE_##SZ(p, (TYPE)(VAL), 1 << (SHIFT - NBITS)); \ + } \ + for (i = 0; i < SIZE; i++) { \ + check(i, BUF[i], bitreverse(i)); \ + } \ + } while (0) + +#define TEST_BREV_STORE_NEW(SZ, BUF, SHIFT) \ + do { \ + p = BUF; \ + memset(BUF, 0xff, sizeof(BUF)); \ + for (i = 0; i < SIZE; i++) { \ + BREV_STORE_##SZ(p, i, 1 << (SHIFT - NBITS)); \ + } \ + for (i = 0; i < SIZE; i++) { \ + check(i, BUF[i], bitreverse(i)); \ + } \ + } while (0) + +/* + * We'll set high_half[i] = i << 16 for use in the .H form of store + * which stores from the high half of the word. + */ +int high_half[SIZE]; + +int main() +{ + void *p; + int i; + + for (i = 0; i < SIZE; i++) { + bbuf[i] = bitreverse(i); + hbuf[i] = bitreverse(i); + wbuf[i] = bitreverse(i); + dbuf[i] = bitreverse(i); + high_half[i] = i << 16; + } + + TEST_BREV_LOAD(b, int, bbuf, 16, sext8(i)); + TEST_BREV_LOAD(ub, int, bbuf, 16, i); + TEST_BREV_LOAD(h, int, hbuf, 15, i); + TEST_BREV_LOAD(uh, int, hbuf, 15, i); + TEST_BREV_LOAD(w, int, wbuf, 14, i); + TEST_BREV_LOAD(d, long long, dbuf, 13, i); + + TEST_BREV_STORE(b, int, bbuf, i, 16); + TEST_BREV_STORE(h, int, hbuf, i, 15); + TEST_BREV_STORE(f, int, hbuf, high_half[i], 15); + TEST_BREV_STORE(w, int, wbuf, i, 14); + TEST_BREV_STORE(d, long long, dbuf, i, 13); + + TEST_BREV_STORE_NEW(bnew, bbuf, 16); + TEST_BREV_STORE_NEW(hnew, hbuf, 15); + TEST_BREV_STORE_NEW(wnew, wbuf, 14); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} -- cgit v1.1 From 0d0b91a8049967f5e3bfd86e9d372d61b4019b77 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:52 -0500 Subject: Hexagon (target/hexagon) load and unpack bytes instructions The following instructions are added L2_loadbzw2_io Rd32 = memubh(Rs32+#s11:1) L2_loadbzw4_io Rdd32 = memubh(Rs32+#s11:1) L2_loadbsw2_io Rd32 = membh(Rs32+#s11:1) L2_loadbsw4_io Rdd32 = membh(Rs32+#s11:1) L4_loadbzw2_ur Rd32 = memubh(Rt32<<#u2+#U6) L4_loadbzw4_ur Rdd32 = memubh(Rt32<<#u2+#U6) L4_loadbsw2_ur Rd32 = membh(Rt32<<#u2+#U6) L4_loadbsw4_ur Rdd32 = membh(Rt32<<#u2+#U6) L4_loadbzw2_ap Rd32 = memubh(Re32=#U6) L4_loadbzw4_ap Rdd32 = memubh(Re32=#U6) L4_loadbsw2_ap Rd32 = membh(Re32=#U6) L4_loadbsw4_ap Rdd32 = membh(Re32=#U6) L2_loadbzw2_pr Rd32 = memubh(Rx32++Mu2) L2_loadbzw4_pr Rdd32 = memubh(Rx32++Mu2) L2_loadbsw2_pr Rd32 = membh(Rx32++Mu2) L2_loadbsw4_pr Rdd32 = membh(Rx32++Mu2) L2_loadbzw2_pbr Rd32 = memubh(Rx32++Mu2:brev) L2_loadbzw4_pbr Rdd32 = memubh(Rx32++Mu2:brev) L2_loadbsw2_pbr Rd32 = membh(Rx32++Mu2:brev) L2_loadbsw4_pbr Rdd32 = membh(Rx32++Mu2:brev) L2_loadbzw2_pi Rd32 = memubh(Rx32++#s4:1) L2_loadbzw4_pi Rdd32 = memubh(Rx32++#s4:1) L2_loadbsw2_pi Rd32 = membh(Rx32++#s4:1) L2_loadbsw4_pi Rdd32 = membh(Rx32++#s4:1) L2_loadbzw2_pci Rd32 = memubh(Rx32++#s4:1:circ(Mu2)) L2_loadbzw4_pci Rdd32 = memubh(Rx32++#s4:1:circ(Mu2)) L2_loadbsw2_pci Rd32 = membh(Rx32++#s4:1:circ(Mu2)) L2_loadbsw4_pci Rdd32 = membh(Rx32++#s4:1:circ(Mu2)) L2_loadbzw2_pcr Rd32 = memubh(Rx32++I:circ(Mu2)) L2_loadbzw4_pcr Rdd32 = memubh(Rx32++I:circ(Mu2)) L2_loadbsw2_pcr Rd32 = membh(Rx32++I:circ(Mu2)) L2_loadbsw4_pcr Rdd32 = membh(Rx32++I:circ(Mu2)) Test cases in tests/tcg/hexagon/load_unpack.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-25-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 108 ++++++++ target/hexagon/genptr.c | 13 + target/hexagon/imported/encode_pp.def | 6 + target/hexagon/imported/ldst.idef | 43 +++ target/hexagon/macros.h | 16 ++ tests/tcg/hexagon/Makefile.target | 1 + tests/tcg/hexagon/load_unpack.c | 474 ++++++++++++++++++++++++++++++++++ 7 files changed, 661 insertions(+) create mode 100644 tests/tcg/hexagon/load_unpack.c diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 8f0ec01..1120aae 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -153,6 +153,114 @@ #define fGEN_TCG_L2_loadrd_pi(SHORTCODE) SHORTCODE /* + * These instructions load 2 bytes and places them in + * two halves of the destination register. + * The GET_EA macro determines the addressing mode. + * The SIGN argument determines whether to zero-extend or + * sign-extend. + */ +#define fGEN_TCG_loadbXw2(GET_EA, SIGN) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv byte = tcg_temp_new(); \ + GET_EA; \ + fLOAD(1, 2, u, EA, tmp); \ + tcg_gen_movi_tl(RdV, 0); \ + for (int i = 0; i < 2; i++) { \ + gen_set_half(i, RdV, gen_get_byte(byte, i, tmp, (SIGN))); \ + } \ + tcg_temp_free(tmp); \ + tcg_temp_free(byte); \ + } while (0) + +#define fGEN_TCG_L2_loadbzw2_io(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), false) +#define fGEN_TCG_L4_loadbzw2_ur(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), false) +#define fGEN_TCG_L2_loadbsw2_io(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), true) +#define fGEN_TCG_L4_loadbsw2_ur(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), true) +#define fGEN_TCG_L4_loadbzw2_ap(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_ap, false) +#define fGEN_TCG_L2_loadbzw2_pr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pr, false) +#define fGEN_TCG_L2_loadbzw2_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pbr, false) +#define fGEN_TCG_L2_loadbzw2_pi(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pi, false) +#define fGEN_TCG_L4_loadbsw2_ap(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_ap, true) +#define fGEN_TCG_L2_loadbsw2_pr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pr, true) +#define fGEN_TCG_L2_loadbsw2_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pbr, true) +#define fGEN_TCG_L2_loadbsw2_pi(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pi, true) +#define fGEN_TCG_L2_loadbzw2_pci(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pci, false) +#define fGEN_TCG_L2_loadbsw2_pci(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pci, true) +#define fGEN_TCG_L2_loadbzw2_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pcr(1), false) +#define fGEN_TCG_L2_loadbsw2_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pcr(1), true) + +/* + * These instructions load 4 bytes and places them in + * four halves of the destination register pair. + * The GET_EA macro determines the addressing mode. + * The SIGN argument determines whether to zero-extend or + * sign-extend. + */ +#define fGEN_TCG_loadbXw4(GET_EA, SIGN) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv byte = tcg_temp_new(); \ + GET_EA; \ + fLOAD(1, 4, u, EA, tmp); \ + tcg_gen_movi_i64(RddV, 0); \ + for (int i = 0; i < 4; i++) { \ + gen_set_half_i64(i, RddV, gen_get_byte(byte, i, tmp, (SIGN))); \ + } \ + tcg_temp_free(tmp); \ + tcg_temp_free(byte); \ + } while (0) + +#define fGEN_TCG_L2_loadbzw4_io(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), false) +#define fGEN_TCG_L4_loadbzw4_ur(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), false) +#define fGEN_TCG_L2_loadbsw4_io(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), true) +#define fGEN_TCG_L4_loadbsw4_ur(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), true) +#define fGEN_TCG_L2_loadbzw4_pci(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pci, false) +#define fGEN_TCG_L2_loadbsw4_pci(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pci, true) +#define fGEN_TCG_L2_loadbzw4_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pcr(2), false) +#define fGEN_TCG_L2_loadbsw4_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pcr(2), true) +#define fGEN_TCG_L4_loadbzw4_ap(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_ap, false) +#define fGEN_TCG_L2_loadbzw4_pr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pr, false) +#define fGEN_TCG_L2_loadbzw4_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pbr, false) +#define fGEN_TCG_L2_loadbzw4_pi(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pi, false) +#define fGEN_TCG_L4_loadbsw4_ap(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_ap, true) +#define fGEN_TCG_L2_loadbsw4_pr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pr, true) +#define fGEN_TCG_L2_loadbsw4_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pbr, true) +#define fGEN_TCG_L2_loadbsw4_pi(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pi, true) + +/* * Predicated loads * Here is a primer to understand the tag names * diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index c6928d6..f93f895 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -300,6 +300,19 @@ static inline TCGv gen_get_half(TCGv result, int N, TCGv src, bool sign) return result; } +static inline void gen_set_half(int N, TCGv result, TCGv src) +{ + tcg_gen_deposit_tl(result, result, src, N * 16, 16); +} + +static inline void gen_set_half_i64(int N, TCGv_i64 result, TCGv src) +{ + TCGv_i64 src64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(src64, src); + tcg_gen_deposit_i64(result, result, src64, N * 16, 16); + tcg_temp_free_i64(src64); +} + static void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src) { TCGv_i64 src64 = tcg_temp_new_i64(); diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 4464926..e3582eb 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -342,6 +342,12 @@ DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP111tti 1--ddd /* 0 000 misc: dealloc,loadw_locked,dcfetch */ +STD_LD_ENC(bzw4,"0 101") +STD_LD_ENC(bzw2,"0 011") + +STD_LD_ENC(bsw4,"0 111") +STD_LD_ENC(bsw2,"0 001") + STD_LD_ENC(rb, "1 000") STD_LD_ENC(rub, "1 001") STD_LD_ENC(rh, "1 010") diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index fe7e018..95c0470 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -38,6 +38,49 @@ STD_LD_AMODES(loadrh, "Rd32=memh", "Load signed Half integer",ATTRIBS(A_LOAD),"1 STD_LD_AMODES(loadri, "Rd32=memw", "Load Word",ATTRIBS(A_LOAD),"2",fLOAD(1,4,u,EA,RdV),2) STD_LD_AMODES(loadrd, "Rdd32=memd","Load Double integer",ATTRIBS(A_LOAD),"3",fLOAD(1,8,u,EA,RddV),3) +/* These instructions do a load an unpack */ +STD_LD_AMODES(loadbzw2, "Rd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)", +ATTRIBS(A_LOAD),"1", +{fHIDE(size2u_t tmpV; int i;) + fLOAD(1,2,u,EA,tmpV); + for (i=0;i<2;i++) { + fSETHALF(i,RdV,fGETUBYTE(i,tmpV)); + } +},1) + +STD_LD_AMODES(loadbzw4, "Rdd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)", +ATTRIBS(A_LOAD),"2", +{fHIDE(size4u_t tmpV; int i;) + fLOAD(1,4,u,EA,tmpV); + for (i=0;i<4;i++) { + fSETHALF(i,RddV,fGETUBYTE(i,tmpV)); + } +},2) + + + +/* These instructions do a load an unpack */ +STD_LD_AMODES(loadbsw2, "Rd32=membh", "Load Bytes and Vector Sign-Extend (unpack)", +ATTRIBS(A_LOAD),"1", +{fHIDE(size2u_t tmpV; int i;) + fLOAD(1,2,u,EA,tmpV); + for (i=0;i<2;i++) { + fSETHALF(i,RdV,fGETBYTE(i,tmpV)); + } +},1) + +STD_LD_AMODES(loadbsw4, "Rdd32=membh", "Load Bytes and Vector Sign-Extend (unpack)", +ATTRIBS(A_LOAD),"2", +{fHIDE(size4u_t tmpV; int i;) + fLOAD(1,4,u,EA,tmpV); + for (i=0;i<4;i++) { + fSETHALF(i,RddV,fGETBYTE(i,tmpV)); + } +},2) + + + + /* The set of addressing modes standard to all Store instructions */ #define STD_ST_AMODES(TAG,DEST,OPER,DESCR,ATTRIB,SHFT,SEMANTICS,SCALE)\ Q6INSN(S2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")="DEST, ATTRIB,DESCR,{fIMMEXT(siV); fEA_RI(RsV,siV); SEMANTICS; })\ diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 30c8951..ec5bf60 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -465,6 +465,21 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fCAST8S_16S(A) (int128_exts64(A)) #define fCAST16S_8S(A) (int128_getlo(A)) +#ifdef QEMU_GENERATE +#define fEA_RI(REG, IMM) tcg_gen_addi_tl(EA, REG, IMM) +#define fEA_RRs(REG, REG2, SCALE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_shli_tl(tmp, REG2, SCALE); \ + tcg_gen_add_tl(EA, REG, tmp); \ + tcg_temp_free(tmp); \ + } while (0) +#define fEA_IRs(IMM, REG, SCALE) \ + do { \ + tcg_gen_shli_tl(EA, REG, SCALE); \ + tcg_gen_addi_tl(EA, EA, IMM); \ + } while (0) +#else #define fEA_RI(REG, IMM) \ do { \ EA = REG + IMM; \ @@ -477,6 +492,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) do { \ EA = IMM + (REG << SCALE); \ } while (0) +#endif #ifdef QEMU_GENERATE #define fEA_IMM(IMM) tcg_gen_movi_tl(EA, IMM) diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 6e38950..183f4e2 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -44,6 +44,7 @@ HEX_TESTS += multi_result HEX_TESTS += mem_noshuf HEX_TESTS += circ HEX_TESTS += brev +HEX_TESTS += load_unpack HEX_TESTS += atomics HEX_TESTS += fpstuff diff --git a/tests/tcg/hexagon/load_unpack.c b/tests/tcg/hexagon/load_unpack.c new file mode 100644 index 0000000..3575a37 --- /dev/null +++ b/tests/tcg/hexagon/load_unpack.c @@ -0,0 +1,474 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +/* + * Test load unpack instructions + * + * Example + * r0 = memubh(r1+#0) + * loads a half word from memory and zero-extends the 2 bytes to form a word + * + * For each addressing mode, there are 4 tests + * bzw2 unsigned 2 elements + * bsw2 signed 2 elements + * bzw4 unsigned 4 elements + * bsw4 signed 4 elements + * There are 8 addressing modes, for a total of 32 instructions to test + */ + +#include +#include + +int err; + +char buf[16] __attribute__((aligned(1 << 16))); + +void init_buf(void) +{ + int i; + for (i = 0; i < 16; i++) { + int sign = i % 2 == 0 ? 0x80 : 0; + buf[i] = sign | (i + 1); + } +} + +void __check(int line, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%08llx != 0x%08llx\n", + line, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +void __checkp(int line, void *p, void *expect) +{ + if (p != expect) { + printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect); + err++; + } +} + +#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP) + +/* + **************************************************************************** + * _io addressing mode (addr + offset) + */ +#define BxW_LOAD_io(SZ, RES, ADDR, OFF) \ + __asm__( \ + "%0 = mem" #SZ "(%1+#" #OFF ")\n\t" \ + : "=r"(RES) \ + : "r"(ADDR)) +#define BxW_LOAD_io_Z(RES, ADDR, OFF) \ + BxW_LOAD_io(ubh, RES, ADDR, OFF) +#define BxW_LOAD_io_S(RES, ADDR, OFF) \ + BxW_LOAD_io(bh, RES, ADDR, OFF) + +#define TEST_io(NAME, TYPE, SIGN, SIZE, EXT, EXP1, EXP2, EXP3, EXP4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + init_buf(); \ + BxW_LOAD_io_##SIGN(result, buf, 0 * (SIZE)); \ + check(result, (EXP1) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 1 * (SIZE)); \ + check(result, (EXP2) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 2 * (SIZE)); \ + check(result, (EXP3) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 3 * (SIZE)); \ + check(result, (EXP4) | (EXT)); \ +} + + +TEST_io(loadbzw2_io, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_io(loadbsw2_io, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_io(loadbzw4_io, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_io(loadbsw4_io, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _ur addressing mode (index << offset + base) + */ +#define BxW_LOAD_ur(SZ, RES, SHIFT, IDX) \ + __asm__( \ + "%0 = mem" #SZ "(%1<<#" #SHIFT " + ##buf)\n\t" \ + : "=r"(RES) \ + : "r"(IDX)) +#define BxW_LOAD_ur_Z(RES, SHIFT, IDX) \ + BxW_LOAD_ur(ubh, RES, SHIFT, IDX) +#define BxW_LOAD_ur_S(RES, SHIFT, IDX) \ + BxW_LOAD_ur(bh, RES, SHIFT, IDX) + +#define TEST_ur(NAME, TYPE, SIGN, SHIFT, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + init_buf(); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 0); \ + check(result, (RES1) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 1); \ + check(result, (RES2) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 2); \ + check(result, (RES3) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 3); \ + check(result, (RES4) | (EXT)); \ +} \ + +TEST_ur(loadbzw2_ur, int, Z, 1, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ur(loadbsw2_ur, int, S, 1, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ur(loadbzw4_ur, long long, Z, 2, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_ur(loadbsw4_ur, long long, S, 2, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _ap addressing mode (addr = base) + */ +#define BxW_LOAD_ap(SZ, RES, PTR, ADDR) \ + __asm__( \ + "%0 = mem" #SZ "(%1 = ##" #ADDR ")\n\t" \ + : "=r"(RES), "=r"(PTR)) +#define BxW_LOAD_ap_Z(RES, PTR, ADDR) \ + BxW_LOAD_ap(ubh, RES, PTR, ADDR) +#define BxW_LOAD_ap_S(RES, PTR, ADDR) \ + BxW_LOAD_ap(bh, RES, PTR, ADDR) + +#define TEST_ap(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr; \ + init_buf(); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 0 * (SIZE))); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[0 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 1 * (SIZE))); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 2 * (SIZE))); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 3 * (SIZE))); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ +} + +TEST_ap(loadbzw2_ap, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ap(loadbsw2_ap, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ap(loadbzw4_ap, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_ap(loadbsw4_ap, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _rp addressing mode (addr ++ modifer-reg) + */ +#define BxW_LOAD_pr(SZ, RES, PTR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++m0)\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"(INC) \ + : "m0") +#define BxW_LOAD_pr_Z(RES, PTR, INC) \ + BxW_LOAD_pr(ubh, RES, PTR, INC) +#define BxW_LOAD_pr_S(RES, PTR, INC) \ + BxW_LOAD_pr(bh, RES, PTR, INC) + +#define TEST_pr(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[4 * (SIZE)]); \ +} + +TEST_pr(loadbzw2_pr, int, Z, 2, 0x00000000, + 0x00020081, 0x0040083, 0x00060085, 0x00080087) +TEST_pr(loadbsw2_pr, int, S, 2, 0x0000ff00, + 0x00020081, 0x0040083, 0x00060085, 0x00080087) +TEST_pr(loadbzw4_pr, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_pr(loadbsw4_pr, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _pbr addressing mode (addr ++ modifer-reg:brev) + */ +#define BxW_LOAD_pbr(SZ, RES, PTR) \ + __asm__( \ + "r4 = #(1 << (16 - 3))\n\t" \ + "m0 = r4\n\t" \ + "%0 = mem" #SZ "(%1++m0:brev)\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : \ + : "r4", "m0") +#define BxW_LOAD_pbr_Z(RES, PTR) \ + BxW_LOAD_pbr(ubh, RES, PTR) +#define BxW_LOAD_pbr_S(RES, PTR) \ + BxW_LOAD_pbr(bh, RES, PTR) + +#define TEST_pbr(NAME, TYPE, SIGN, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES1) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES2) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES3) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES4) | (EXT)); \ +} + +TEST_pbr(loadbzw2_pbr, int, Z, 0x00000000, + 0x00020081, 0x00060085, 0x00040083, 0x00080087) +TEST_pbr(loadbsw2_pbr, int, S, 0x0000ff00, + 0x00020081, 0x00060085, 0x00040083, 0x00080087) +TEST_pbr(loadbzw4_pbr, long long, Z, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0006008500040083LL, 0x000a008900080087LL) +TEST_pbr(loadbsw4_pbr, long long, S, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0006008500040083LL, 0x000a008900080087LL) + +/* + **************************************************************************** + * _pi addressing mode (addr ++ inc) + */ +#define BxW_LOAD_pi(SZ, RES, PTR, INC) \ + __asm__( \ + "%0 = mem" #SZ "(%1++#" #INC ")\n\t" \ + : "=r"(RES), "+r"(PTR)) +#define BxW_LOAD_pi_Z(RES, PTR, INC) \ + BxW_LOAD_pi(ubh, RES, PTR, INC) +#define BxW_LOAD_pi_S(RES, PTR, INC) \ + BxW_LOAD_pi(bh, RES, PTR, INC) + +#define TEST_pi(NAME, TYPE, SIGN, INC, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[1 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[2 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[3 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[4 * (INC)]); \ +} + +TEST_pi(loadbzw2_pi, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_pi(loadbsw2_pi, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_pi(loadbzw4_pi, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_pi(loadbsw4_pi, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _pci addressing mode (addr ++ inc:circ) + */ +#define BxW_LOAD_pci(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++#" #INC ":circ(m0))\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define BxW_LOAD_pci_Z(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pci(ubh, RES, PTR, START, LEN, INC) +#define BxW_LOAD_pci_S(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pci(bh, RES, PTR, START, LEN, INC) + +#define TEST_pci(NAME, TYPE, SIGN, LEN, INC, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \ +} + +TEST_pci(loadbzw2_pci, int, Z, 6, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00020081) +TEST_pci(loadbsw2_pci, int, S, 6, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00020081) +TEST_pci(loadbzw4_pci, long long, Z, 8, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) +TEST_pci(loadbsw4_pci, long long, S, 8, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) + +/* + **************************************************************************** + * _pcr addressing mode (addr ++ I:circ(modifier-reg)) + */ +#define BxW_LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SZ "(%1++I:circ(m1))\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define BxW_LOAD_pcr_Z(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pcr(ubh, RES, PTR, START, LEN, INC) +#define BxW_LOAD_pcr_S(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pcr(bh, RES, PTR, START, LEN, INC) + +#define TEST_pcr(NAME, TYPE, SIGN, SIZE, LEN, INC, \ + EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \ +} + +TEST_pcr(loadbzw2_pcr, int, Z, 2, 8, 2, 0x00000000, + 0x00020081, 0x00060085, 0x00020081, 0x00060085) +TEST_pcr(loadbsw2_pcr, int, S, 2, 8, 2, 0x0000ff00, + 0x00020081, 0x00060085, 0x00020081, 0x00060085) +TEST_pcr(loadbzw4_pcr, long long, Z, 4, 8, 1, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) +TEST_pcr(loadbsw4_pcr, long long, S, 4, 8, 1, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) + +int main() +{ + test_loadbzw2_io(); + test_loadbsw2_io(); + test_loadbzw4_io(); + test_loadbsw4_io(); + + test_loadbzw2_ur(); + test_loadbsw2_ur(); + test_loadbzw4_ur(); + test_loadbsw4_ur(); + + test_loadbzw2_ap(); + test_loadbsw2_ap(); + test_loadbzw4_ap(); + test_loadbsw4_ap(); + + test_loadbzw2_pr(); + test_loadbsw2_pr(); + test_loadbzw4_pr(); + test_loadbsw4_pr(); + + test_loadbzw2_pbr(); + test_loadbsw2_pbr(); + test_loadbzw4_pbr(); + test_loadbsw4_pbr(); + + test_loadbzw2_pi(); + test_loadbsw2_pi(); + test_loadbzw4_pi(); + test_loadbsw4_pi(); + + test_loadbzw2_pci(); + test_loadbsw2_pci(); + test_loadbzw4_pci(); + test_loadbsw4_pci(); + + test_loadbzw2_pcr(); + test_loadbsw2_pcr(); + test_loadbzw4_pcr(); + test_loadbsw4_pcr(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} -- cgit v1.1 From 7aa9ffab79eb2f3ba998333e3709c7b8dbc630f1 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:53 -0500 Subject: Hexagon (target/hexagon) load into shifted register instructions The following instructions are added L2_loadalignb_io Ryy32 = memb_fifo(Rs32+#s11:1) L2_loadalignh_io Ryy32 = memh_fifo(Rs32+#s11:1) L4_loadalignb_ur Ryy32 = memb_fifo(Rt32<<#u2+#U6) L4_loadalignh_ur Ryy32 = memh_fifo(Rt32<<#u2+#U6) L4_loadalignb_ap Ryy32 = memb_fifo(Re32=#U6) L4_loadalignh_ap Ryy32 = memh_fifo(Re32=#U6) L2_loadalignb_pr Ryy32 = memb_fifo(Rx32++Mu2) L2_loadalignh_pr Ryy32 = memh_fifo(Rx32++Mu2) L2_loadalignb_pbr Ryy32 = memb_fifo(Rx32++Mu2:brev) L2_loadalignh_pbr Ryy32 = memh_fifo(Rx32++Mu2:brev) L2_loadalignb_pi Ryy32 = memb_fifo(Rx32++#s4:1) L2_loadalignh_pi Ryy32 = memh_fifo(Rx32++#s4:1) L2_loadalignb_pci Ryy32 = memb_fifo(Rx32++#s4:1:circ(Mu2)) L2_loadalignh_pci Ryy32 = memh_fifo(Rx32++#s4:1:circ(Mu2)) L2_loadalignb_pcr Ryy32 = memb_fifo(Rx32++I:circ(Mu2)) L2_loadalignh_pcr Ryy32 = memh_fifo(Rx32++I:circ(Mu2)) Test cases in tests/tcg/hexagon/load_align.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-26-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/gen_tcg.h | 66 ++++++ target/hexagon/imported/encode_pp.def | 3 + target/hexagon/imported/ldst.idef | 19 ++ tests/tcg/hexagon/Makefile.target | 1 + tests/tcg/hexagon/load_align.c | 415 ++++++++++++++++++++++++++++++++++ 5 files changed, 504 insertions(+) create mode 100644 tests/tcg/hexagon/load_align.c diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 1120aae..18fcdbc 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -261,6 +261,72 @@ fGEN_TCG_loadbXw4(GET_EA_pi, true) /* + * These instructions load a half word, shift the destination right by 16 bits + * and place the loaded value in the high half word of the destination pair. + * The GET_EA macro determines the addressing mode. + */ +#define fGEN_TCG_loadalignh(GET_EA) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv_i64 tmp_i64 = tcg_temp_new_i64(); \ + GET_EA; \ + fLOAD(1, 2, u, EA, tmp); \ + tcg_gen_extu_i32_i64(tmp_i64, tmp); \ + tcg_gen_shri_i64(RyyV, RyyV, 16); \ + tcg_gen_deposit_i64(RyyV, RyyV, tmp_i64, 48, 16); \ + tcg_temp_free(tmp); \ + tcg_temp_free_i64(tmp_i64); \ + } while (0) + +#define fGEN_TCG_L4_loadalignh_ur(SHORTCODE) \ + fGEN_TCG_loadalignh(fEA_IRs(UiV, RtV, uiV)) +#define fGEN_TCG_L2_loadalignh_io(SHORTCODE) \ + fGEN_TCG_loadalignh(fEA_RI(RsV, siV)) +#define fGEN_TCG_L2_loadalignh_pci(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pci) +#define fGEN_TCG_L2_loadalignh_pcr(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pcr(1)) +#define fGEN_TCG_L4_loadalignh_ap(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_ap) +#define fGEN_TCG_L2_loadalignh_pr(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pr) +#define fGEN_TCG_L2_loadalignh_pbr(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pbr) +#define fGEN_TCG_L2_loadalignh_pi(SHORTCODE) \ + fGEN_TCG_loadalignh(GET_EA_pi) + +/* Same as above, but loads a byte instead of half word */ +#define fGEN_TCG_loadalignb(GET_EA) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv_i64 tmp_i64 = tcg_temp_new_i64(); \ + GET_EA; \ + fLOAD(1, 1, u, EA, tmp); \ + tcg_gen_extu_i32_i64(tmp_i64, tmp); \ + tcg_gen_shri_i64(RyyV, RyyV, 8); \ + tcg_gen_deposit_i64(RyyV, RyyV, tmp_i64, 56, 8); \ + tcg_temp_free(tmp); \ + tcg_temp_free_i64(tmp_i64); \ + } while (0) + +#define fGEN_TCG_L2_loadalignb_io(SHORTCODE) \ + fGEN_TCG_loadalignb(fEA_RI(RsV, siV)) +#define fGEN_TCG_L4_loadalignb_ur(SHORTCODE) \ + fGEN_TCG_loadalignb(fEA_IRs(UiV, RtV, uiV)) +#define fGEN_TCG_L2_loadalignb_pci(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pci) +#define fGEN_TCG_L2_loadalignb_pcr(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pcr(0)) +#define fGEN_TCG_L4_loadalignb_ap(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_ap) +#define fGEN_TCG_L2_loadalignb_pr(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pr) +#define fGEN_TCG_L2_loadalignb_pbr(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pbr) +#define fGEN_TCG_L2_loadalignb_pi(SHORTCODE) \ + fGEN_TCG_loadalignb(GET_EA_pi) + +/* * Predicated loads * Here is a primer to understand the tag names * diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index e3582eb..dc4eba4 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -348,6 +348,9 @@ STD_LD_ENC(bzw2,"0 011") STD_LD_ENC(bsw4,"0 111") STD_LD_ENC(bsw2,"0 001") +STD_LDX_ENC(alignh,"0 010") +STD_LDX_ENC(alignb,"0 100") + STD_LD_ENC(rb, "1 000") STD_LD_ENC(rub, "1 001") STD_LD_ENC(rh, "1 010") diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 95c0470..359d3b7 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -80,6 +80,25 @@ ATTRIBS(A_LOAD),"2", +STD_LD_AMODES(loadalignh, "Ryy32=memh_fifo", "Load Half-word into shifted vector", +ATTRIBS(A_LOAD),"1", +{ + fHIDE(size8u_t tmpV;) + fLOAD(1,2,u,EA,tmpV); + RyyV = (((size8u_t)RyyV)>>16)|(tmpV<<48); +},1) + + +STD_LD_AMODES(loadalignb, "Ryy32=memb_fifo", "Load byte into shifted vector", +ATTRIBS(A_LOAD),"0", +{ + fHIDE(size8u_t tmpV;) + fLOAD(1,1,u,EA,tmpV); + RyyV = (((size8u_t)RyyV)>>8)|(tmpV<<56); +},0) + + + /* The set of addressing modes standard to all Store instructions */ #define STD_ST_AMODES(TAG,DEST,OPER,DESCR,ATTRIB,SHFT,SEMANTICS,SCALE)\ diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 183f4e2..0992787 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -45,6 +45,7 @@ HEX_TESTS += mem_noshuf HEX_TESTS += circ HEX_TESTS += brev HEX_TESTS += load_unpack +HEX_TESTS += load_align HEX_TESTS += atomics HEX_TESTS += fpstuff diff --git a/tests/tcg/hexagon/load_align.c b/tests/tcg/hexagon/load_align.c new file mode 100644 index 0000000..12fc9cb --- /dev/null +++ b/tests/tcg/hexagon/load_align.c @@ -0,0 +1,415 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +/* + * Test load align instructions + * + * Example + * r1:0 = memh_fifo(r1+#0) + * loads a half word from memory, shifts the destination register + * right by one half word and inserts the loaded value into the high + * half word of the destination. + * + * There are 8 addressing modes and byte and half word variants, for a + * total of 16 instructions to test + */ + +#include +#include + +int err; + +char buf[16] __attribute__((aligned(1 << 16))); + +void init_buf(void) +{ + int i; + for (i = 0; i < 16; i++) { + buf[i] = i + 1; + } +} + +void __check(int line, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", + line, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +void __checkp(int line, void *p, void *expect) +{ + if (p != expect) { + printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect); + err++; + } +} + +#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP) + +/* + **************************************************************************** + * _io addressing mode (addr + offset) + */ +#define LOAD_io(SZ, RES, ADDR, OFF) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1+#" #OFF ")\n\t" \ + : "+r"(RES) \ + : "r"(ADDR)) +#define LOAD_io_b(RES, ADDR, OFF) \ + LOAD_io(b, RES, ADDR, OFF) +#define LOAD_io_h(RES, ADDR, OFF) \ + LOAD_io(h, RES, ADDR, OFF) + +#define TEST_io(NAME, SZ, SIZE, EXP1, EXP2, EXP3, EXP4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + LOAD_io_##SZ(result, buf, 0 * (SIZE)); \ + check(result, (EXP1)); \ + LOAD_io_##SZ(result, buf, 1 * (SIZE)); \ + check(result, (EXP2)); \ + LOAD_io_##SZ(result, buf, 2 * (SIZE)); \ + check(result, (EXP3)); \ + LOAD_io_##SZ(result, buf, 3 * (SIZE)); \ + check(result, (EXP4)); \ +} + +TEST_io(loadalignb_io, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_io(loadalignh_io, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _ur addressing mode (index << offset + base) + */ +#define LOAD_ur(SZ, RES, SHIFT, IDX) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1<<#" #SHIFT " + ##buf)\n\t" \ + : "+r"(RES) \ + : "r"(IDX)) +#define LOAD_ur_b(RES, SHIFT, IDX) \ + LOAD_ur(b, RES, SHIFT, IDX) +#define LOAD_ur_h(RES, SHIFT, IDX) \ + LOAD_ur(h, RES, SHIFT, IDX) + +#define TEST_ur(NAME, SZ, SHIFT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + LOAD_ur_##SZ(result, (SHIFT), 0); \ + check(result, (RES1)); \ + LOAD_ur_##SZ(result, (SHIFT), 1); \ + check(result, (RES2)); \ + LOAD_ur_##SZ(result, (SHIFT), 2); \ + check(result, (RES3)); \ + LOAD_ur_##SZ(result, (SHIFT), 3); \ + check(result, (RES4)); \ +} + +TEST_ur(loadalignb_ur, b, 1, + 0x01ffffffffffffffLL, 0x0301ffffffffffffLL, + 0x050301ffffffffffLL, 0x07050301ffffffffLL) +TEST_ur(loadalignh_ur, h, 1, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _ap addressing mode (addr = base) + */ +#define LOAD_ap(SZ, RES, PTR, ADDR) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1 = ##" #ADDR ")\n\t" \ + : "+r"(RES), "=r"(PTR)) +#define LOAD_ap_b(RES, PTR, ADDR) \ + LOAD_ap(b, RES, PTR, ADDR) +#define LOAD_ap_h(RES, PTR, ADDR) \ + LOAD_ap(h, RES, PTR, ADDR) + +#define TEST_ap(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr; \ + LOAD_ap_##SZ(result, ptr, (buf + 0 * (SIZE))); \ + check(result, (RES1)); \ + checkp(ptr, &buf[0 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 1 * (SIZE))); \ + check(result, (RES2)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 2 * (SIZE))); \ + check(result, (RES3)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 3 * (SIZE))); \ + check(result, (RES4)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ +} + +TEST_ap(loadalignb_ap, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_ap(loadalignh_ap, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _rp addressing mode (addr ++ modifer-reg) + */ +#define LOAD_pr(SZ, RES, PTR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "_fifo(%1++m0)\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"(INC) \ + : "m0") +#define LOAD_pr_b(RES, PTR, INC) \ + LOAD_pr(b, RES, PTR, INC) +#define LOAD_pr_h(RES, PTR, INC) \ + LOAD_pr(h, RES, PTR, INC) + +#define TEST_pr(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[4 * (SIZE)]); \ +} + +TEST_pr(loadalignb_pr, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_pr(loadalignh_pr, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pbr addressing mode (addr ++ modifer-reg:brev) + */ +#define LOAD_pbr(SZ, RES, PTR) \ + __asm__( \ + "r4 = #(1 << (16 - 3))\n\t" \ + "m0 = r4\n\t" \ + "%0 = mem" #SZ "_fifo(%1++m0:brev)\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : \ + : "r4", "m0") +#define LOAD_pbr_b(RES, PTR) \ + LOAD_pbr(b, RES, PTR) +#define LOAD_pbr_h(RES, PTR) \ + LOAD_pbr(h, RES, PTR) + +#define TEST_pbr(NAME, SZ, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES1)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES2)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES3)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES4)); \ +} + +TEST_pbr(loadalignb_pbr, b, + 0x01ffffffffffffffLL, 0x0501ffffffffffffLL, + 0x030501ffffffffffLL, 0x07030501ffffffffLL) +TEST_pbr(loadalignh_pbr, h, + 0x0201ffffffffffffLL, 0x06050201ffffffffLL, + 0x040306050201ffffLL, 0x0807040306050201LL) + +/* + **************************************************************************** + * _pi addressing mode (addr ++ inc) + */ +#define LOAD_pi(SZ, RES, PTR, INC) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1++#" #INC ")\n\t" \ + : "+r"(RES), "+r"(PTR)) +#define LOAD_pi_b(RES, PTR, INC) \ + LOAD_pi(b, RES, PTR, INC) +#define LOAD_pi_h(RES, PTR, INC) \ + LOAD_pi(h, RES, PTR, INC) + +#define TEST_pi(NAME, SZ, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[1 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[2 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[3 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[4 * (INC)]); \ +} + +TEST_pi(loadalignb_pi, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_pi(loadalignh_pi, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pci addressing mode (addr ++ inc:circ) + */ +#define LOAD_pci(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SZ "_fifo(%1++#" #INC ":circ(m0))\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define LOAD_pci_b(RES, PTR, START, LEN, INC) \ + LOAD_pci(b, RES, PTR, START, LEN, INC) +#define LOAD_pci_h(RES, PTR, START, LEN, INC) \ + LOAD_pci(h, RES, PTR, START, LEN, INC) + +#define TEST_pci(NAME, SZ, LEN, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \ +} + +TEST_pci(loadalignb_pci, b, 2, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x010201ffffffffffLL, 0x02010201ffffffffLL) +TEST_pci(loadalignh_pci, h, 4, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x020104030201ffffLL, 0x0403020104030201LL) + +/* + **************************************************************************** + * _pcr addressing mode (addr ++ I:circ(modifier-reg)) + */ +#define LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SZ "_fifo(%1++I:circ(m1))\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define LOAD_pcr_b(RES, PTR, START, LEN, INC) \ + LOAD_pcr(b, RES, PTR, START, LEN, INC) +#define LOAD_pcr_h(RES, PTR, START, LEN, INC) \ + LOAD_pcr(h, RES, PTR, START, LEN, INC) + +#define TEST_pcr(NAME, SZ, SIZE, LEN, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \ +} + +TEST_pcr(loadalignb_pcr, b, 1, 2, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x010201ffffffffffLL, 0x02010201ffffffffLL) +TEST_pcr(loadalignh_pcr, h, 2, 4, 1, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x020104030201ffffLL, 0x0403020104030201LL) + +int main() +{ + init_buf(); + + test_loadalignb_io(); + test_loadalignh_io(); + + test_loadalignb_ur(); + test_loadalignh_ur(); + + test_loadalignb_ap(); + test_loadalignh_ap(); + + test_loadalignb_pr(); + test_loadalignh_pr(); + + test_loadalignb_pbr(); + test_loadalignh_pbr(); + + test_loadalignb_pi(); + test_loadalignh_pi(); + + test_loadalignb_pci(); + test_loadalignh_pci(); + + test_loadalignb_pcr(); + test_loadalignh_pcr(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} -- cgit v1.1 From e628c0156be74dd14a261bbd18674bacd1afcc7d Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 8 Apr 2021 20:07:54 -0500 Subject: Hexagon (target/hexagon) CABAC decode bin The following instruction is added S2_cabacdecbin Rdd32=decbin(Rss32,Rtt32) Test cases added to tests/tcg/hexagon/misc.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <1617930474-31979-27-git-send-email-tsimpson@quicinc.com> Signed-off-by: Richard Henderson --- target/hexagon/arch.c | 91 +++++++++++++++++++++++++++++++++++ target/hexagon/arch.h | 4 ++ target/hexagon/imported/encode_pp.def | 1 + target/hexagon/imported/macros.def | 15 ++++++ target/hexagon/imported/shift.idef | 47 ++++++++++++++++++ target/hexagon/macros.h | 7 +++ tests/tcg/hexagon/misc.c | 28 +++++++++++ 7 files changed, 193 insertions(+) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index dee852e..68a55b3 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -27,6 +27,97 @@ #define SF_MANTBITS 23 #define float32_nan make_float32(0xffffffff) +/* + * These three tables are used by the cabacdecbin instruction + */ +const uint8_t rLPS_table_64x4[64][4] = { + {128, 176, 208, 240}, + {128, 167, 197, 227}, + {128, 158, 187, 216}, + {123, 150, 178, 205}, + {116, 142, 169, 195}, + {111, 135, 160, 185}, + {105, 128, 152, 175}, + {100, 122, 144, 166}, + {95, 116, 137, 158}, + {90, 110, 130, 150}, + {85, 104, 123, 142}, + {81, 99, 117, 135}, + {77, 94, 111, 128}, + {73, 89, 105, 122}, + {69, 85, 100, 116}, + {66, 80, 95, 110}, + {62, 76, 90, 104}, + {59, 72, 86, 99}, + {56, 69, 81, 94}, + {53, 65, 77, 89}, + {51, 62, 73, 85}, + {48, 59, 69, 80}, + {46, 56, 66, 76}, + {43, 53, 63, 72}, + {41, 50, 59, 69}, + {39, 48, 56, 65}, + {37, 45, 54, 62}, + {35, 43, 51, 59}, + {33, 41, 48, 56}, + {32, 39, 46, 53}, + {30, 37, 43, 50}, + {29, 35, 41, 48}, + {27, 33, 39, 45}, + {26, 31, 37, 43}, + {24, 30, 35, 41}, + {23, 28, 33, 39}, + {22, 27, 32, 37}, + {21, 26, 30, 35}, + {20, 24, 29, 33}, + {19, 23, 27, 31}, + {18, 22, 26, 30}, + {17, 21, 25, 28}, + {16, 20, 23, 27}, + {15, 19, 22, 25}, + {14, 18, 21, 24}, + {14, 17, 20, 23}, + {13, 16, 19, 22}, + {12, 15, 18, 21}, + {12, 14, 17, 20}, + {11, 14, 16, 19}, + {11, 13, 15, 18}, + {10, 12, 15, 17}, + {10, 12, 14, 16}, + {9, 11, 13, 15}, + {9, 11, 12, 14}, + {8, 10, 12, 14}, + {8, 9, 11, 13}, + {7, 9, 11, 12}, + {7, 9, 10, 12}, + {7, 8, 10, 11}, + {6, 8, 9, 11}, + {6, 7, 9, 10}, + {6, 7, 8, 9}, + {2, 2, 2, 2} +}; + +const uint8_t AC_next_state_MPS_64[64] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 62, 63 +}; + + +const uint8_t AC_next_state_LPS_64[64] = { + 0, 0, 1, 2, 2, 4, 4, 5, 6, 7, + 8, 9, 9, 11, 11, 12, 13, 13, 15, 15, + 16, 16, 18, 18, 19, 19, 21, 21, 22, 22, + 23, 24, 24, 25, 26, 26, 27, 27, 28, 29, + 29, 30, 30, 30, 31, 32, 32, 33, 33, 33, + 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, + 37, 38, 38, 63 +}; + #define BITS_MASK_8 0x5555555555555555ULL #define PAIR_MASK_8 0x3333333333333333ULL #define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL diff --git a/target/hexagon/arch.h b/target/hexagon/arch.h index 3e0c334..7091806 100644 --- a/target/hexagon/arch.h +++ b/target/hexagon/arch.h @@ -20,6 +20,10 @@ #include "qemu/int128.h" +extern const uint8_t rLPS_table_64x4[64][4]; +extern const uint8_t AC_next_state_MPS_64[64]; +extern const uint8_t AC_next_state_LPS_64[64]; + uint64_t interleave(uint32_t odd, uint32_t even); uint64_t deinterleave(uint64_t src); int32_t conv_round(int32_t a, int n); diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index dc4eba4..35ae3d2 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1767,6 +1767,7 @@ SH_RRR_ENC(S4_vxsubaddh, "0001","01-","-","110","ddddd") SH_RRR_ENC(S4_vxaddsubhr, "0001","11-","-","00-","ddddd") SH_RRR_ENC(S4_vxsubaddhr, "0001","11-","-","01-","ddddd") SH_RRR_ENC(S4_extractp_rp, "0001","11-","-","10-","ddddd") +SH_RRR_ENC(S2_cabacdecbin, "0001","11-","-","11-","ddddd") /* implicit P0 write */ DEF_FIELDROW_DESC32(ICLASS_S3op" 0010 -------- PP------ --------","[#2] Rdd=(Rss,Rtt,Pu)") diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def index 56c99b1..32ed3bf 100755 --- a/target/hexagon/imported/macros.def +++ b/target/hexagon/imported/macros.def @@ -92,6 +92,21 @@ DEF_MACRO( /* attribs */ ) + +DEF_MACRO( + fINSERT_RANGE, + { + int offset=LOWBIT; + int width=HIBIT-LOWBIT+1; + /* clear bits where new bits go */ + INREG &= ~(((fCONSTLL(1)<>29)&3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS= (range&0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + RddV = AC_next_state_MPS_64[state]; + fINSERT_RANGE(RddV,8,8,valMPS); + fINSERT_RANGE(RddV,31,23,(rMPS>>23)); + fSETWORD(1,RddV,offset); + fWRITE_P0(valMPS); + + + } + /* least probable region */ + else { + RddV = AC_next_state_LPS_64[state]; + fINSERT_RANGE(RddV,8,8,((!state)?(1-valMPS):(valMPS))); + fINSERT_RANGE(RddV,31,23,(rLPS>>23)); + fSETWORD(1,RddV,(offset-rMPS)); + fWRITE_P0((valMPS^1)); + } +}) + + Q6INSN(S2_clb,"Rd32=clb(Rs32)",ATTRIBS(), "Count leading bits", {RdV = fMAX(fCL1_4(RsV),fCL1_4(~RsV));}) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index ec5bf60..b726c3b 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -222,6 +222,13 @@ static inline void gen_pred_cancel(TCGv pred, int slot_num) (((HIBIT) - (LOWBIT) + 1) ? \ extract64((INREG), (LOWBIT), ((HIBIT) - (LOWBIT) + 1)) : \ 0LL) +#define fINSERT_RANGE(INREG, HIBIT, LOWBIT, INVAL) \ + do { \ + int width = ((HIBIT) - (LOWBIT) + 1); \ + INREG = (width >= 0 ? \ + deposit64((INREG), (LOWBIT), width, (INVAL)) : \ + INREG); \ + } while (0) #define f8BITSOF(VAL) ((VAL) ? 0xff : 0x00) diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index e5d78b4..17c3919 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -231,6 +231,14 @@ static void check(int val, int expect) } } +static void check64(long long val, long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); + err++; + } +} + uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; uint32_t array[10]; @@ -264,6 +272,16 @@ static long long creg_pair(int x, int y) return retval; } +static long long decbin(long long x, long long y, int *pred) +{ + long long retval; + asm ("%0 = decbin(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(retval), "=r"(*pred) + : "r"(x), "r"(y)); + return retval; +} + /* Check that predicates are auto-and'ed in a packet */ static int auto_and(void) { @@ -282,6 +300,8 @@ static int auto_and(void) int main() { + long long res64; + int pred; memcpy(array, init, sizeof(array)); S4_storerhnew_rr(array, 4, 0xffff); @@ -391,6 +411,14 @@ int main() res = test_clrtnew(2, 7); check(res, 7); + res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); + check64(res64, 0x357980003700010cLL); + check(pred, 0); + + res64 = decbin(0xfLL, 0x1bLL, &pred); + check64(res64, 0x78000100LL); + check(pred, 1); + res = auto_and(); check(res, 0); -- cgit v1.1