diff options
-rw-r--r-- | target/hexagon/arch.c | 6 | ||||
-rw-r--r-- | target/hexagon/fma_emu.h | 6 | ||||
-rw-r--r-- | target/hexagon/genptr.c | 14 | ||||
-rw-r--r-- | target/hexagon/macros.h | 4 | ||||
-rw-r--r-- | target/hexagon/mmvec/macros.h | 6 | ||||
-rw-r--r-- | target/hexagon/op_helper.c | 28 | ||||
-rw-r--r-- | tests/tcg/hexagon/Makefile.target | 8 | ||||
-rw-r--r-- | tests/tcg/hexagon/circ.c | 5 | ||||
-rw-r--r-- | tests/tcg/hexagon/fpstuff.c | 123 | ||||
-rw-r--r-- | tests/tcg/hexagon/hvx_misc.c | 71 | ||||
-rw-r--r-- | tests/tcg/hexagon/overflow.c | 61 | ||||
-rw-r--r-- | tests/tcg/hexagon/preg_alias.c | 84 | ||||
-rw-r--r-- | tests/tcg/hexagon/usr.c | 1141 |
13 files changed, 1474 insertions, 83 deletions
diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index 68a55b3..da79b41 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -298,8 +298,8 @@ int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust, } else { PeV = 0x00; /* Basic checks passed */ - n_exp = float32_getexp(RsV); - d_exp = float32_getexp(RtV); + n_exp = float32_getexp_raw(RsV); + d_exp = float32_getexp_raw(RtV); if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) { /* Near quotient underflow / inexact Q */ PeV = 0x80; diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h index e3b99a8..91591d6 100644 --- a/target/hexagon/fma_emu.h +++ b/target/hexagon/fma_emu.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,6 +24,10 @@ static inline bool is_finite(float64 x) } int32_t float64_getexp(float64 f64); +static inline uint32_t float32_getexp_raw(float32 f32) +{ + return extract32(f32, 23, 8); +} int32_t float32_getexp(float32 f32); float32 infinite_float32(uint8_t sign); float32 internal_fmafx(float32 a, float32 b, float32 c, diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 4419d30..cd6af4b 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -210,11 +210,15 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num, } } -static inline void gen_write_p3_0(TCGv control_reg) +static void gen_write_p3_0(DisasContext *ctx, TCGv control_reg) { + TCGv hex_p8 = tcg_temp_new(); for (int i = 0; i < NUM_PREGS; i++) { - tcg_gen_extract_tl(hex_pred[i], control_reg, i * 8, 8); + tcg_gen_extract_tl(hex_p8, control_reg, i * 8, 8); + gen_log_pred_write(ctx, i, hex_p8); + ctx_log_pred_write(ctx, i); } + tcg_temp_free(hex_p8); } /* @@ -228,7 +232,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, TCGv val) { if (reg_num == HEX_REG_P3_0) { - gen_write_p3_0(val); + gen_write_p3_0(ctx, val); } else { gen_log_reg_write(reg_num, val); ctx_log_reg_write(ctx, reg_num); @@ -250,7 +254,7 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, if (reg_num == HEX_REG_P3_0) { TCGv val32 = tcg_temp_new(); tcg_gen_extrl_i64_i32(val32, val); - gen_write_p3_0(val32); + gen_write_p3_0(ctx, val32); tcg_gen_extrh_i64_i32(val32, val); gen_log_reg_write(reg_num + 1, val32); tcg_temp_free(val32); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 19d103c..a78e84f 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -268,7 +268,7 @@ static inline void gen_pred_cancel(TCGv pred, int slot_num) #define fVSATUVALN(N, VAL) \ ({ \ - (((int)(VAL)) < 0) ? 0 : ((1LL << (N)) - 1); \ + (((int64_t)(VAL)) < 0) ? 0 : ((1LL << (N)) - 1); \ }) #define fSATUVALN(N, VAL) \ ({ \ diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index 10f4630..8345753 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,11 +164,9 @@ target_ulong va = EA; \ target_ulong va_high = EA + LEN; \ uintptr_t ra = GETPC(); \ - int log_bank = 0; \ int log_byte = 0; \ for (i0 = 0; i0 < ELEMENT_SIZE; i0++) { \ log_byte = ((va + i0) <= va_high) && QVAL; \ - log_bank |= (log_byte << i0); \ uint8_t B; \ B = cpu_ldub_data_ra(env, EA + i0, ra); \ env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + i0] = B; \ @@ -243,11 +241,9 @@ int i0; \ target_ulong va = EA; \ target_ulong va_high = EA + LEN; \ - int log_bank = 0; \ int log_byte = 0; \ for (i0 = 0; i0 < ELEM_SIZE; i0++) { \ log_byte = ((va + i0) <= va_high) && QVAL; \ - log_bank |= (log_byte << i0); \ LOG_VTCM_BYTE(va + i0, log_byte, IN.ub[ELEM_SIZE * IDX + i0], \ ELEM_SIZE * IDX + i0); \ } \ diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 057baf9..63e5ad5 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -304,8 +304,8 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS) { - int32_t K_const = sextract32(M, 24, 4); - int32_t length = sextract32(M, 0, 17); + uint32_t K_const = extract32(M, 24, 4); + uint32_t length = extract32(M, 0, 17); uint32_t new_ptr = RxV + offset; uint32_t start_addr; uint32_t end_addr; @@ -829,7 +829,7 @@ uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV) uint32_t RdV; arch_fpop_start(env); /* Hexagon checks the sign before rounding */ - if (float64_is_neg(RssV) && !float32_is_any_nan(RssV)) { + if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) { float_raise(float_flag_invalid, &env->fp_status); RdV = 0; } else { @@ -938,8 +938,7 @@ int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV) { int32_t PdV; arch_fpop_start(env); - PdV = f8BITSOF(float32_is_any_nan(RsV) || - float32_is_any_nan(RtV)); + PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status)); arch_fpop_end(env); return PdV; } @@ -948,7 +947,7 @@ float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV) { float32 RdV; arch_fpop_start(env); - RdV = float32_maxnum(RsV, RtV, &env->fp_status); + RdV = float32_maximum_number(RsV, RtV, &env->fp_status); arch_fpop_end(env); return RdV; } @@ -957,7 +956,7 @@ float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV) { float32 RdV; arch_fpop_start(env); - RdV = float32_minnum(RsV, RtV, &env->fp_status); + RdV = float32_minimum_number(RsV, RtV, &env->fp_status); arch_fpop_end(env); return RdV; } @@ -1041,10 +1040,7 @@ float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV) { float64 RddV; arch_fpop_start(env); - RddV = float64_maxnum(RssV, RttV, &env->fp_status); - if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { - float_raise(float_flag_invalid, &env->fp_status); - } + RddV = float64_maximum_number(RssV, RttV, &env->fp_status); arch_fpop_end(env); return RddV; } @@ -1053,10 +1049,7 @@ float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV) { float64 RddV; arch_fpop_start(env); - RddV = float64_minnum(RssV, RttV, &env->fp_status); - if (float64_is_any_nan(RssV) || float64_is_any_nan(RttV)) { - float_raise(float_flag_invalid, &env->fp_status); - } + RddV = float64_minimum_number(RssV, RttV, &env->fp_status); arch_fpop_end(env); return RddV; } @@ -1097,8 +1090,7 @@ int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV) { int32_t PdV; arch_fpop_start(env); - PdV = f8BITSOF(float64_is_any_nan(RssV) || - float64_is_any_nan(RttV)); + PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status)); arch_fpop_end(env); return PdV; } diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 8b07a28..23b9870 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -1,5 +1,5 @@ ## -## Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ first: $(HEX_SRC)/first.S HEX_TESTS = first HEX_TESTS += hex_sigsegv HEX_TESTS += misc +HEX_TESTS += usr HEX_TESTS += preg_alias HEX_TESTS += dual_stores HEX_TESTS += multi_result @@ -43,3 +44,8 @@ HEX_TESTS += fpstuff HEX_TESTS += overflow TESTS += $(HEX_TESTS) + +# This test has to be compiled for the -mv67t target +usr: usr.c + $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS) + diff --git a/tests/tcg/hexagon/circ.c b/tests/tcg/hexagon/circ.c index 67a1aa3..354416e 100644 --- a/tests/tcg/hexagon/circ.c +++ b/tests/tcg/hexagon/circ.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -415,7 +415,8 @@ static void circ_test_v3(void) { int *p = wbuf; int size = 15; - int K = 4; /* 64 bytes */ + /* set high bit in K to test unsigned extract in fcirc */ + int K = 8; /* 1024 bytes */ int element; int i; diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 0dff429..56bf562 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2020-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,8 +38,11 @@ const int SF_NaN_special = 0x7f800001; const int SF_ANY = 0x3f800000; const int SF_HEX_NAN = 0xffffffff; const int SF_small_neg = 0xab98fba8; +const int SF_denorm = 0x00000001; +const int SF_random = 0x346001d6; -const long long DF_NaN = 0x7ff8000000000000ULL; +const long long DF_QNaN = 0x7ff8000000000000ULL; +const long long DF_SNaN = 0x7ff7000000000000ULL; const long long DF_ANY = 0x3f80000000000000ULL; const long long DF_HEX_NAN = 0xffffffffffffffffULL; const long long DF_small_neg = 0xbd731f7500000000ULL; @@ -126,7 +129,7 @@ static void check_compare_exception(void) "p0 = dfcmp.eq(%2, %3)\n\t" "%0 = p0\n\t" "%1 = usr\n\t" - : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "p0", "usr"); check32(cmp, 0); check_fpstatus(usr, 0); @@ -135,7 +138,7 @@ static void check_compare_exception(void) "p0 = dfcmp.gt(%2, %3)\n\t" "%0 = p0\n\t" "%1 = usr\n\t" - : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "p0", "usr"); check32(cmp, 0); check_fpstatus(usr, 0); @@ -144,7 +147,7 @@ static void check_compare_exception(void) "p0 = dfcmp.ge(%2, %3)\n\t" "%0 = p0\n\t" "%1 = usr\n\t" - : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(cmp), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "p0", "usr"); check32(cmp, 0); check_fpstatus(usr, 0); @@ -206,7 +209,7 @@ static void check_dfminmax(void) int usr; /* - * Execute dfmin/dfmax instructions with one operand as NaN + * Execute dfmin/dfmax instructions with one operand as SNaN * Check that * Result is the other operand * Invalid bit in USR is set @@ -214,7 +217,7 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmin(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_ANY) : "r2", "usr"); check64(minmax, DF_ANY); check_fpstatus(usr, FPINVF); @@ -222,13 +225,35 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmax(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_ANY) : "r2", "usr"); check64(minmax, DF_ANY); check_fpstatus(usr, FPINVF); /* - * Execute dfmin/dfmax instructions with both operands NaN + * Execute dfmin/dfmax instructions with one operand as QNaN + * Check that + * Result is the other operand + * No bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = dfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) + : "r2", "usr"); + check64(minmax, DF_ANY); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0 = dfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) + : "r2", "usr"); + check64(minmax, DF_ANY); + check_fpstatus(usr, 0); + + /* + * Execute dfmin/dfmax instructions with both operands SNaN * Check that * Result is DF_HEX_NAN * Invalid bit in USR is set @@ -236,7 +261,7 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmin(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_SNaN) : "r2", "usr"); check64(minmax, DF_HEX_NAN); check_fpstatus(usr, FPINVF); @@ -244,16 +269,39 @@ static void check_dfminmax(void) asm (CLEAR_FPSTATUS "%0 = dfmax(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN) + : "=r"(minmax), "=r"(usr) : "r"(DF_SNaN), "r"(DF_SNaN) : "r2", "usr"); check64(minmax, DF_HEX_NAN); check_fpstatus(usr, FPINVF); + + /* + * Execute dfmin/dfmax instructions with both operands QNaN + * Check that + * Result is DF_HEX_NAN + * No bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = dfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_QNaN) + : "r2", "usr"); + check64(minmax, DF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0 = dfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_QNaN), "r"(DF_QNaN) + : "r2", "usr"); + check64(minmax, DF_HEX_NAN); + check_fpstatus(usr, 0); } -static void check_recip_exception(void) +static void check_sfrecipa(void) { int result; int usr; + int pred; /* * Check that sfrecipa doesn't set status bits when @@ -329,6 +377,17 @@ static void check_recip_exception(void) : "r2", "p0", "usr"); check32(result, 0x3f800000); check_fpstatus(usr, 0); + + /* + * Check that sfrecipa properly handles denorm + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(result), "=r"(pred) : "r"(SF_denorm), "r"(SF_random) + : "p0", "usr"); + check32(result, 0x6a920001); + check32(pred, 0x80); } static void check_canonical_NaN(void) @@ -411,7 +470,7 @@ static void check_canonical_NaN(void) asm(CLEAR_FPSTATUS "%0 = convert_df2sf(%2)\n\t" "%1 = usr\n\t" - : "=r"(sf_result), "=r"(usr) : "r"(DF_NaN) + : "=r"(sf_result), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check32(sf_result, SF_HEX_NAN); check_fpstatus(usr, 0); @@ -419,7 +478,7 @@ static void check_canonical_NaN(void) asm(CLEAR_FPSTATUS "%0 = dfadd(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(df_result), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "usr"); check64(df_result, DF_HEX_NAN); check_fpstatus(usr, 0); @@ -427,7 +486,7 @@ static void check_canonical_NaN(void) asm(CLEAR_FPSTATUS "%0 = dfsub(%2, %3)\n\t" "%1 = usr\n\t" - : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "=r"(df_result), "=r"(usr) : "r"(DF_QNaN), "r"(DF_ANY) : "r2", "usr"); check64(df_result, DF_HEX_NAN); check_fpstatus(usr, 0); @@ -455,6 +514,28 @@ static void check_invsqrta(void) check32(predval, 0x0); } +static void check_sffixupn(void) +{ + int result; + + /* Check that sffixupn properly deals with denorm */ + asm volatile("%0 = sffixupn(%1, %2)\n\t" + : "=r"(result) + : "r"(SF_random), "r"(SF_denorm)); + check32(result, 0x246001d6); +} + +static void check_sffixupd(void) +{ + int result; + + /* Check that sffixupd properly deals with denorm */ + asm volatile("%0 = sffixupd(%1, %2)\n\t" + : "=r"(result) + : "r"(SF_denorm), "r"(SF_random)); + check32(result, 0x146001d6); +} + static void check_float2int_convs() { int res32; @@ -567,7 +648,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2w(%2)\n\t" "%1 = usr\n\t" - : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "=r"(res32), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check32(res32, -1); check_fpstatus(usr, FPINVF); @@ -575,7 +656,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2w(%2):chop\n\t" "%1 = usr\n\t" - : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "=r"(res32), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check32(res32, -1); check_fpstatus(usr, FPINVF); @@ -583,7 +664,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2d(%2)\n\t" "%1 = usr\n\t" - : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "=r"(res64), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check64(res64, -1); check_fpstatus(usr, FPINVF); @@ -591,7 +672,7 @@ static void check_float2int_convs() asm(CLEAR_FPSTATUS "%0 = convert_df2d(%2):chop\n\t" "%1 = usr\n\t" - : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "=r"(res64), "=r"(usr) : "r"(DF_QNaN) : "r2", "usr"); check64(res64, -1); check_fpstatus(usr, FPINVF); @@ -602,9 +683,11 @@ int main() check_compare_exception(); check_sfminmax(); check_dfminmax(); - check_recip_exception(); + check_sfrecipa(); check_canonical_NaN(); check_invsqrta(); + check_sffixupn(); + check_sffixupd(); check_float2int_convs(); puts(err ? "FAIL" : "PASS"); diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index 312bb98..b896f58 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ #include <stdint.h> #include <stdbool.h> #include <string.h> +#include <limits.h> int err; @@ -432,6 +433,71 @@ TEST_PRED_OP2(pred_and, and, &, "") TEST_PRED_OP2(pred_and_n, and, &, "!") TEST_PRED_OP2(pred_xor, xor, ^, "") +static void test_vadduwsat(void) +{ + /* + * Test for saturation by adding two numbers that add to more than UINT_MAX + * and make sure the result saturates to UINT_MAX + */ + const uint32_t x = 0xffff0000; + const uint32_t y = 0x000fffff; + + memset(expect, 0x12, sizeof(MMVector)); + memset(output, 0x34, sizeof(MMVector)); + + asm volatile ("v10 = vsplat(%0)\n\t" + "v11 = vsplat(%1)\n\t" + "v21.uw = vadd(v11.uw, v10.uw):sat\n\t" + "vmem(%2+#0) = v21\n\t" + : /* no outputs */ + : "r"(x), "r"(y), "r"(output) + : "v10", "v11", "v21", "memory"); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[0].uw[j] = UINT_MAX; + } + + check_output_w(__LINE__, 1); +} + +static void test_vsubuwsat_dv(void) +{ + /* + * Test for saturation by subtracting two numbers where the result is + * negative and make sure the result saturates to zero + * + * vsubuwsat_dv operates on an HVX register pair, so we'll have a + * pair of subtractions + * w - x < 0 + * y - z < 0 + */ + const uint32_t w = 0x000000b7; + const uint32_t x = 0xffffff4e; + const uint32_t y = 0x31fe88e7; + const uint32_t z = 0x7fffff79; + + memset(expect, 0x12, sizeof(MMVector) * 2); + memset(output, 0x34, sizeof(MMVector) * 2); + + asm volatile ("v16 = vsplat(%0)\n\t" + "v17 = vsplat(%1)\n\t" + "v26 = vsplat(%2)\n\t" + "v27 = vsplat(%3)\n\t" + "v25:24.uw = vsub(v17:16.uw, v27:26.uw):sat\n\t" + "vmem(%4+#0) = v24\n\t" + "vmem(%4+#1) = v25\n\t" + : /* no outputs */ + : "r"(w), "r"(y), "r"(x), "r"(z), "r"(output) + : "v16", "v17", "v24", "v25", "v26", "v27", "memory"); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[0].uw[j] = 0x00000000; + expect[1].uw[j] = 0x00000000; + } + + check_output_w(__LINE__, 2); +} + int main() { init_buffers(); @@ -464,6 +530,9 @@ int main() test_pred_and_n(true); test_pred_xor(false); + test_vadduwsat(); + test_vsubuwsat_dv(); + puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; } diff --git a/tests/tcg/hexagon/overflow.c b/tests/tcg/hexagon/overflow.c index 196fcf7..9408785 100644 --- a/tests/tcg/hexagon/overflow.c +++ b/tests/tcg/hexagon/overflow.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -72,6 +72,20 @@ int read_usr_overflow(void) return result & 1; } +int get_usr_overflow(int usr) +{ + return usr & 1; +} + +int get_usr_fp_invalid(int usr) +{ + return (usr >> 1) & 1; +} + +int get_usr_lpcfg(int usr) +{ + return (usr >> 8) & 0x3; +} jmp_buf jmp_env; int usr_overflow; @@ -82,6 +96,49 @@ static void sig_segv(int sig, siginfo_t *info, void *puc) longjmp(jmp_env, 1); } +static void test_packet(void) +{ + int convres; + int satres; + int usr; + + asm("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "r2 = clrbit(r2, #1)\n\t" /* clear FP invalid bit */ + "usr = r2\n\t" + "{\n\t" + " %0 = convert_sf2uw(%3):chop\n\t" + " %1 = satb(%4)\n\t" + "}\n\t" + "%2 = usr\n\t" + : "=r"(convres), "=r"(satres), "=r"(usr) + : "r"(0x6a051b86), "r"(0x0410eec0) + : "r2", "usr"); + + check(convres, 0xffffffff); + check(satres, 0x7f); + check(get_usr_overflow(usr), 1); + check(get_usr_fp_invalid(usr), 1); + + asm("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "usr = r2\n\t" + "%2 = r2\n\t" + "p3 = sp3loop0(1f, #1)\n\t" + "1:\n\t" + "{\n\t" + " %0 = satb(%2)\n\t" + "}:endloop0\n\t" + "%1 = usr\n\t" + : "=r"(satres), "=r"(usr) + : "r"(0x0410eec0) + : "r2", "usr", "p3", "sa0", "lc0"); + + check(satres, 0x7f); + check(get_usr_overflow(usr), 1); + check(get_usr_lpcfg(usr), 2); +} + int main() { struct sigaction act; @@ -102,6 +159,8 @@ int main() check(usr_overflow, 0); + test_packet(); + puts(err ? "FAIL" : "PASS"); return err ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tests/tcg/hexagon/preg_alias.c b/tests/tcg/hexagon/preg_alias.c index 0cac469..b44a811 100644 --- a/tests/tcg/hexagon/preg_alias.c +++ b/tests/tcg/hexagon/preg_alias.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -57,17 +57,15 @@ typedef union { static inline void creg_alias(int cval, PRegs *pregs) { - unsigned char val; - asm volatile("c4 = %0" : : "r"(cval)); - - asm volatile("%0 = p0" : "=r"(val)); - pregs->pregs.p0 = val; - asm volatile("%0 = p1" : "=r"(val)); - pregs->pregs.p1 = val; - asm volatile("%0 = p2" : "=r"(val)); - pregs->pregs.p2 = val; - asm volatile("%0 = p3" : "=r"(val)); - pregs->pregs.p3 = val; + asm("c4 = %4\n\t" + "%0 = p0\n\t" + "%1 = p1\n\t" + "%2 = p2\n\t" + "%3 = p3\n\t" + : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), + "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3) + : "r"(cval) + : "p0", "p1", "p2", "p3"); } int err; @@ -83,22 +81,58 @@ static void check(int val, int expect) static inline void creg_alias_pair(unsigned int cval, PRegs *pregs) { unsigned long long cval_pair = (0xdeadbeefULL << 32) | cval; - unsigned char val; int c5; - asm volatile("c5:4 = %0" : : "r"(cval_pair)); - - asm volatile("%0 = p0" : "=r"(val)); - pregs->pregs.p0 = val; - asm volatile("%0 = p1" : "=r"(val)); - pregs->pregs.p1 = val; - asm volatile("%0 = p2" : "=r"(val)); - pregs->pregs.p2 = val; - asm volatile("%0 = p3" : "=r"(val)); - pregs->pregs.p3 = val; - asm volatile("%0 = c5" : "=r"(c5)); + + asm ("c5:4 = %5\n\t" + "%0 = p0\n\t" + "%1 = p1\n\t" + "%2 = p2\n\t" + "%3 = p3\n\t" + "%4 = c5\n\t" + : "=r"(pregs->pregs.p0), "=r"(pregs->pregs.p1), + "=r"(pregs->pregs.p2), "=r"(pregs->pregs.p3), "=r"(c5) + : "r"(cval_pair) + : "p0", "p1", "p2", "p3"); + check(c5, 0xdeadbeef); } +static void test_packet(void) +{ + /* + * Test that setting c4 inside a packet doesn't impact the predicates + * that are read during the packet. + */ + + int result; + int old_val = 0x0000001c; + + /* Test a predicated register transfer */ + result = old_val; + asm ( + "c4 = %1\n\t" + "{\n\t" + " c4 = %2\n\t" + " if (!p2) %0 = %3\n\t" + "}\n\t" + : "+r"(result) + : "r"(0xffffffff), "r"(0xff00ffff), "r"(0x837ed653) + : "p0", "p1", "p2", "p3"); + check(result, old_val); + + /* Test a predicated store */ + result = 0xffffffff; + asm ("c4 = %0\n\t" + "{\n\t" + " c4 = %1\n\t" + " if (!p2) memw(%2) = #0\n\t" + "}\n\t" + : + : "r"(0), "r"(0xffffffff), "r"(&result) + : "p0", "p1", "p2", "p3", "memory"); + check(result, 0x0); +} + int main() { int c4; @@ -164,6 +198,8 @@ int main() creg_alias_pair(0xffffffff, &pregs); check(pregs.creg, 0xffffffff); + test_packet(); + puts(err ? "FAIL" : "PASS"); return err; } diff --git a/tests/tcg/hexagon/usr.c b/tests/tcg/hexagon/usr.c new file mode 100644 index 0000000..a531511 --- /dev/null +++ b/tests/tcg/hexagon/usr.c @@ -0,0 +1,1141 @@ +/* + * Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test instructions that might set bits in user status register (USR) + */ + +#include <stdio.h> +#include <stdint.h> + +int err; + +static void __check(int line, uint32_t val, uint32_t expect) +{ + if (val != expect) { + printf("ERROR at line %d: %d != %d\n", line, val, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +static void __check32(int line, uint32_t val, uint32_t expect) +{ + if (val != expect) { + printf("ERROR at line %d: 0x%08x != 0x%08x\n", line, val, expect); + err++; + } +} + +#define check32(RES, EXP) __check32(__LINE__, RES, EXP) + +static void __check64(int line, uint64_t val, uint64_t expect) +{ + if (val != expect) { + printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", line, val, expect); + err++; + } +} + +#define check64(RES, EXP) __check64(__LINE__, RES, EXP) + +/* + * Some of the instructions tested are only available on certain versions + * of the Hexagon core + */ +#define CORE_HAS_AUDIO (__HEXAGON_ARCH__ >= 67 && defined(__HEXAGON_AUDIO__)) +#define CORE_IS_V67 (__HEXAGON_ARCH__ >= 67) + +/* Define the bits in Hexagon USR register */ +#define USR_OVF_BIT 0 /* Sticky saturation overflow */ +#define USR_FPINVF_BIT 1 /* IEEE FP invalid sticky flag */ +#define USR_FPDBZF_BIT 2 /* IEEE FP divide-by-zero sticky flag */ +#define USR_FPOVFF_BIT 3 /* IEEE FP overflow sticky flag */ +#define USR_FPUNFF_BIT 4 /* IEEE FP underflow sticky flag */ +#define USR_FPINPF_BIT 5 /* IEEE FP inexact sticky flag */ + +/* Corresponding values in USR */ +#define USR_CLEAR 0 +#define USR_OVF (1 << USR_OVF_BIT) +#define USR_FPINVF (1 << USR_FPINVF_BIT) +#define USR_FPDBZF (1 << USR_FPDBZF_BIT) +#define USR_FPOVFF (1 << USR_FPOVFF_BIT) +#define USR_FPUNFF (1 << USR_FPUNFF_BIT) +#define USR_FPINPF (1 << USR_FPINPF_BIT) + +/* Some useful floating point values */ +const uint32_t SF_INF = 0x7f800000; +const uint32_t SF_QNaN = 0x7fc00000; +const uint32_t SF_SNaN = 0x7fb00000; +const uint32_t SF_QNaN_neg = 0xffc00000; +const uint32_t SF_SNaN_neg = 0xffb00000; +const uint32_t SF_HEX_NaN = 0xffffffff; +const uint32_t SF_zero = 0x00000000; +const uint32_t SF_one = 0x3f800000; +const uint32_t SF_one_recip = 0x3f7f0001; /* 0.9960... */ +const uint32_t SF_one_invsqrta = 0x3f7f0000; /* 0.99609375 */ +const uint32_t SF_two = 0x40000000; +const uint32_t SF_four = 0x40800000; +const uint32_t SF_small_neg = 0xab98fba8; +const uint32_t SF_large_pos = 0x5afa572e; + +const uint64_t DF_QNaN = 0x7ff8000000000000ULL; +const uint64_t DF_SNaN = 0x7ff7000000000000ULL; +const uint64_t DF_QNaN_neg = 0xfff8000000000000ULL; +const uint64_t DF_SNaN_neg = 0xfff7000000000000ULL; +const uint64_t DF_HEX_NaN = 0xffffffffffffffffULL; +const uint64_t DF_zero = 0x0000000000000000ULL; +const uint64_t DF_any = 0x3f80000000000000ULL; +const uint64_t DF_one = 0x3ff0000000000000ULL; +const uint64_t DF_one_hh = 0x3ff001ff80000000ULL; /* 1.00048... */ +const uint64_t DF_small_neg = 0xbd731f7500000000ULL; +const uint64_t DF_large_pos = 0x7f80000000000001ULL; + +/* + * Templates for functions to execute an instruction + * + * The templates vary by the number of arguments and the types of the args + * and result. We use one letter in the macro name for the result and each + * argument: + * x unknown (specified in a subsequent template) or don't care + * R register (32 bits) + * P pair (64 bits) + * p predicate + * I immediate + * Xx read/write + */ + +/* Clear bits 0-5 in USR */ +#define CLEAR_USRBITS \ + "r2 = usr\n\t" \ + "r2 = and(r2, #0xffffffc0)\n\t" \ + "usr = r2\n\t" + +/* Template for instructions with one register operand */ +#define FUNC_x_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \ +static RESTYPE NAME(SRCTYPE src, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_R_OP_R(NAME, INSN) \ +FUNC_x_OP_x(uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_P(NAME, INSN) \ +FUNC_x_OP_x(uint32_t, uint64_t, NAME, INSN) + +#define FUNC_P_OP_P(NAME, INSN) \ +FUNC_x_OP_x(uint64_t, uint64_t, NAME, INSN) + +#define FUNC_P_OP_R(NAME, INSN) \ +FUNC_x_OP_x(uint64_t, uint32_t, NAME, INSN) + +/* + * Template for instructions with a register and predicate result + * and one register operand + */ +#define FUNC_xp_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \ +static RESTYPE NAME(SRCTYPE src, uint8_t *pred_result, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint8_t pred; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = p2\n\t" \ + "%2 = usr\n\t" \ + : "=r"(result), "=r"(pred), "=r"(usr) \ + : "r"(src) \ + : "r2", "p2", "usr"); \ + *pred_result = pred; \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_Rp_OP_R(NAME, INSN) \ +FUNC_xp_OP_x(uint32_t, uint32_t, NAME, INSN) + +/* Template for instructions with two register operands */ +#define FUNC_x_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_P_OP_PP(NAME, INSN) \ +FUNC_x_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN) + +#define FUNC_R_OP_PP(NAME, INSN) \ +FUNC_x_OP_xx(uint32_t, uint64_t, uint64_t, NAME, INSN) + +#define FUNC_P_OP_RR(NAME, INSN) \ +FUNC_x_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_RR(NAME, INSN) \ +FUNC_x_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_PR(NAME, INSN) \ +FUNC_x_OP_xx(uint32_t, uint64_t, uint32_t, NAME, INSN) + +#define FUNC_P_OP_PR(NAME, INSN) \ +FUNC_x_OP_xx(uint64_t, uint64_t, uint32_t, NAME, INSN) + +/* + * Template for instructions with a register and predicate result + * and two register operands + */ +#define FUNC_xp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, \ + uint8_t *pred_result, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint8_t pred; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = p2\n\t" \ + "%2 = usr\n\t" \ + : "=r"(result), "=r"(pred), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "p2", "usr"); \ + *pred_result = pred; \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_Rp_OP_RR(NAME, INSN) \ +FUNC_xp_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN) + +/* Template for instructions with one register and one immediate */ +#define FUNC_x_OP_xI(RESTYPE, SRC1TYPE, NAME, INSN) \ +static RESTYPE NAME(SRC1TYPE src1, int32_t src2, uint32_t *usr_result) \ +{ \ + RESTYPE result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src1), "i"(src2) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_R_OP_RI(NAME, INSN) \ +FUNC_x_OP_xI(uint32_t, uint32_t, NAME, INSN) + +#define FUNC_R_OP_PI(NAME, INSN) \ +FUNC_x_OP_xI(uint32_t, uint64_t, NAME, INSN) + +/* + * Template for instructions with a read/write result + * and two register operands + */ +#define FUNC_Xx_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \ + uint32_t *usr_result) \ +{ \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "+r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_XR_OP_RR(NAME, INSN) \ +FUNC_Xx_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN) + +#define FUNC_XP_OP_PP(NAME, INSN) \ +FUNC_Xx_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN) + +#define FUNC_XP_OP_RR(NAME, INSN) \ +FUNC_Xx_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN) + +/* + * Template for instructions with a read/write result + * and two register operands + */ +#define FUNC_Xxp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \ + uint8_t *pred_result, uint32_t *usr_result) \ +{ \ + uint32_t usr; \ + uint8_t pred; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%1 = p2\n\t" \ + "%2 = usr\n\t" \ + : "+r"(result), "=r"(pred), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "r2", "usr"); \ + *pred_result = pred; \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_XPp_OP_PP(NAME, INSN) \ +FUNC_Xxp_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN) + +/* + * Template for instructions with a read/write result and + * two register and one predicate operands + */ +#define FUNC_Xx_OP_xxp(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, uint8_t pred,\ + uint32_t *usr_result) \ +{ \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + "p2 = %4\n\t" \ + INSN "\n\t" \ + "%1 = usr\n\t" \ + : "+r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2), "r"(pred) \ + : "r2", "p2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_XR_OP_RRp(NAME, INSN) \ +FUNC_Xx_OP_xxp(uint32_t, uint32_t, uint32_t, NAME, INSN) + +/* Template for compare instructions with two register operands */ +#define FUNC_CMP_xx(SRC1TYPE, SRC2TYPE, NAME, INSN) \ +static uint32_t NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \ +{ \ + uint32_t result; \ + uint32_t usr; \ + asm(CLEAR_USRBITS \ + INSN "\n\t" \ + "%0 = p1\n\t" \ + "%1 = usr\n\t" \ + : "=r"(result), "=r"(usr) \ + : "r"(src1), "r"(src2) \ + : "p1", "r2", "usr"); \ + *usr_result = usr & 0x3f; \ + return result; \ +} + +#define FUNC_CMP_RR(NAME, INSN) \ +FUNC_CMP_xx(uint32_t, uint32_t, NAME, INSN) + +#define FUNC_CMP_PP(NAME, INSN) \ +FUNC_CMP_xx(uint64_t, uint64_t, NAME, INSN) + +/* + * Function declarations using the templates + */ +FUNC_R_OP_R(satub, "%0 = satub(%2)") +FUNC_P_OP_PP(vaddubs, "%0 = vaddub(%2, %3):sat") +FUNC_P_OP_PP(vadduhs, "%0 = vadduh(%2, %3):sat") +FUNC_P_OP_PP(vsububs, "%0 = vsubub(%2, %3):sat") +FUNC_P_OP_PP(vsubuhs, "%0 = vsubuh(%2, %3):sat") + +/* Add vector of half integers with saturation and pack to unsigned bytes */ +FUNC_R_OP_PP(vaddhubs, "%0 = vaddhub(%2, %3):sat") + +/* Vector saturate half to unsigned byte */ +FUNC_R_OP_P(vsathub, "%0 = vsathub(%2)") + +/* Similar to above but takes a 32-bit argument */ +FUNC_R_OP_R(svsathub, "%0 = vsathub(%2)") + +/* Vector saturate word to unsigned half */ +FUNC_P_OP_P(vsatwuh_nopack, "%0 = vsatwuh(%2)") + +/* Similar to above but returns a 32-bit result */ +FUNC_R_OP_P(vsatwuh, "%0 = vsatwuh(%2)") + +/* Vector arithmetic shift halfwords with saturate and pack */ +FUNC_R_OP_PI(asrhub_sat, "%0 = vasrhub(%2, #%3):sat") + +/* Vector arithmetic shift halfwords with round, saturate and pack */ +FUNC_R_OP_PI(asrhub_rnd_sat, "%0 = vasrhub(%2, #%3):raw") + +FUNC_R_OP_RR(addsat, "%0 = add(%2, %3):sat") +/* Similar to above but with register pairs */ +FUNC_P_OP_PP(addpsat, "%0 = add(%2, %3):sat") + +FUNC_XR_OP_RR(mpy_acc_sat_hh_s0, "%0 += mpy(%2.H, %3.H):sat") +FUNC_R_OP_RR(mpy_sat_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:sat") +FUNC_R_OP_RR(mpy_sat_rnd_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:rnd:sat") +FUNC_R_OP_RR(mpy_up_s1_sat, "%0 = mpy(%2, %3):<<1:sat") +FUNC_P_OP_RR(vmpy2s_s1, "%0 = vmpyh(%2, %3):<<1:sat") +FUNC_P_OP_RR(vmpy2su_s1, "%0 = vmpyhsu(%2, %3):<<1:sat") +FUNC_R_OP_RR(vmpy2s_s1pack, "%0 = vmpyh(%2, %3):<<1:rnd:sat") +FUNC_P_OP_PP(vmpy2es_s1, "%0 = vmpyeh(%2, %3):<<1:sat") +FUNC_R_OP_PP(vdmpyrs_s1, "%0 = vdmpy(%2, %3):<<1:rnd:sat") +FUNC_XP_OP_PP(vdmacs_s0, "%0 += vdmpy(%2, %3):sat") +FUNC_R_OP_RR(cmpyrs_s0, "%0 = cmpy(%2, %3):rnd:sat") +FUNC_XP_OP_RR(cmacs_s0, "%0 += cmpy(%2, %3):sat") +FUNC_XP_OP_RR(cnacs_s0, "%0 -= cmpy(%2, %3):sat") +FUNC_P_OP_PP(vrcmpys_s1_h, "%0 = vrcmpys(%2, %3):<<1:sat:raw:hi") +FUNC_XP_OP_PP(mmacls_s0, "%0 += vmpyweh(%2, %3):sat") +FUNC_R_OP_RR(hmmpyl_rs1, "%0 = mpy(%2, %3.L):<<1:rnd:sat") +FUNC_XP_OP_PP(mmaculs_s0, "%0 += vmpyweuh(%2, %3):sat") +FUNC_R_OP_PR(cmpyi_wh, "%0 = cmpyiwh(%2, %3):<<1:rnd:sat") +FUNC_P_OP_PP(vcmpy_s0_sat_i, "%0 = vcmpyi(%2, %3):sat") +FUNC_P_OP_PR(vcrotate, "%0 = vcrotate(%2, %3)") +FUNC_P_OP_PR(vcnegh, "%0 = vcnegh(%2, %3)") + +#if CORE_HAS_AUDIO +FUNC_R_OP_PP(wcmpyrw, "%0 = cmpyrw(%2, %3):<<1:sat") +#endif + +FUNC_R_OP_RR(addh_l16_sat_ll, "%0 = add(%2.L, %3.L):sat") +FUNC_P_OP_P(vconj, "%0 = vconj(%2):sat") +FUNC_P_OP_PP(vxaddsubw, "%0 = vxaddsubw(%2, %3):sat") +FUNC_P_OP_P(vabshsat, "%0 = vabsh(%2):sat") +FUNC_P_OP_PP(vnavgwr, "%0 = vnavgw(%2, %3):rnd:sat") +FUNC_R_OP_RI(round_ri_sat, "%0 = round(%2, #%3):sat") +FUNC_R_OP_RR(asr_r_r_sat, "%0 = asr(%2, %3):sat") + +FUNC_XPp_OP_PP(ACS, "%0, p2 = vacsh(%3, %4)") + +/* Floating point */ +FUNC_R_OP_RR(sfmin, "%0 = sfmin(%2, %3)") +FUNC_R_OP_RR(sfmax, "%0 = sfmax(%2, %3)") +FUNC_R_OP_RR(sfadd, "%0 = sfadd(%2, %3)") +FUNC_R_OP_RR(sfsub, "%0 = sfsub(%2, %3)") +FUNC_R_OP_RR(sfmpy, "%0 = sfmpy(%2, %3)") +FUNC_XR_OP_RR(sffma, "%0 += sfmpy(%2, %3)") +FUNC_XR_OP_RR(sffms, "%0 -= sfmpy(%2, %3)") +FUNC_CMP_RR(sfcmpuo, "p1 = sfcmp.uo(%2, %3)") +FUNC_CMP_RR(sfcmpeq, "p1 = sfcmp.eq(%2, %3)") +FUNC_CMP_RR(sfcmpgt, "p1 = sfcmp.gt(%2, %3)") +FUNC_CMP_RR(sfcmpge, "p1 = sfcmp.ge(%2, %3)") + +FUNC_P_OP_PP(dfadd, "%0 = dfadd(%2, %3)") +FUNC_P_OP_PP(dfsub, "%0 = dfsub(%2, %3)") + +#if CORE_IS_V67 +FUNC_P_OP_PP(dfmin, "%0 = dfmin(%2, %3)") +FUNC_P_OP_PP(dfmax, "%0 = dfmax(%2, %3)") +FUNC_XP_OP_PP(dfmpyhh, "%0 += dfmpyhh(%2, %3)") +#endif + +FUNC_CMP_PP(dfcmpuo, "p1 = dfcmp.uo(%2, %3)") +FUNC_CMP_PP(dfcmpeq, "p1 = dfcmp.eq(%2, %3)") +FUNC_CMP_PP(dfcmpgt, "p1 = dfcmp.gt(%2, %3)") +FUNC_CMP_PP(dfcmpge, "p1 = dfcmp.ge(%2, %3)") + +/* Conversions from sf */ +FUNC_P_OP_R(conv_sf2df, "%0 = convert_sf2df(%2)") +FUNC_R_OP_R(conv_sf2uw, "%0 = convert_sf2uw(%2)") +FUNC_R_OP_R(conv_sf2w, "%0 = convert_sf2w(%2)") +FUNC_P_OP_R(conv_sf2ud, "%0 = convert_sf2ud(%2)") +FUNC_P_OP_R(conv_sf2d, "%0 = convert_sf2d(%2)") +FUNC_R_OP_R(conv_sf2uw_chop, "%0 = convert_sf2uw(%2):chop") +FUNC_R_OP_R(conv_sf2w_chop, "%0 = convert_sf2w(%2):chop") +FUNC_P_OP_R(conv_sf2ud_chop, "%0 = convert_sf2ud(%2):chop") +FUNC_P_OP_R(conv_sf2d_chop, "%0 = convert_sf2d(%2):chop") + +/* Conversions from df */ +FUNC_R_OP_P(conv_df2sf, "%0 = convert_df2sf(%2)") +FUNC_R_OP_P(conv_df2uw, "%0 = convert_df2uw(%2)") +FUNC_R_OP_P(conv_df2w, "%0 = convert_df2w(%2)") +FUNC_P_OP_P(conv_df2ud, "%0 = convert_df2ud(%2)") +FUNC_P_OP_P(conv_df2d, "%0 = convert_df2d(%2)") +FUNC_R_OP_P(conv_df2uw_chop, "%0 = convert_df2uw(%2):chop") +FUNC_R_OP_P(conv_df2w_chop, "%0 = convert_df2w(%2):chop") +FUNC_P_OP_P(conv_df2ud_chop, "%0 = convert_df2ud(%2):chop") +FUNC_P_OP_P(conv_df2d_chop, "%0 = convert_df2d(%2):chop") + +/* Integer to float conversions */ +FUNC_R_OP_R(conv_uw2sf, "%0 = convert_uw2sf(%2)") +FUNC_R_OP_R(conv_w2sf, "%0 = convert_w2sf(%2)") +FUNC_R_OP_P(conv_ud2sf, "%0 = convert_ud2sf(%2)") +FUNC_R_OP_P(conv_d2sf, "%0 = convert_d2sf(%2)") + +/* Special purpose floating point instructions */ +FUNC_XR_OP_RRp(sffma_sc, "%0 += sfmpy(%2, %3, p2):scale") +FUNC_Rp_OP_RR(sfrecipa, "%0, p2 = sfrecipa(%3, %4)") +FUNC_R_OP_RR(sffixupn, "%0 = sffixupn(%2, %3)") +FUNC_R_OP_RR(sffixupd, "%0 = sffixupd(%2, %3)") +FUNC_R_OP_R(sffixupr, "%0 = sffixupr(%2)") +FUNC_Rp_OP_R(sfinvsqrta, "%0, p2 = sfinvsqrta(%3)") + +/* + * Templates for test cases + * + * Same naming convention as the function templates + */ +#define TEST_x_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRCTYPE src = SRC; \ + uint32_t usr_result; \ + result = FUNC(src, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_R_OP_R(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, USR_RES) + +#define TEST_R_OP_P(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint32_t, check32, uint64_t, FUNC, SRC, RES, USR_RES) + +#define TEST_P_OP_P(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint64_t, check64, uint64_t, FUNC, SRC, RES, USR_RES) + +#define TEST_P_OP_R(FUNC, SRC, RES, USR_RES) \ +TEST_x_OP_x(uint64_t, check64, uint32_t, FUNC, SRC, RES, USR_RES) + +#define TEST_xp_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, \ + RES, PRED_RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRCTYPE src = SRC; \ + uint8_t pred_result; \ + uint32_t usr_result; \ + result = FUNC(src, &pred_result, &usr_result); \ + CHECKFN(result, RES); \ + check(pred_result, PRED_RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_Rp_OP_R(FUNC, SRC, RES, PRED_RES, USR_RES) \ +TEST_xp_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, PRED_RES, USR_RES) + +#define TEST_x_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, SRC1, SRC2, RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_P_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint64_t, check64, uint64_t, uint64_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint32_t, check32, uint64_t, uint64_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_P_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint64_t, check64, uint32_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint32_t, check32, uint32_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint32_t, check32, uint64_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_P_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xx(uint64_t, check64, uint64_t, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_xp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, FUNC, SRC1, SRC2, \ + RES, PRED_RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint8_t pred_result; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &pred_result, &usr_result); \ + CHECKFN(result, RES); \ + check(pred_result, PRED_RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_Rp_OP_RR(FUNC, SRC1, SRC2, RES, PRED_RES, USR_RES) \ +TEST_xp_OP_xx(uint32_t, check32, uint32_t, uint32_t, FUNC, SRC1, SRC2, \ + RES, PRED_RES, USR_RES) + +#define TEST_x_OP_xI(RESTYPE, CHECKFN, SRC1TYPE, \ + FUNC, SRC1, SRC2, RES, USR_RES) \ + do { \ + RESTYPE result; \ + SRC1TYPE src1 = SRC1; \ + uint32_t src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_R_OP_RI(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xI(uint32_t, check32, uint32_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_R_OP_PI(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_x_OP_xI(uint32_t, check64, uint64_t, \ + FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_Xx_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ + do { \ + RESTYPE result = RESIN; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(result, src1, src2, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_XR_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ +TEST_Xx_OP_xx(uint32_t, check32, uint32_t, uint32_t, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) + +#define TEST_XP_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ +TEST_Xx_OP_xx(uint64_t, check64, uint64_t, uint64_t, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) + +#define TEST_XP_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \ +TEST_Xx_OP_xx(uint64_t, check64, uint32_t, uint32_t, \ + FUNC, RESIN, SRC1, SRC2, RES, USR_RES) + +#define TEST_Xxp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \ + do { \ + RESTYPE result = RESIN; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint8_t pred_res; \ + uint32_t usr_result; \ + result = FUNC(result, src1, src2, &pred_res, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_XPp_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \ +TEST_Xxp_OP_xx(uint64_t, check64, uint64_t, uint64_t, FUNC, RESIN, SRC1, SRC2, \ + RES, PRED_RES, USR_RES) + +#define TEST_Xx_OP_xxp(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \ + FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \ + do { \ + RESTYPE result = RESIN; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint8_t pred = PRED; \ + uint32_t usr_result; \ + result = FUNC(result, src1, src2, pred, &usr_result); \ + CHECKFN(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_XR_OP_RRp(FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \ +TEST_Xx_OP_xxp(uint32_t, check32, uint32_t, uint32_t, \ + FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) + +#define TEST_CMP_xx(SRC1TYPE, SRC2TYPE, \ + FUNC, SRC1, SRC2, RES, USR_RES) \ + do { \ + uint32_t result; \ + SRC1TYPE src1 = SRC1; \ + SRC2TYPE src2 = SRC2; \ + uint32_t usr_result; \ + result = FUNC(src1, src2, &usr_result); \ + check(result, RES); \ + check(usr_result, USR_RES); \ + } while (0) + +#define TEST_CMP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_CMP_xx(uint32_t, uint32_t, FUNC, SRC1, SRC2, RES, USR_RES) + +#define TEST_CMP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \ +TEST_CMP_xx(uint64_t, uint64_t, FUNC, SRC1, SRC2, RES, USR_RES) + +int main() +{ + TEST_R_OP_R(satub, 0, 0, USR_CLEAR); + TEST_R_OP_R(satub, 0xff, 0xff, USR_CLEAR); + TEST_R_OP_R(satub, 0xfff, 0xff, USR_OVF); + TEST_R_OP_R(satub, -1, 0, USR_OVF); + + TEST_P_OP_PP(vaddubs, 0xfeLL, 0x01LL, 0xffLL, USR_CLEAR); + TEST_P_OP_PP(vaddubs, 0xffLL, 0xffLL, 0xffLL, USR_OVF); + + TEST_P_OP_PP(vadduhs, 0xfffeLL, 0x1LL, 0xffffLL, USR_CLEAR); + TEST_P_OP_PP(vadduhs, 0xffffLL, 0x1LL, 0xffffLL, USR_OVF); + + TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0101010101010101LL, + 0x0706050403020100LL, USR_CLEAR); + TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0202020202020202LL, + 0x0605040302010000LL, USR_OVF); + + TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0001000100010001LL, + 0x0003000200010000LL, USR_CLEAR); + TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0002000200020002LL, + 0x0002000100000000LL, USR_OVF); + + TEST_R_OP_PP(vaddhubs, 0x0004000300020001LL, 0x0001000100010001LL, + 0x05040302, USR_CLEAR); + TEST_R_OP_PP(vaddhubs, 0x7fff000300020001LL, 0x0002000200020002LL, + 0xff050403, USR_OVF); + + TEST_R_OP_P(vsathub, 0x0001000300020001LL, 0x01030201, USR_CLEAR); + TEST_R_OP_P(vsathub, 0x010000700080ffffLL, 0xff708000, USR_OVF); + + TEST_R_OP_P(vsatwuh, 0x0000ffff00000001LL, 0xffff0001, USR_CLEAR); + TEST_R_OP_P(vsatwuh, 0x800000000000ffffLL, 0x0000ffff, USR_OVF); + + TEST_P_OP_P(vsatwuh_nopack, 0x0000ffff00000001LL, 0x0000ffff00000001LL, + USR_CLEAR); + TEST_P_OP_P(vsatwuh_nopack, 0x800000000000ffffLL, 0x000000000000ffffLL, + USR_OVF); + + TEST_R_OP_R(svsathub, 0x00020001, 0x0201, USR_CLEAR); + TEST_R_OP_R(svsathub, 0x0080ffff, 0x8000, USR_OVF); + + TEST_R_OP_PI(asrhub_sat, 0x004f003f002f001fLL, 3, 0x09070503, + USR_CLEAR); + TEST_R_OP_PI(asrhub_sat, 0x004fffff8fff001fLL, 3, 0x09000003, + USR_OVF); + + TEST_R_OP_PI(asrhub_rnd_sat, 0x004f003f002f001fLL, 2, 0x0a080604, + USR_CLEAR); + TEST_R_OP_PI(asrhub_rnd_sat, 0x004fffff8fff001fLL, 2, 0x0a000004, + USR_OVF); + + TEST_R_OP_RR(addsat, 1, 2, 3, + USR_CLEAR); + TEST_R_OP_RR(addsat, 0x7fffffff, 0x00000010, 0x7fffffff, + USR_OVF); + TEST_R_OP_RR(addsat, 0x80000000, 0x80000006, 0x80000000, + USR_OVF); + + TEST_P_OP_PP(addpsat, 1LL, 2LL, 3LL, USR_CLEAR); + /* overflow to max positive */ + TEST_P_OP_PP(addpsat, 0x7ffffffffffffff0LL, 0x0000000000000010LL, + 0x7fffffffffffffffLL, USR_OVF); + /* overflow to min negative */ + TEST_P_OP_PP(addpsat, 0x8000000000000003LL, 0x8000000000000006LL, + 0x8000000000000000LL, USR_OVF); + + TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0xffff0000, 0x11110000, + 0x7fffeeee, USR_CLEAR); + TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0x7fff0000, 0x7fff0000, + 0x7fffffff, USR_OVF); + + TEST_R_OP_RR(mpy_sat_hh_s1, 0xffff0000, 0x11110000, 0xffffddde, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_hh_s1, 0x7fff0000, 0x7fff0000, 0x7ffe0002, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_hh_s1, 0x80000000, 0x80000000, 0x7fffffff, + USR_OVF); + + TEST_R_OP_RR(mpy_sat_rnd_hh_s1, 0xffff0000, 0x11110000, 0x00005dde, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_rnd_hh_s1, 0x7fff0000, 0x7fff0000, 0x7ffe8002, + USR_CLEAR); + TEST_R_OP_RR(mpy_sat_rnd_hh_s1, 0x80000000, 0x80000000, 0x7fffffff, + USR_OVF); + + TEST_R_OP_RR(mpy_up_s1_sat, 0xffff0000, 0x11110000, 0xffffddde, + USR_CLEAR); + TEST_R_OP_RR(mpy_up_s1_sat, 0x7fff0000, 0x7fff0000, 0x7ffe0002, + USR_CLEAR); + TEST_R_OP_RR(mpy_up_s1_sat, 0x80000000, 0x80000000, 0x7fffffff, + USR_OVF); + + TEST_P_OP_RR(vmpy2s_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL, + USR_CLEAR); + TEST_P_OP_RR(vmpy2s_s1, 0x80000000, 0x80000000, 0x7fffffff00000000LL, + USR_OVF); + + TEST_P_OP_RR(vmpy2su_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL, + USR_CLEAR); + TEST_P_OP_RR(vmpy2su_s1, 0xffffbd97, 0xffffffff, 0xfffe000280000000LL, + USR_OVF); + + TEST_R_OP_RR(vmpy2s_s1pack, 0x7fff0000, 0x7fff0000, 0x7ffe0000, + USR_CLEAR); + TEST_R_OP_RR(vmpy2s_s1pack, 0x80008000, 0x80008000, 0x7fff7fff, + USR_OVF); + + TEST_P_OP_PP(vmpy2es_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL, + 0x1ffec0021ffec002LL, USR_CLEAR); + TEST_P_OP_PP(vmpy2es_s1, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fffffff7fffffffLL, USR_OVF); + + TEST_R_OP_PP(vdmpyrs_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL, + 0x3ffe3ffe, USR_CLEAR); + TEST_R_OP_PP(vdmpyrs_s1, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fff7fffLL, USR_OVF); + + TEST_XP_OP_PP(vdmacs_s0, 0x0fffffffULL, 0x00ff00ff00ff00ffLL, + 0x00ff00ff00ff00ffLL, 0x0001fc021001fc01LL, USR_CLEAR); + TEST_XP_OP_PP(vdmacs_s0, 0x01111111ULL, 0x8000800080001000LL, + 0x8000800080008000LL, 0x7fffffff39111111LL, USR_OVF); + + TEST_R_OP_RR(cmpyrs_s0, 0x7fff0000, 0x7fff0000, 0x0000c001, + USR_CLEAR); + TEST_R_OP_RR(cmpyrs_s0, 0x80008000, 0x80008000, 0x7fff0000, + USR_OVF); + + TEST_XP_OP_RR(cmacs_s0, 0x0fffffff, 0x7fff0000, 0x7fff0000, + 0x00000000d000fffeLL, USR_CLEAR); + TEST_XP_OP_RR(cmacs_s0, 0x0fff1111, 0x80008000, 0x80008000, + 0x7fffffff0fff1111LL, USR_OVF); + + TEST_XP_OP_RR(cnacs_s0, 0x000000108fffffffULL, 0x7fff0000, 0x7fff0000, + 0x00000010cfff0000ULL, USR_CLEAR); + TEST_XP_OP_RR(cnacs_s0, 0x000000108ff1111fULL, 0x00002001, 0x00007ffd, + 0x0000001080000000ULL, USR_OVF); + + TEST_P_OP_PP(vrcmpys_s1_h, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL, + 0x0003f8040003f804LL, USR_CLEAR); + TEST_P_OP_PP(vrcmpys_s1_h, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fffffff7fffffffLL, USR_OVF); + + TEST_XP_OP_PP(mmacls_s0, 0x6fffffff, 0x00ff00ff00ff00ffLL, + 0x00ff00ff00ff00ffLL, 0x0000fe017000fe00LL, USR_CLEAR); + TEST_XP_OP_PP(mmacls_s0, 0x6f1111ff, 0x8000800080008000LL, + 0x1000100080008000LL, 0xf80008007fffffffLL, USR_OVF); + + TEST_R_OP_RR(hmmpyl_rs1, 0x7fff0000, 0x7fff0001, 0x0000fffe, + USR_CLEAR); + TEST_R_OP_RR(hmmpyl_rs1, 0x80000000, 0x80008000, 0x7fffffff, + USR_OVF); + + TEST_XP_OP_PP(mmaculs_s0, 0x000000007fffffffULL, 0xffff800080008000LL, + 0xffff800080008000LL, 0xffffc00040003fffLL, USR_CLEAR); + TEST_XP_OP_PP(mmaculs_s0, 0x000011107fffffffULL, 0x00ff00ff00ff00ffLL, + 0x00ff00ff001100ffLL, 0x00010f117fffffffLL, USR_OVF); + + TEST_R_OP_PR(cmpyi_wh, 0x7fff000000000000LL, 0x7fff0001, 0x0000fffe, + USR_CLEAR); + TEST_R_OP_PR(cmpyi_wh, 0x8000000000000000LL, 0x80008000, 0x7fffffff, + USR_OVF); + + TEST_P_OP_PP(vcmpy_s0_sat_i, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL, + 0x0001fc020001fc02LL, USR_CLEAR); + TEST_P_OP_PP(vcmpy_s0_sat_i, 0x8000800080008000LL, 0x8000800080008000LL, + 0x7fffffff7fffffffLL, USR_OVF); + + TEST_P_OP_PR(vcrotate, 0x8000000000000000LL, 0x00000002, + 0x8000000000000000LL, USR_CLEAR); + TEST_P_OP_PR(vcrotate, 0x7fff80007fff8000LL, 0x00000001, + 0x7fff80007fff7fffLL, USR_OVF); + + TEST_P_OP_PR(vcnegh, 0x8000000000000000LL, 0x00000002, + 0x8000000000000000LL, USR_CLEAR); + TEST_P_OP_PR(vcnegh, 0x7fff80007fff8000LL, 0x00000001, + 0x7fff80007fff7fffLL, USR_OVF); + +#if CORE_HAS_AUDIO + TEST_R_OP_PP(wcmpyrw, 0x8765432101234567LL, 0x00000002ffffffffLL, + 0x00000001, USR_CLEAR); + TEST_R_OP_PP(wcmpyrw, 0x800000007fffffffLL, 0x000000ff7fffffffLL, + 0x7fffffff, USR_OVF); + TEST_R_OP_PP(wcmpyrw, 0x7fffffff80000000LL, 0x7fffffff000000ffLL, + 0x80000000, USR_OVF); +#else + printf("Audio instructions skipped\n"); +#endif + + TEST_R_OP_RR(addh_l16_sat_ll, 0x0000ffff, 0x00000002, 0x00000001, + USR_CLEAR); + TEST_R_OP_RR(addh_l16_sat_ll, 0x00007fff, 0x00000005, 0x00007fff, + USR_OVF); + TEST_R_OP_RR(addh_l16_sat_ll, 0x00008000, 0x00008000, 0xffff8000, + USR_OVF); + + TEST_P_OP_P(vconj, 0x0000ffff00000001LL, 0x0000ffff00000001LL, USR_CLEAR); + TEST_P_OP_P(vconj, 0x800000000000ffffLL, 0x7fff00000000ffffLL, USR_OVF); + + TEST_P_OP_PP(vxaddsubw, 0x8765432101234567LL, 0x00000002ffffffffLL, + 0x8765432201234569LL, USR_CLEAR); + TEST_P_OP_PP(vxaddsubw, 0x7fffffff7fffffffLL, 0xffffffffffffffffLL, + 0x7fffffff7ffffffeLL, USR_OVF); + TEST_P_OP_PP(vxaddsubw, 0x800000000fffffffLL, 0x0000000a00000008LL, + 0x8000000010000009LL, USR_OVF); + + TEST_P_OP_P(vabshsat, 0x0001000afffff800LL, 0x0001000a00010800LL, + USR_CLEAR); + TEST_P_OP_P(vabshsat, 0x8000000b000c000aLL, 0x7fff000b000c000aLL, + USR_OVF); + + TEST_P_OP_PP(vnavgwr, 0x8765432101234567LL, 0x00000002ffffffffLL, + 0xc3b2a1900091a2b4LL, USR_CLEAR); + TEST_P_OP_PP(vnavgwr, 0x7fffffff8000000aLL, 0x80000000ffffffffLL, + 0x7fffffffc0000006LL, USR_OVF); + + TEST_R_OP_RI(round_ri_sat, 0x0000ffff, 2, 0x00004000, USR_CLEAR); + TEST_R_OP_RI(round_ri_sat, 0x7fffffff, 2, 0x1fffffff, USR_OVF); + + TEST_R_OP_RR(asr_r_r_sat, 0x0000ffff, 0x00000002, 0x00003fff, + USR_CLEAR); + TEST_R_OP_RR(asr_r_r_sat, 0x00ffffff, 0xfffffff5, 0x7fffffff, + USR_OVF); + TEST_R_OP_RR(asr_r_r_sat, 0x80000000, 0xfffffff5, 0x80000000, + USR_OVF); + + TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL, + 0x0000000000000000ULL, 0x0004000300030004ULL, 0xf0, + USR_CLEAR); + TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL, + 0x000affff000d0000ULL, 0x000e0003000f0004ULL, 0xcc, + USR_CLEAR); + TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL, + 0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xfc, + USR_OVF); + TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL, + 0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xf0, + USR_OVF); + + /* Floating point */ + TEST_R_OP_RR(sfmin, SF_one, SF_small_neg, SF_small_neg, USR_CLEAR); + TEST_R_OP_RR(sfmin, SF_one, SF_SNaN, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmin, SF_SNaN, SF_one, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmin, SF_one, SF_QNaN, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmin, SF_QNaN, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmin, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmin, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfmax, SF_one, SF_small_neg, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmax, SF_one, SF_SNaN, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmax, SF_SNaN, SF_one, SF_one, USR_FPINVF); + TEST_R_OP_RR(sfmax, SF_one, SF_QNaN, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmax, SF_QNaN, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sfmax, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmax, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfadd, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sfadd, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfadd, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfadd, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfsub, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sfsub, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfsub, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfsub, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sfmpy, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sfmpy, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmpy, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sfmpy, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_XR_OP_RR(sffma, SF_one, SF_one, SF_one, SF_two, USR_CLEAR); + TEST_XR_OP_RR(sffma, SF_zero, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_XR_OP_RR(sffma, SF_zero, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffma, SF_zero, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffma, SF_zero, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_XR_OP_RR(sffms, SF_one, SF_one, SF_one, SF_zero, USR_CLEAR); + TEST_XR_OP_RR(sffms, SF_zero, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_XR_OP_RR(sffms, SF_zero, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffms, SF_zero, SF_QNaN, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_XR_OP_RR(sffms, SF_zero, SF_SNaN, SF_QNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_CMP_RR(sfcmpuo, SF_one, SF_large_pos, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_INF, SF_large_pos, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_QNaN, SF_large_pos, 0xff, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_QNaN_neg, SF_large_pos, 0xff, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_SNaN, SF_large_pos, 0xff, USR_FPINVF); + TEST_CMP_RR(sfcmpuo, SF_SNaN_neg, SF_large_pos, 0xff, USR_FPINVF); + TEST_CMP_RR(sfcmpuo, SF_QNaN, SF_QNaN, 0xff, USR_CLEAR); + TEST_CMP_RR(sfcmpuo, SF_QNaN, SF_SNaN, 0xff, USR_FPINVF); + + TEST_CMP_RR(sfcmpeq, SF_one, SF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpeq, SF_one, SF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_RR(sfcmpgt, SF_one, SF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpgt, SF_one, SF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_RR(sfcmpge, SF_one, SF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_RR(sfcmpge, SF_one, SF_SNaN, 0x00, USR_FPINVF); + + TEST_P_OP_PP(dfadd, DF_any, DF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_P_OP_PP(dfadd, DF_any, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfadd, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfadd, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + + TEST_P_OP_PP(dfsub, DF_any, DF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_P_OP_PP(dfsub, DF_any, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfsub, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfsub, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + +#if CORE_IS_V67 + TEST_P_OP_PP(dfmin, DF_any, DF_small_neg, DF_small_neg, USR_CLEAR); + TEST_P_OP_PP(dfmin, DF_any, DF_SNaN, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmin, DF_SNaN, DF_any, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmin, DF_any, DF_QNaN, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmin, DF_QNaN, DF_any, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmin, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfmin, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + + TEST_P_OP_PP(dfmax, DF_any, DF_small_neg, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmax, DF_any, DF_SNaN, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmax, DF_SNaN, DF_any, DF_any, USR_FPINVF); + TEST_P_OP_PP(dfmax, DF_any, DF_QNaN, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmax, DF_QNaN, DF_any, DF_any, USR_CLEAR); + TEST_P_OP_PP(dfmax, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); + TEST_P_OP_PP(dfmax, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + + TEST_XP_OP_PP(dfmpyhh, DF_one, DF_one, DF_one, DF_one_hh, USR_CLEAR); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_any, DF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_any, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_QNaN, DF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_XP_OP_PP(dfmpyhh, DF_zero, DF_SNaN, DF_QNaN, DF_HEX_NaN, USR_FPINVF); +#else + printf("v67 instructions skipped\n"); +#endif + + TEST_CMP_PP(dfcmpuo, DF_small_neg, DF_any, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_large_pos, DF_any, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_QNaN, DF_any, 0xff, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_QNaN_neg, DF_any, 0xff, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_SNaN, DF_any, 0xff, USR_FPINVF); + TEST_CMP_PP(dfcmpuo, DF_SNaN_neg, DF_any, 0xff, USR_FPINVF); + TEST_CMP_PP(dfcmpuo, DF_QNaN, DF_QNaN, 0xff, USR_CLEAR); + TEST_CMP_PP(dfcmpuo, DF_QNaN, DF_SNaN, 0xff, USR_FPINVF); + + TEST_CMP_PP(dfcmpeq, DF_any, DF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpeq, DF_any, DF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_PP(dfcmpgt, DF_any, DF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpgt, DF_any, DF_SNaN, 0x00, USR_FPINVF); + TEST_CMP_PP(dfcmpge, DF_any, DF_QNaN, 0x00, USR_CLEAR); + TEST_CMP_PP(dfcmpge, DF_any, DF_SNaN, 0x00, USR_FPINVF); + + TEST_P_OP_R(conv_sf2df, SF_QNaN, DF_HEX_NaN, USR_CLEAR); + TEST_P_OP_R(conv_sf2df, SF_SNaN, DF_HEX_NaN, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw_chop, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2uw_chop, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w_chop, SF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_R(conv_sf2w_chop, SF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud_chop, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2ud_chop, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d_chop, SF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_R(conv_sf2d_chop, SF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + + TEST_R_OP_P(conv_df2sf, DF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_P(conv_df2sf, DF_SNaN, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_P(conv_df2uw, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2uw, DF_SNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2w, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2w, DF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_P(conv_df2ud, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2ud, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_R_OP_P(conv_df2uw_chop, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2uw_chop, DF_SNaN, 0xffffffff, USR_FPINVF); + + /* Test for typo in HELPER(conv_df2uw_chop) */ + TEST_R_OP_P(conv_df2uw_chop, 0xffffff7f00000001ULL, 0xffffffff, USR_FPINVF); + + TEST_R_OP_P(conv_df2w_chop, DF_QNaN, 0xffffffff, USR_FPINVF); + TEST_R_OP_P(conv_df2w_chop, DF_SNaN, 0xffffffff, USR_FPINVF); + TEST_P_OP_P(conv_df2ud_chop, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2ud_chop, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d_chop, DF_QNaN, 0xffffffffffffffffULL, USR_FPINVF); + TEST_P_OP_P(conv_df2d_chop, DF_SNaN, 0xffffffffffffffffULL, USR_FPINVF); + + TEST_R_OP_R(conv_uw2sf, 0x00000001, SF_one, USR_CLEAR); + TEST_R_OP_R(conv_uw2sf, 0x010020a5, 0x4b801052, USR_FPINPF); + TEST_R_OP_R(conv_w2sf, 0x00000001, SF_one, USR_CLEAR); + TEST_R_OP_R(conv_w2sf, 0x010020a5, 0x4b801052, USR_FPINPF); + TEST_R_OP_P(conv_ud2sf, 0x0000000000000001ULL, SF_one, USR_CLEAR); + TEST_R_OP_P(conv_ud2sf, 0x00000000010020a5ULL, 0x4b801052, USR_FPINPF); + TEST_R_OP_P(conv_d2sf, 0x0000000000000001ULL, SF_one, USR_CLEAR); + TEST_R_OP_P(conv_d2sf, 0x00000000010020a5ULL, 0x4b801052, USR_FPINPF); + + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_one, SF_one, 1, SF_four, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_QNaN, SF_one, SF_one, 1, SF_HEX_NaN, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_QNaN, SF_one, 1, SF_HEX_NaN, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_one, SF_QNaN, 1, SF_HEX_NaN, + USR_CLEAR); + TEST_XR_OP_RRp(sffma_sc, SF_SNaN, SF_one, SF_one, 1, SF_HEX_NaN, + USR_FPINVF); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_SNaN, SF_one, 1, SF_HEX_NaN, + USR_FPINVF); + TEST_XR_OP_RRp(sffma_sc, SF_one, SF_one, SF_SNaN, 1, SF_HEX_NaN, + USR_FPINVF); + + TEST_Rp_OP_RR(sfrecipa, SF_one, SF_one, SF_one_recip, 0x00, + USR_CLEAR); + TEST_Rp_OP_RR(sfrecipa, SF_QNaN, SF_one, SF_HEX_NaN, 0x00, + USR_CLEAR); + TEST_Rp_OP_RR(sfrecipa, SF_one, SF_QNaN, SF_HEX_NaN, 0x00, + USR_CLEAR); + TEST_Rp_OP_RR(sfrecipa, SF_one, SF_SNaN, SF_HEX_NaN, 0x00, + USR_FPINVF); + TEST_Rp_OP_RR(sfrecipa, SF_SNaN, SF_one, SF_HEX_NaN, 0x00, + USR_FPINVF); + + TEST_R_OP_RR(sffixupn, SF_one, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sffixupn, SF_QNaN, SF_one, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupn, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupn, SF_SNaN, SF_one, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sffixupn, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_RR(sffixupd, SF_one, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_RR(sffixupd, SF_QNaN, SF_one, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupd, SF_one, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_RR(sffixupd, SF_SNaN, SF_one, SF_HEX_NaN, USR_FPINVF); + TEST_R_OP_RR(sffixupd, SF_one, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_R_OP_R(sffixupr, SF_one, SF_one, USR_CLEAR); + TEST_R_OP_R(sffixupr, SF_QNaN, SF_HEX_NaN, USR_CLEAR); + TEST_R_OP_R(sffixupr, SF_SNaN, SF_HEX_NaN, USR_FPINVF); + + TEST_Rp_OP_R(sfinvsqrta, SF_one, SF_one_invsqrta, 0x00, USR_CLEAR); + TEST_Rp_OP_R(sfinvsqrta, SF_zero, SF_one, 0x00, USR_CLEAR); + TEST_Rp_OP_R(sfinvsqrta, SF_QNaN, SF_HEX_NaN, 0x00, USR_CLEAR); + TEST_Rp_OP_R(sfinvsqrta, SF_small_neg, SF_HEX_NaN, 0x00, USR_FPINVF); + TEST_Rp_OP_R(sfinvsqrta, SF_SNaN, SF_HEX_NaN, 0x00, USR_FPINVF); + + puts(err ? "FAIL" : "PASS"); + return err; +} |