diff options
-rw-r--r-- | linux-user/elfload.c | 1 | +
-rw-r--r-- | target-arm/cpu.c | 1 | +
-rw-r--r-- | target-arm/cpu.h | 1 | +
-rw-r--r-- | target-arm/helper-a64.c | 30 | ------------------------------
-rw-r--r-- | target-arm/helper-a64.h | 2 | --
-rw-r--r-- | target-arm/helper.h | 3 | +++
-rw-r--r-- | target-arm/neon_helper.c | 30 | ++++++++++++++++++++++++++++++
-rw-r--r-- | target-arm/translate.c | 26 | +++++++++++++++++++++++++-
8 files changed, 61 insertions, 33 deletions
diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 9bda262..3241fec 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void) uint32_t hwcaps = 0; GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES); + GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL); GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1); GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2); GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32); diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 753f6cb..fb9c12d 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) set_feature(env, ARM_FEATURE_V8_AES); set_feature(env, ARM_FEATURE_V8_SHA1); set_feature(env, ARM_FEATURE_V8_SHA256); + set_feature(env, ARM_FEATURE_V8_PMULL); } if (arm_feature(env, ARM_FEATURE_V7)) { set_feature(env, ARM_FEATURE_VAPA); diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 14d5f21..79e7f82 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -637,6 +637,7 @@ enum arm_features { ARM_FEATURE_EL3, /* has EL3 Secure monitor support */ ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */ ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ + ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ }; static inline int arm_feature(CPUARMState *env, int feature) diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index cccda74..f0d2722 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, return result; } -/* Helper function for 64 bit polynomial multiply case: - * perform PolynomialMult(op1, op2) and return either the top or - * bottom half of the 128 bit result. 
- */ -uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - for (bitnum = 0; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 << bitnum; - } - } - return res; -} -uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) -{ - int bitnum; - uint64_t res = 0; - - /* bit 0 of op1 can't influence the high 64 bits at all */ - for (bitnum = 1; bitnum < 64; bitnum++) { - if (op1 & (1ULL << bitnum)) { - res ^= op2 >> (64 - bitnum); - } - } - return res; -} - /* 64bit/double versions of the neon float compare functions */ uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) { diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index 3f05bed..8de7536 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) -DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) diff --git a/target-arm/helper.h b/target-arm/helper.h index 113b09d..0ef8fca 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(dc_zva, void, env, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) + #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c 
index 492e500..47d13e9 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) env->vfp.regs[rm] = make_float64(m0); env->vfp.regs[rd] = make_float64(d0); } + +/* Helper function for 64 bit polynomial multiply case: + * perform PolynomialMult(op1, op2) and return either the top or + * bottom half of the 128 bit result. + */ +uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + for (bitnum = 0; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 << bitnum; + } + } + return res; +} +uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + /* bit 0 of op1 can't influence the high 64 bits at all */ + for (bitnum = 1; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 >> (64 - bitnum); + } + } + return res; +} diff --git a/target-arm/translate.c b/target-arm/translate.c index 7124606..41c3fc7 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins {0, 0, 0, 9}, /* VQDMLSL */ {0, 0, 0, 0}, /* Integer VMULL */ {0, 0, 0, 1}, /* VQDMULL */ - {0, 0, 0, 15}, /* Polynomial VMULL */ + {0, 0, 0, 0xa}, /* Polynomial VMULL */ {0, 0, 0, 7}, /* Reserved: always UNDEF */ }; @@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins return 1; } + /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply) + * outside the loop below as it only performs a single pass. 
+ */ + if (op == 14 && size == 2) { + TCGv_i64 tcg_rn, tcg_rm, tcg_rd; + + if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) { + return 1; + } + tcg_rn = tcg_temp_new_i64(); + tcg_rm = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + neon_load_reg64(tcg_rn, rn); + neon_load_reg64(tcg_rm, rm); + gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd); + gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm); + neon_store_reg64(tcg_rd, rd + 1); + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rm); + tcg_temp_free_i64(tcg_rd); + return 0; + } + /* Avoid overlapping operands. Wide source operands are always aligned so will never overlap with wide destinations in problematic ways. */ |