diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2023-07-11 09:26:24 +0100 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2023-09-15 13:57:00 +0000 |
commit | c6f0dcb1fdee2b1c2af15cea75bee98327308a8e (patch) | |
tree | 8ad932f904ddee51a973239567920596152b8dac | |
parent | cf1b2cab835f816915c8a170ec783922bc4e56a3 (diff) | |
download | qemu-c6f0dcb1fdee2b1c2af15cea75bee98327308a8e.zip qemu-c6f0dcb1fdee2b1c2af15cea75bee98327308a8e.tar.gz qemu-c6f0dcb1fdee2b1c2af15cea75bee98327308a8e.tar.bz2 |
target/arm: Use clmul_16* routines
Use generic routines for 16-bit carry-less multiply.
Remove our local version of pmull_w.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | target/arm/tcg/mve_helper.c | 8 | ||||
-rw-r--r-- | target/arm/tcg/vec_helper.c | 13 | ||||
-rw-r--r-- | target/arm/tcg/vec_internal.h | 6 |
3 files changed, 2 insertions, 25 deletions
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c index 96ddfb4..c666a96 100644 --- a/target/arm/tcg/mve_helper.c +++ b/target/arm/tcg/mve_helper.c @@ -985,14 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL) * Polynomial multiply. We can always do this generating 64 bits * of the result at a time, so we don't need to use DO_2OP_L. */ -#define VMULLPW_MASK 0x0000ffff0000ffffULL -#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK) -#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16) - DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even) DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd) -DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW) -DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW) +DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even) +DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd) /* * Because the computation type is at least twice as large as required, diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index cd630ff..5def86b 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -2029,19 +2029,6 @@ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc) clear_tail(d, opr_sz, simd_maxsz(desc)); } -uint64_t pmull_w(uint64_t op1, uint64_t op2) -{ - uint64_t result = 0; - int i; - for (i = 0; i < 16; ++i) { - uint64_t mask = (op1 & 0x0000000100000001ull) * 0xffffffff; - result ^= op2 & mask; - op1 >>= 1; - op2 <<= 1; - } - return result; -} - void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) { int hi = simd_data(desc); diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h index c4afba6..3ca1b94 100644 --- a/target/arm/tcg/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -219,12 +219,6 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *); int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *); int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool); -/* - * 16 x 16 -> 32 vector polynomial multiply where the inputs are - * in the low 16 bits of each 32-bit element - */ -uint64_t pmull_w(uint64_t op1, uint64_t op2); - /** * bfdotadd: * @sum: addend |