diff options
author | dmitry dulesov <dmitry.dulesov@gmail.com> | 2020-02-01 02:59:07 +0300 |
---|---|---|
committer | Dmitry Belyavskiy <beldmit@users.noreply.github.com> | 2020-02-02 21:05:29 +0300 |
commit | 211489fa492c33247383f71ce269858f512874ec (patch) | |
tree | 4a6d1b6a468bb6e589ffa3ebdc74dc8a057b33f2 /gosthash2012.c | |
parent | 88a44b67c76ce938f124e22e6a882780a98731fb (diff) | |
download | gost-engine-211489fa492c33247383f71ce269858f512874ec.zip gost-engine-211489fa492c33247383f71ce269858f512874ec.tar.gz gost-engine-211489fa492c33247383f71ce269858f512874ec.tar.bz2 |
gosthash2012: Improve SIMD implementation
Allow aligned/unaligned access. Use better intrinsics and other
small optimizations.
Committed-by: Vitaly Chikunov <vt@altlinux.org>
Diffstat (limited to 'gosthash2012.c')
-rw-r--r-- | gosthash2012.c | 10 |
1 file changed, 4 insertions, 6 deletions
diff --git a/gosthash2012.c b/gosthash2012.c index d6cde21..6399a9e 100644 --- a/gosthash2012.c +++ b/gosthash2012.c @@ -118,7 +118,7 @@ static void g(union uint512_u *h, const union uint512_u * RESTRICT N, LOAD(N, xmm0, xmm2, xmm4, xmm6); XLPS128M(h, xmm0, xmm2, xmm4, xmm6); - LOAD(m, xmm1, xmm3, xmm5, xmm7); + ULOAD(m, xmm1, xmm3, xmm5, xmm7); XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); for (i = 0; i < 11; i++) @@ -128,12 +128,10 @@ static void g(union uint512_u *h, const union uint512_u * RESTRICT N, X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); X128M(h, xmm0, xmm2, xmm4, xmm6); - X128M(m, xmm0, xmm2, xmm4, xmm6); - - UNLOAD(h, xmm0, xmm2, xmm4, xmm6); + ULOAD(m, xmm1, xmm3, xmm5, xmm7); + X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); - /* Restore the Floating-point status on the CPU */ - _mm_empty(); + STORE(h, xmm0, xmm2, xmm4, xmm6); #else union uint512_u Ki, data; unsigned int i; |