From 55cd237908dcfa48ef12b4f7cd9e41f154139bd2 Mon Sep 17 00:00:00 2001
From: "H.J. Lu"
Date: Wed, 15 May 2019 15:18:41 +0000
Subject: i386: Emulate MMX maskmovq with SSE2 maskmovdqu

Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by
zero-extending source and mask operands to 128 bits.  Handle unmapped
bits 64:127 at memory address by adjusting source and mask operands
together with memory address.

	PR target/89021
	* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
	maskmovdqu for __MMX_WITH_SSE__.

From-SVN: r271234
---
 gcc/ChangeLog               |  6 +++++
 gcc/config/i386/xmmintrin.h | 61 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e46067f..8f3e5f8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,12 @@
 2019-05-15  H.J. Lu
 
 	PR target/89021
+	* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
+	maskmovdqu for __MMX_WITH_SSE__.
+
+2019-05-15  H.J. Lu
+
+	PR target/89021
 	* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
 	TARGET_MMX and TARGET_MMX_WITH_SSE.
 	(*mmx_umulv4hi3_highpart): Add SSE emulation.
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 5828437..a915f6c 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __MMX_WITH_SSE__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+     64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Check the alignment of __P.  */
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+    {
+      /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+	 Otherwise, subtract __P by the misalignment.  */
+      if (offset > 8)
+	offset = 8;
+      __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+      /* Shift __A128 and __N128 to the left by the adjustment.  */
+      switch (offset)
+	{
+	case 1:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+	  break;
+	case 2:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+	  break;
+	case 3:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+	  break;
+	case 4:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+	  break;
+	case 5:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+	  break;
+	case 6:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+	  break;
+	case 7:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+	  break;
+	case 8:
+	  __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+	  __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+	  break;
+	default:
+	  break;
+	}
+    }
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-- 
cgit v1.1
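
A minimal usage sketch of _mm_maskmove_si64 (not part of the commit above;
the buffer name and mask values are illustrative assumptions): the intrinsic
stores only those source bytes whose corresponding mask byte has its high bit
set.  On x86-64, where SSE2 is enabled by default and __MMX_WITH_SSE__ is
defined, a call like the one below goes through the maskmovdqu-based path
added here; the address adjustment in that path matters when, for example,
the destination occupies the last 8 bytes of a page and bits 64:127 of a
plain 16-byte masked store would otherwise touch the next, possibly
unmapped, page.

#include <stdio.h>
#include <string.h>
#include <xmmintrin.h>

int
main (void)
{
  char buf[8];
  memset (buf, 'x', sizeof buf);

  /* Source bytes 0..7 and a mask whose set high bits select the
     even-indexed bytes (_mm_set_pi8 takes its arguments from byte 7
     down to byte 0).  */
  __m64 src = _mm_set_pi8 (7, 6, 5, 4, 3, 2, 1, 0);
  __m64 mask = _mm_set_pi8 (0, -1, 0, -1, 0, -1, 0, -1);

  /* Store only the selected bytes of src to buf; the remaining bytes
     of buf must stay untouched.  */
  _mm_maskmove_si64 (src, mask, buf);
  _mm_empty ();  /* Clears MMX state on traditional MMX targets;
		    harmless under __MMX_WITH_SSE__.  */

  /* Prints "0 x 2 x 4 x 6 x": even-indexed bytes come from src, the
     odd-indexed ones keep their original 'x'.  */
  for (int i = 0; i < 8; i++)
    if (buf[i] == 'x')
      printf ("x ");
    else
      printf ("%d ", buf[i]);
  printf ("\n");
  return 0;
}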