aboutsummaryrefslogtreecommitdiff
path: root/gcc/d/expr.cc
diff options
context:
space:
mode:
author: Roger Sayle <roger@nextmovesoftware.com> 2023-07-01 08:24:33 +0100
committer: Roger Sayle <roger@nextmovesoftware.com> 2023-07-01 08:24:33 +0100
commit: 620a35b24a2b6edb67720ec42864b571a972fa45 (patch)
tree: cece852eb043ca089930e2c88e33aa3684feed86 /gcc/d/expr.cc
parent: 02460c0b8c9000359a09440f9532664a7835f158 (diff)
download: gcc-620a35b24a2b6edb67720ec42864b571a972fa45.zip
gcc-620a35b24a2b6edb67720ec42864b571a972fa45.tar.gz
gcc-620a35b24a2b6edb67720ec42864b571a972fa45.tar.bz2
i386: Add STV support for DImode and SImode rotations by constant.
This patch implements scalar-to-vector (STV) support for DImode and
SImode rotations by constant bit counts. Scalar rotations are almost
always optimal on x86, requiring only one or two instructions, but it
is also possible to implement these efficiently with SSE2, requiring
only one or two instructions for SImode rotations and at most 3
instructions for DImode rotations. This allows GCC to STV rotations
with a small or no penalty if there are other (net) benefits to
converting a chain. An example of the benefits is shown below, which
is based upon the BLAKE2 cryptographic hash function:

unsigned long long a,b,c,d;

unsigned long rot(unsigned long long x, int y)
{
  return (x<<y) | (x>>(64-y));
}

void foo()
{
  d = rot(d ^ a,32);
  c = c + d;
  b = rot(b ^ c,24);
  a = a + b;
  d = rot(d ^ a,16);
  c = c + d;
  b = rot(b ^ c,63);
}

where with -m32 -O2 -msse2

Before (59 insns, 247 bytes):
foo:
        pushl %edi
        xorl %edx, %edx
        pushl %esi
        pushl %ebx
        subl $16, %esp
        movq a, %xmm1
        movq d, %xmm0
        movq b, %xmm2
        pxor %xmm1, %xmm0
        psrlq $32, %xmm0
        movd %xmm0, %eax
        movd %edx, %xmm0
        movd %eax, %xmm3
        punpckldq %xmm0, %xmm3
        movq c, %xmm0
        paddq %xmm3, %xmm0
        pxor %xmm0, %xmm2
        movd %xmm2, %ecx
        psrlq $32, %xmm2
        movd %xmm2, %ebx
        movl %ecx, %eax
        shldl $24, %ebx, %ecx
        shldl $24, %eax, %ebx
        movd %ebx, %xmm4
        movd %ecx, %xmm2
        punpckldq %xmm4, %xmm2
        movdqa .LC0, %xmm4
        pand %xmm4, %xmm2
        paddq %xmm2, %xmm1
        movq %xmm1, a
        pxor %xmm3, %xmm1
        movd %xmm1, %esi
        psrlq $32, %xmm1
        movd %xmm1, %edi
        movl %esi, %eax
        shldl $16, %edi, %esi
        shldl $16, %eax, %edi
        movd %esi, %xmm1
        movd %edi, %xmm3
        punpckldq %xmm3, %xmm1
        pand %xmm4, %xmm1
        movq %xmm1, d
        paddq %xmm1, %xmm0
        movq %xmm0, c
        pxor %xmm2, %xmm0
        movd %xmm0, 8(%esp)
        psrlq $32, %xmm0
        movl 8(%esp), %eax
        movd %xmm0, 12(%esp)
        movl 12(%esp), %edx
        shrdl $1, %edx, %eax
        xorl %edx, %edx
        movl %eax, b
        movl %edx, b+4
        addl $16, %esp
        popl %ebx
        popl %esi
        popl %edi
        ret

After (32 insns, 165 bytes):
        movq a, %xmm1
        xorl %edx, %edx
        movq d, %xmm0
        movq b, %xmm2
        movdqa .LC0, %xmm4
        pxor %xmm1, %xmm0
        psrlq $32, %xmm0
        movd %xmm0, %eax
        movd %edx, %xmm0
        movd %eax, %xmm3
        punpckldq %xmm0, %xmm3
        movq c, %xmm0
        paddq %xmm3, %xmm0
        pxor %xmm0, %xmm2
        pshufd $68, %xmm2, %xmm2
        psrldq $5, %xmm2
        pand %xmm4, %xmm2
        paddq %xmm2, %xmm1
        movq %xmm1, a
        pxor %xmm3, %xmm1
        pshuflw $147, %xmm1, %xmm1
        pand %xmm4, %xmm1
        movq %xmm1, d
        paddq %xmm1, %xmm0
        movq %xmm0, c
        pxor %xmm2, %xmm0
        pshufd $20, %xmm0, %xmm0
        psrlq $1, %xmm0
        pshufd $136, %xmm0, %xmm0
        pand %xmm4, %xmm0
        movq %xmm0, b
        ret

2023-07-01 Roger Sayle <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/i386/i386-features.cc (compute_convert_gain): Provide
        gains/costs for ROTATE and ROTATERT (by an integer constant).
        (general_scalar_chain::convert_rotate): New helper function to
        convert a DImode or SImode rotation by an integer constant into
        SSE vector form.
        (general_scalar_chain::convert_insn): Call the new convert_rotate
        for ROTATE and ROTATERT.
        (general_scalar_to_vector_candidate_p): Consider ROTATE and
        ROTATERT to be candidates if the second operand is an integer
        constant, valid for a rotation (or shift) in the given mode.
        * config/i386/i386-features.h (general_scalar_chain): Add new
        helper method convert_rotate.

gcc/testsuite/ChangeLog
        * gcc.target/i386/rotate-6.c: New test case.
        * gcc.target/i386/sse2-stv-1.c: Likewise.
Diffstat (limited to 'gcc/d/expr.cc')
0 files changed, 0 insertions, 0 deletions