diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-07-01 08:24:33 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-07-01 08:24:33 +0100 |
commit | 620a35b24a2b6edb67720ec42864b571a972fa45 (patch) | |
tree | cece852eb043ca089930e2c88e33aa3684feed86 /gcc/d/expr.cc | |
parent | 02460c0b8c9000359a09440f9532664a7835f158 (diff) | |
download | gcc-620a35b24a2b6edb67720ec42864b571a972fa45.zip gcc-620a35b24a2b6edb67720ec42864b571a972fa45.tar.gz gcc-620a35b24a2b6edb67720ec42864b571a972fa45.tar.bz2 |
i386: Add STV support for DImode and SImode rotations by constant.
This patch implements scalar-to-vector (STV) support for DImode and SImode
rotations by constant bit counts. Scalar rotations are almost always
optimal on x86, requiring only one or two instructions, but it is also
possible to implement these efficiently with SSE2, requiring only one
or two instructions for SImode rotations and at most 3 instructions for
DImode rotations. This allows GCC to convert rotations to vector form (STV)
with little or no penalty if there are other (net) benefits to converting a chain. An
example of the benefits is shown below, which is based upon the BLAKE2
cryptographic hash function:
unsigned long long a,b,c,d;
unsigned long rot(unsigned long long x, int y)
{
return (x<<y) | (x>>(64-y));
}
void foo()
{
d = rot(d ^ a,32);
c = c + d;
b = rot(b ^ c,24);
a = a + b;
d = rot(d ^ a,16);
c = c + d;
b = rot(b ^ c,63);
}
Compiling this with -m32 -O2 -msse2 gives the following code for foo:
Before (59 insns, 247 bytes):
foo: pushl %edi
xorl %edx, %edx
pushl %esi
pushl %ebx
subl $16, %esp
movq a, %xmm1
movq d, %xmm0
movq b, %xmm2
pxor %xmm1, %xmm0
psrlq $32, %xmm0
movd %xmm0, %eax
movd %edx, %xmm0
movd %eax, %xmm3
punpckldq %xmm0, %xmm3
movq c, %xmm0
paddq %xmm3, %xmm0
pxor %xmm0, %xmm2
movd %xmm2, %ecx
psrlq $32, %xmm2
movd %xmm2, %ebx
movl %ecx, %eax
shldl $24, %ebx, %ecx
shldl $24, %eax, %ebx
movd %ebx, %xmm4
movd %ecx, %xmm2
punpckldq %xmm4, %xmm2
movdqa .LC0, %xmm4
pand %xmm4, %xmm2
paddq %xmm2, %xmm1
movq %xmm1, a
pxor %xmm3, %xmm1
movd %xmm1, %esi
psrlq $32, %xmm1
movd %xmm1, %edi
movl %esi, %eax
shldl $16, %edi, %esi
shldl $16, %eax, %edi
movd %esi, %xmm1
movd %edi, %xmm3
punpckldq %xmm3, %xmm1
pand %xmm4, %xmm1
movq %xmm1, d
paddq %xmm1, %xmm0
movq %xmm0, c
pxor %xmm2, %xmm0
movd %xmm0, 8(%esp)
psrlq $32, %xmm0
movl 8(%esp), %eax
movd %xmm0, 12(%esp)
movl 12(%esp), %edx
shrdl $1, %edx, %eax
xorl %edx, %edx
movl %eax, b
movl %edx, b+4
addl $16, %esp
popl %ebx
popl %esi
popl %edi
ret
After (32 insns, 165 bytes):
foo: movq a, %xmm1
xorl %edx, %edx
movq d, %xmm0
movq b, %xmm2
movdqa .LC0, %xmm4
pxor %xmm1, %xmm0
psrlq $32, %xmm0
movd %xmm0, %eax
movd %edx, %xmm0
movd %eax, %xmm3
punpckldq %xmm0, %xmm3
movq c, %xmm0
paddq %xmm3, %xmm0
pxor %xmm0, %xmm2
pshufd $68, %xmm2, %xmm2
psrldq $5, %xmm2
pand %xmm4, %xmm2
paddq %xmm2, %xmm1
movq %xmm1, a
pxor %xmm3, %xmm1
pshuflw $147, %xmm1, %xmm1
pand %xmm4, %xmm1
movq %xmm1, d
paddq %xmm1, %xmm0
movq %xmm0, c
pxor %xmm2, %xmm0
pshufd $20, %xmm0, %xmm0
psrlq $1, %xmm0
pshufd $136, %xmm0, %xmm0
pand %xmm4, %xmm0
movq %xmm0, b
ret
2023-07-01 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/i386/i386-features.cc (compute_convert_gain): Provide
gains/costs for ROTATE and ROTATERT (by an integer constant).
(general_scalar_chain::convert_rotate): New helper function to
convert a DImode or SImode rotation by an integer constant into
SSE vector form.
(general_scalar_chain::convert_insn): Call the new convert_rotate
for ROTATE and ROTATERT.
(general_scalar_to_vector_candidate_p): Consider ROTATE and
ROTATERT to be candidates if the second operand is an integer
constant, valid for a rotation (or shift) in the given mode.
* config/i386/i386-features.h (general_scalar_chain): Add new
helper method convert_rotate.
gcc/testsuite/ChangeLog
* gcc.target/i386/rotate-6.c: New test case.
* gcc.target/i386/sse2-stv-1.c: Likewise.
Diffstat (limited to 'gcc/d/expr.cc')
0 files changed, 0 insertions, 0 deletions