From 6b8b25575570ffde37cc8997af096514b929779d Mon Sep 17 00:00:00 2001
From: Roger Sayle
Date: Tue, 26 Oct 2021 08:33:41 +0100
Subject: x86_64: Implement V1TI mode shifts/rotates by a constant

This patch provides RTL expanders to implement logical shifts and
rotates of 128-bit values (stored in vector integer registers) by
constant bit counts. Previously, GCC would transfer these values
to a pair of integer registers (TImode) via memory to perform the
operation, then transfer the result back via memory. Instead these
operations are now expanded using (between 1 and 5) SSE2 vector
instructions.

Logical shifts by multiples of 8 can be implemented using x86_64's
pslldq/psrldq instructions:

ashl_8:
        pslldq  $1, %xmm0
        ret
lshr_32:
        psrldq  $4, %xmm0
        ret

Logical shifts by more than 64 bits can use pslldq/psrldq $8, followed
by a psllq/psrlq for the remaining bits:

ashl_111:
        pslldq  $8, %xmm0
        psllq   $47, %xmm0
        ret
lshr_127:
        psrldq  $8, %xmm0
        psrlq   $63, %xmm0
        ret

The remaining logical shifts make use of the following idiom:

ashl_1:
        movdqa  %xmm0, %xmm1
        psllq   $1, %xmm0
        pslldq  $8, %xmm1
        psrlq   $63, %xmm1
        por     %xmm1, %xmm0
        ret
lshr_15:
        movdqa  %xmm0, %xmm1
        psrlq   $15, %xmm0
        psrldq  $8, %xmm1
        psllq   $49, %xmm1
        por     %xmm1, %xmm0
        ret

Rotates by multiples of 32 can use x86_64's pshufd:

rotr_32:
        pshufd  $57, %xmm0, %xmm0
        ret
rotr_64:
        pshufd  $78, %xmm0, %xmm0
        ret
rotr_96:
        pshufd  $147, %xmm0, %xmm0
        ret

Rotates by multiples of 8 (other than multiples of 32) can make
use of both pslldq and psrldq, followed by por:

rotr_8:
        movdqa  %xmm0, %xmm1
        psrldq  $1, %xmm0
        pslldq  $15, %xmm1
        por     %xmm1, %xmm0
        ret
rotr_112:
        movdqa  %xmm0, %xmm1
        psrldq  $14, %xmm0
        pslldq  $2, %xmm1
        por     %xmm1, %xmm0
        ret

And the remaining rotates use one or two pshufd, followed by a
psrld/pslld/por sequence:

rotr_1:
        movdqa  %xmm0, %xmm1
        pshufd  $57, %xmm0, %xmm0
        psrld   $1, %xmm1
        pslld   $31, %xmm0
        por     %xmm1, %xmm0
        ret
rotr_63:
        pshufd  $78, %xmm0, %xmm1
        pshufd  $57, %xmm0, %xmm0
        pslld   $1, %xmm1
        psrld   $31, %xmm0
        por     %xmm1, %xmm0
        ret
rotr_111:
        pshufd  $147, %xmm0, %xmm1
        pslld   $17, %xmm0
        psrld   $15, %xmm1
        por     %xmm1, %xmm0
        ret

The new test case, sse2-v1ti-shift.c, is a run-time check to confirm
that the results of V1TImode shifts/rotates by constants exactly
match the expected results of TImode operations, for various input
test vectors.

2021-10-26  Roger Sayle

gcc/ChangeLog
        * config/i386/i386-expand.c (ix86_expand_v1ti_shift): New helper
        function to expand V1TI mode logical shifts by integer constants.
        (ix86_expand_v1ti_rotate): New helper function to expand V1TI
        mode rotations by integer constants.
        * config/i386/i386-protos.h (ix86_expand_v1ti_shift,
        ix86_expand_v1ti_rotate): Prototype new functions here.
        * config/i386/sse.md (ashlv1ti3, lshrv1ti3, rotlv1ti3, rotrv1ti3):
        New TARGET_SSE2 expanders to implement V1TI shifts and rotations.

gcc/testsuite/ChangeLog
        * gcc.target/i386/sse2-v1ti-shift.c: New test case.
---
 gcc/config/i386/sse.md | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'gcc/config/i386/sse.md')

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 431236a..bdc6067 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15075,6 +15075,50 @@
   operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
+(define_expand "ashlv1ti3"
+  [(set (match_operand:V1TI 0 "register_operand")
+        (ashift:V1TI
+         (match_operand:V1TI 1 "register_operand")
+         (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SSE2"
+{
+  ix86_expand_v1ti_shift (ASHIFT, operands);
+  DONE;
+})
+
+(define_expand "lshrv1ti3"
+  [(set (match_operand:V1TI 0 "register_operand")
+        (lshiftrt:V1TI
+         (match_operand:V1TI 1 "register_operand")
+         (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SSE2"
+{
+  ix86_expand_v1ti_shift (LSHIFTRT, operands);
+  DONE;
+})
+
+(define_expand "rotlv1ti3"
+  [(set (match_operand:V1TI 0 "register_operand")
+        (rotate:V1TI
+         (match_operand:V1TI 1 "register_operand")
+         (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SSE2"
+{
+  ix86_expand_v1ti_rotate (ROTATE, operands);
+  DONE;
+})
+
+(define_expand "rotrv1ti3"
+  [(set (match_operand:V1TI 0 "register_operand")
+        (rotatert:V1TI
+         (match_operand:V1TI 1 "register_operand")
+         (match_operand:SI 2 "const_int_operand")))]
+  "TARGET_SSE2"
+{
+  ix86_expand_v1ti_rotate (ROTATERT, operands);
+  DONE;
+})
+
 (define_insn "avx512bw_<insn><mode>3"
   [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
         (any_lshift:VIMAX_AVX512VL
-- 
cgit v1.1