aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/sse.md
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2021-10-23 10:06:06 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2021-10-23 10:06:06 +0100
commit36051875168db600c103277499b092acb4755eab (patch)
tree08a72b36ed6a93d8119c805f51bfa5393607eafe /gcc/config/i386/sse.md
parent693abdb66aba25f3fb25c3cd8d65dbb64ecd37a0 (diff)
downloadgcc-36051875168db600c103277499b092acb4755eab.zip
gcc-36051875168db600c103277499b092acb4755eab.tar.gz
gcc-36051875168db600c103277499b092acb4755eab.tar.bz2
x86_64: Add insn patterns for V1TI mode logic operations.
On x86_64, V1TI mode holds a 128-bit integer value in a (vector) SSE register (where regular TI mode uses a pair of 64-bit general purpose scalar registers). This patch improves the implementation of AND, IOR, XOR and NOT on these values. The benefit is demonstrated by the following simple test program: typedef unsigned __int128 v1ti __attribute__ ((__vector_size__ (16))); v1ti and(v1ti x, v1ti y) { return x & y; } v1ti ior(v1ti x, v1ti y) { return x | y; } v1ti xor(v1ti x, v1ti y) { return x ^ y; } v1ti not(v1ti x) { return ~x; } For which GCC currently generates the rather large: and: movdqa %xmm0, %xmm2 movq %xmm1, %rdx movq %xmm0, %rax andq %rdx, %rax movhlps %xmm2, %xmm3 movhlps %xmm1, %xmm4 movq %rax, %xmm0 movq %xmm4, %rdx movq %xmm3, %rax andq %rdx, %rax movq %rax, %xmm5 punpcklqdq %xmm5, %xmm0 ret ior: movdqa %xmm0, %xmm2 movq %xmm1, %rdx movq %xmm0, %rax orq %rdx, %rax movhlps %xmm2, %xmm3 movhlps %xmm1, %xmm4 movq %rax, %xmm0 movq %xmm4, %rdx movq %xmm3, %rax orq %rdx, %rax movq %rax, %xmm5 punpcklqdq %xmm5, %xmm0 ret xor: movdqa %xmm0, %xmm2 movq %xmm1, %rdx movq %xmm0, %rax xorq %rdx, %rax movhlps %xmm2, %xmm3 movhlps %xmm1, %xmm4 movq %rax, %xmm0 movq %xmm4, %rdx movq %xmm3, %rax xorq %rdx, %rax movq %rax, %xmm5 punpcklqdq %xmm5, %xmm0 ret not: movdqa %xmm0, %xmm1 movq %xmm0, %rax notq %rax movhlps %xmm1, %xmm2 movq %rax, %xmm0 movq %xmm2, %rax notq %rax movq %rax, %xmm3 punpcklqdq %xmm3, %xmm0 ret with this patch we now generate the much more efficient: and: pand %xmm1, %xmm0 ret ior: por %xmm1, %xmm0 ret xor: pxor %xmm1, %xmm0 ret not: pcmpeqd %xmm1, %xmm1 pxor %xmm1, %xmm0 ret For my first few attempts at this patch I tried adding V1TI to the existing VI and VI12_AVX_512F mode iterators, but these then have dependencies on other iterators (and attributes), and so on until everything ties itself into a knot, as V1TI mode isn't really a first-class vector mode on x86_64. 
Hence I ultimately opted to use simple stand-alone patterns (as used by the existing TF mode support). 2021-10-23 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog * config/i386/sse.md (<any_logic>v1ti3): New define_insn to implement V1TImode AND, IOR and XOR on TARGET_SSE2 (and above). (one_cmplv1ti2): New define_expand. gcc/testsuite/ChangeLog * gcc.target/i386/sse2-v1ti-logic.c: New test case. * gcc.target/i386/sse2-v1ti-logic-2.c: New test case.
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r--gcc/config/i386/sse.md25
1 files changed, 25 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fbf056b..f37c5c0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16268,6 +16268,31 @@
]
(const_string "<sseinsnmode>")))])
+;; V1TImode AND, IOR and XOR performed directly in an SSE register.
+;; Alternative 0 is the two-operand legacy SSE2 form (destination tied to
+;; operand 1), alternative 1 is the three-operand VEX form.  Alternative 2
+;; uses the "v" constraint and an EVEX prefix; EVEX provides no unsuffixed
+;; vpand/vpor/vpxor, only the element-size-suffixed vp<logic>d/q forms, and
+;; their 128-bit variants require AVX512VL.  For a whole-register logic
+;; operation without masking the "d" and "q" forms give the same result.
+(define_insn "<code>v1ti3"
+ [(set (match_operand:V1TI 0 "register_operand" "=x,x,v")
+ (any_logic:V1TI
+ (match_operand:V1TI 1 "register_operand" "%0,x,v")
+ (match_operand:V1TI 2 "vector_operand" "xBm,xm,vm")))]
+ "TARGET_SSE2"
+ "@
+ p<logic>\t{%2, %0|%0, %2}
+ vp<logic>\t{%2, %1, %0|%0, %1, %2}
+ vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,avx,avx512vl")
+ (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+;; One's complement (bitwise NOT) of a V1TImode value, available whenever
+;; TARGET_SSE2 holds.  The expansion rewrites ~x as x XOR all-ones: the
+;; preparation statement below fills in operand 2, which the RTL template
+;; references through match_dup.
+(define_expand "one_cmplv1ti2"
+ [(set (match_operand:V1TI 0 "register_operand")
+ (xor:V1TI (match_operand:V1TI 1 "register_operand")
+ (match_dup 2)))]
+ "TARGET_SSE2"
+{
+ /* Operand 2 is the V1TImode all-ones constant, forced into a register.  */
+ operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
+})
+
(define_mode_iterator AVX512ZEXTMASK
[(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])