aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2023-06-21 08:03:05 +0200
committerJan Beulich <jbeulich@suse.com>2023-06-21 08:03:05 +0200
commit864c6471bdc6cdec6da60b66ac13e9fe3cd73fb8 (patch)
tree693e8c381d27d59d961845d0403b849243934c7e /gcc/testsuite
parent67061960b6ccdb706b11613a27c4ae30ee81c2c5 (diff)
downloadgcc-864c6471bdc6cdec6da60b66ac13e9fe3cd73fb8.zip
gcc-864c6471bdc6cdec6da60b66ac13e9fe3cd73fb8.tar.gz
gcc-864c6471bdc6cdec6da60b66ac13e9fe3cd73fb8.tar.bz2
x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F
There's no reason to constrain this to AVX512VL, unless instructed so by -mprefer-vector-width=, as the wider operation is unusable for more narrow operands only when the possible memory source is a non-broadcast one. This way even the scalar copysign<mode>3 can benefit from the operation being a single-insn one (leaving aside moves which the compiler decides to insert for unclear reasons, and leaving aside the fact that bcst_mem_operand() is too restrictive for broadcast to be embedded right into VPTERNLOG*). While there, also bring *<avx512>_vternlog<mode>_all in sync with the three splitters. Along with this also request value duplication in ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating excess space allocation in .rodata.*, filled with zeros which are never read. gcc/ * config/i386/i386-expand.cc (ix86_expand_copysign): Request value duplication by ix86_build_signbit_mask() when AVX512F and not HFmode. * config/i386/sse.md (*<avx512>_vternlog<mode>_all): Convert to 2-alternative form. Adjust "mode" attribute. Add "enabled" attribute. (*<avx512>_vpternlog<mode>_1): Also permit when TARGET_AVX512F && !TARGET_PREFER_AVX256. (*<avx512>_vpternlog<mode>_2): Likewise. (*<avx512>_vpternlog<mode>_3): Likewise. gcc/testsuite/ * gcc.target/i386/avx512f-copysign.c: New test.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-copysign.c32
1 files changed, 32 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-copysign.c b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c
new file mode 100644
index 0000000..51ca028
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$(?:216|228|0xd8|0xe4)," 5 } } */
+
+double cs_df (double x, double y) /* Scalar double case: copysign via __builtin_copysign.  */
+{
+ return __builtin_copysign (x, y);
+}
+
+float cs_sf (float x, float y) /* Scalar float case: copysign via __builtin_copysignf.  */
+{
+ return __builtin_copysignf (x, y);
+}
+
+typedef double __attribute__ ((vector_size (16))) v2df; /* 16-byte vector of double.  */
+typedef double __attribute__ ((vector_size (32))) v4df; /* 32-byte vector of double.  */
+typedef double __attribute__ ((vector_size (64))) v8df; /* 64-byte vector of double.  */
+
+v2df cs_v2df (v2df x, v2df y) /* 16-byte vector case, built without AVX512VL (see dg-options).  */
+{
+ return __builtin_ia32_copysignpd (x, y);
+}
+
+v4df cs_v4df (v4df x, v4df y) /* 32-byte vector case, built without AVX512VL (see dg-options).  */
+{
+ return __builtin_ia32_copysignpd256 (x, y);
+}
+
+v8df cs_v8df (v8df x, v8df y) /* Full 64-byte vector case.  */
+{
+ return __builtin_ia32_copysignpd512 (x, y);
+}