about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2018-04-12 13:17:23 +0200
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2018-04-12 13:17:23 +0200
commit: dcd88195e7c18ad57165f24b7f15e2fa6d239876 (patch)
tree: ef6d60a25efd9ae462b61bb5fcabcbd03584c6c6 /gcc
parent: b9dd1a79284dd9883c1eec0412145108135e3d75 (diff)
download: gcc-dcd88195e7c18ad57165f24b7f15e2fa6d239876.zip
gcc-dcd88195e7c18ad57165f24b7f15e2fa6d239876.tar.gz
gcc-dcd88195e7c18ad57165f24b7f15e2fa6d239876.tar.bz2
re PR target/85328 (accessing ymm16 with non-avx512 instruction form)
PR target/85328
* config/i386/sse.md
(<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name> split,
<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name> split,
vec_extract_lo_<mode><mask_name> split, vec_extract_lo_v32hi,
vec_extract_lo_v64qi): For non-AVX512VL if input is xmm16+ reg
and output is a reg, avoid creating invalid lowpart subreg, but
instead split into a 512-bit move. Don't split if not AVX512VL,
input is xmm16+ reg and output is a mem.
(vec_extract_lo_<mode><mask_name>, vec_extract_lo_v32hi,
vec_extract_lo_v64qi): Don't require split if not AVX512VL, input is
xmm16+ reg and output is a mem.

* gcc.target/i386/pr85328.c: New test.

From-SVN: r259344
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 15
-rw-r--r-- gcc/config/i386/sse.md 98
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr85328.c 18
4 files changed, 123 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5d10cbf..487d401 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2018-04-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/85328
+ * config/i386/sse.md
+ (<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name> split,
+ <mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name> split,
+ vec_extract_lo_<mode><mask_name> split, vec_extract_lo_v32hi,
+ vec_extract_lo_v64qi): For non-AVX512VL if input is xmm16+ reg
+ and output is a reg, avoid creating invalid lowpart subreg, but
+ instead split into a 512-bit move. Don't split if not AVX512VL,
+ input is xmm16+ reg and output is a mem.
+ (vec_extract_lo_<mode><mask_name>, vec_extract_lo_v32hi,
+ vec_extract_lo_v64qi): Don't require split if not AVX512VL, input is
+ xmm16+ reg and output is a mem.
+
2018-04-12 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* config/s390/s390.c (s390_output_indirect_thunk_function): Check
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8c970e0..c3345d0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7361,9 +7361,21 @@
(vec_select:<ssequartermode>
(match_operand:V8FI 1 "register_operand")
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_AVX512DQ && reload_completed"
+ "TARGET_AVX512DQ
+ && reload_completed
+ && (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
+{
+ if (!TARGET_AVX512VL
+ && REG_P (operands[0])
+ && EXT_REX_SSE_REG_P (operands[1]))
+ operands[0]
+ = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
+ else
+ operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
+})
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
[(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
@@ -7394,9 +7406,21 @@
(match_operand:V16FI 1 "register_operand")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "TARGET_AVX512F && reload_completed"
+ "TARGET_AVX512F
+ && reload_completed
+ && (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
+{
+ if (!TARGET_AVX512VL
+ && REG_P (operands[0])
+ && EXT_REX_SSE_REG_P (operands[1]))
+ operands[0]
+ = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
+ else
+ operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
+})
(define_mode_attr extract_type_2
[(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
@@ -7639,7 +7663,10 @@
&& <mask_mode512bit_condition>
&& (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
- if (<mask_applied>)
+ if (<mask_applied>
+ || (!TARGET_AVX512VL
+ && !REG_P (operands[0])
+ && EXT_REX_SSE_REG_P (operands[1])))
return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
else
return "#";
@@ -7654,9 +7681,20 @@
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
"TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
- && reload_completed"
+ && reload_completed
+ && (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
+{
+ if (!TARGET_AVX512VL
+ && REG_P (operands[0])
+ && EXT_REX_SSE_REG_P (operands[1]))
+ operands[0]
+ = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
+ else
+ operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+})
(define_insn "vec_extract_lo_<mode><mask_name>"
[(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
@@ -7828,10 +7866,27 @@
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
"TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
+{
+ if (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))
+ return "#";
+ else
+ return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
+}
+ "&& reload_completed
+ && (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (V16HImode, operands[1]);")
+{
+ if (!TARGET_AVX512VL
+ && REG_P (operands[0])
+ && EXT_REX_SSE_REG_P (operands[1]))
+ operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
+ else
+ operands[1] = gen_lowpart (V16HImode, operands[1]);
+})
(define_insn "vec_extract_hi_v32hi"
[(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
@@ -7913,10 +7968,27 @@
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
"TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
+{
+ if (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))
+ return "#";
+ else
+ return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
+}
+ "&& reload_completed
+ && (TARGET_AVX512VL
+ || REG_P (operands[0])
+ || !EXT_REX_SSE_REG_P (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
- "operands[1] = gen_lowpart (V32QImode, operands[1]);")
+{
+ if (!TARGET_AVX512VL
+ && REG_P (operands[0])
+ && EXT_REX_SSE_REG_P (operands[1]))
+ operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
+ else
+ operands[1] = gen_lowpart (V32QImode, operands[1]);
+})
(define_insn "vec_extract_hi_v64qi"
[(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d6cdfd4..0309915 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-04-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/85328
+ * gcc.target/i386/pr85328.c: New test.
+
2018-04-12 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* gcc.target/s390/nobp-no-dwarf2-cfi.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr85328.c b/gcc/testsuite/gcc.target/i386/pr85328.c
new file mode 100644
index 0000000..987ea82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr85328.c
@@ -0,0 +1,18 @@
+/* PR target/85328 */
+/* { dg-do assemble { target avx512f } } */
+/* { dg-options "-O3 -fno-caller-saves -mavx512f" } */
+
+typedef char U __attribute__((vector_size (64)));
+typedef int V __attribute__((vector_size (64)));
+U a, b;
+
+extern void bar (void);
+
+V
+foo (V f)
+{
+ b <<= (U){(V){}[63]} & 7;
+ bar ();
+ a = (U)f & 7;
+ return (V)b;
+}