author    Martin Liska <mliska@suse.cz>  2022-09-20 17:24:19 +0200
committer Martin Liska <mliska@suse.cz>  2022-09-20 17:24:19 +0200
commit    c9c59aa19c0b7159636763294b7b0c87c696d675 (patch)
tree      d402ef5a5b905c4033575ae69b6b2b29fa8e977c /gcc
parent    9934c1e645a45df66cbd9c428ccdaf75219ea200 (diff)
parent    d812e8cb2a920fd75768e16ca8ded59ad93c172f (diff)
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ada/exp_ch6.adb | 2
-rw-r--r--  gcc/ada/sem_ch6.adb | 2
-rw-r--r--  gcc/ada/sem_disp.ads | 2
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc | 12
-rw-r--r--  gcc/cp/module.cc | 3
-rw-r--r--  gcc/ctfc.cc | 2
-rw-r--r--  gcc/doc/md.texi | 2
-rw-r--r--  gcc/fortran/gfortran.texi | 5
-rw-r--r--  gcc/gimple-range-infer.cc | 2
-rw-r--r--  gcc/testsuite/g++.dg/modules/pr106761.h | 22
-rw-r--r--  gcc/testsuite/g++.dg/modules/pr106761_a.H | 5
-rw-r--r--  gcc/testsuite/g++.dg/modules/pr106761_b.C | 7
-rw-r--r--  gcc/testsuite/g++.dg/vect/pr106794.cc | 40
-rw-r--r--  gcc/testsuite/g++.dg/warn/Wclass-memaccess.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C | 2
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/pr106970.c | 9
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr106914.c | 15
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-gather-5.c | 42
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/p9-extract-1.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/s390/s390.exp | 2
-rw-r--r--  gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c | 2
-rw-r--r--  gcc/testsuite/gdc.dg/torture/simd_store.d | 2
-rw-r--r--  gcc/testsuite/gfortran.dg/actual_array_offset_1.f90 | 2
-rw-r--r--  gcc/testsuite/gfortran.dg/pdt_15.f03 | 2
-rw-r--r--  gcc/testsuite/gfortran.dg/pointer_array_8.f90 | 4
-rw-r--r--  gcc/tree-vect-data-refs.cc | 1
-rw-r--r--  gcc/tree-vect-slp.cc | 30
27 files changed, 193 insertions, 30 deletions
diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index 0873191..ce1a752 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -6582,7 +6582,7 @@ package body Exp_Ch6 is
-- but optimize the case where the result is a function call that
-- also needs finalization. In this case the result can directly be
- -- allocated on the the return stack of the caller and no further
+ -- allocated on the return stack of the caller and no further
-- processing is required.
if Present (Utyp)
diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
index 0459058..7db0cb7 100644
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -505,7 +505,7 @@ package body Sem_Ch6 is
-- this because it is not part of the original source.
-- If this is an ignored Ghost entity, analysis of the generated
-- body is needed to hide external references (as is done in
- -- Analyze_Subprogram_Body) after which the the subprogram profile
+ -- Analyze_Subprogram_Body) after which the subprogram profile
-- can be frozen, which is needed to expand calls to such an ignored
-- Ghost subprogram.
diff --git a/gcc/ada/sem_disp.ads b/gcc/ada/sem_disp.ads
index 563b7f3..841fc74 100644
--- a/gcc/ada/sem_disp.ads
+++ b/gcc/ada/sem_disp.ads
@@ -63,7 +63,7 @@ package Sem_Disp is
-- the inherited subprogram will have been hidden by the current one at
-- the point of the type derivation, so it does not appear in the list
-- of primitive operations of the type, and this procedure inserts the
- -- overriding subprogram in the the full type's list of primitives by
+ -- overriding subprogram in the full type's list of primitives by
-- iterating over the list for the parent type. If instead Subp is a new
-- primitive, then it's simply appended to the primitive list.
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 9d78b27..12d9bee 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -530,7 +530,8 @@ static CONSTEXPR const function_group_info function_groups[] = {
};
/* The scalar type associated with each vector type. */
-GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
+extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
+tree scalar_types[NUM_VECTOR_TYPES];
/* The single-predicate and single-vector types, with their built-in
"__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always
@@ -538,13 +539,16 @@ GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1];
/* Same, but with the arm_sve.h "sv..._t" name. */
-GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+extern GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
+tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
/* The svpattern enum type. */
-GTY(()) tree acle_svpattern;
+extern GTY(()) tree acle_svpattern;
+tree acle_svpattern;
/* The svprfop enum type. */
-GTY(()) tree acle_svprfop;
+extern GTY(()) tree acle_svprfop;
+tree acle_svprfop;
/* The list of all registered function decls, indexed by code. */
static GTY(()) vec<registered_function *, va_gc> *registered_functions;
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 1a1ff5b..9a9ef4e 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -8922,6 +8922,7 @@ trees_out::type_node (tree type)
if (streaming_p ())
u (PACK_EXPANSION_LOCAL_P (type));
tree_node (PACK_EXPANSION_PARAMETER_PACKS (type));
+ tree_node (PACK_EXPANSION_EXTRA_ARGS (type));
break;
case TYPENAME_TYPE:
@@ -9455,12 +9456,14 @@ trees_in::tree_node (bool is_use)
{
bool local = u ();
tree param_packs = tree_node ();
+ tree extra_args = tree_node ();
if (!get_overrun ())
{
tree expn = cxx_make_type (TYPE_PACK_EXPANSION);
SET_TYPE_STRUCTURAL_EQUALITY (expn);
PACK_EXPANSION_PATTERN (expn) = res;
PACK_EXPANSION_PARAMETER_PACKS (expn) = param_packs;
+ PACK_EXPANSION_EXTRA_ARGS (expn) = extra_args;
PACK_EXPANSION_LOCAL_P (expn) = local;
res = expn;
}
diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc
index 9773358..0964543 100644
--- a/gcc/ctfc.cc
+++ b/gcc/ctfc.cc
@@ -324,7 +324,7 @@ ctf_add_string (ctf_container_ref ctfc, const char * name,
return ctfc_strtable_add_str (str_table, name, name_offset);
}
-/* Add the compilation unit (CU) name string to the the CTF string table. The
+/* Add the compilation unit (CU) name string to the CTF string table. The
CU name has a prepended pwd string if it is a relative path. Also set the
CU name offset in the CTF container. */
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 3482554..d46963f 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -2282,7 +2282,7 @@ This constraint won't match unless @option{-mprefer-short-insn-regs} is
in effect.
@item Rsc
-The the register class of registers that can be used to hold a
+The register class of registers that can be used to hold a
sibcall call address. I.e., a caller-saved register.
@item Rct
diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index 25410e6..4b4ecd5 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -455,7 +455,8 @@ version 2.6, @uref{https://www.openacc.org/}). See
The Fortran 95 standard specifies in Part 2 (ISO/IEC 1539-2:2000)
varying length character strings. While GNU Fortran currently does not
support such strings directly, there exist two Fortran implementations
-for them, which work with GNU Fortran.
+for them, which work with GNU Fortran. One can be found at
+@uref{http://user.astro.wisc.edu/~townsend/static.php?ref=iso-varying-string}.
Deferred-length character strings of Fortran 2003 supports part of
the features of @code{ISO_VARYING_STRING} and should be considered as
@@ -3786,7 +3787,7 @@ The arguments are passed in the following order
@code{CHARACTER} and no C binding is used
@item The arguments in the order in which they appear in the Fortran
declaration
-@item The the present status for optional arguments with value attribute,
+@item The present status for optional arguments with value attribute,
which are internally passed by value
@item The character length and/or coarray token and offset for the first
argument which is a @code{CHARACTER} or a nonallocatable coarray dummy
diff --git a/gcc/gimple-range-infer.cc b/gcc/gimple-range-infer.cc
index 2d12f86..f0d66d0 100644
--- a/gcc/gimple-range-infer.cc
+++ b/gcc/gimple-range-infer.cc
@@ -56,7 +56,7 @@ non_null_loadstore (gimple *, tree op, tree, void *data)
return false;
}
-// Add NAME and RANGE to the the range inference summary.
+// Add NAME and RANGE to the range inference summary.
void
gimple_infer_range::add_range (tree name, vrange &range)
diff --git a/gcc/testsuite/g++.dg/modules/pr106761.h b/gcc/testsuite/g++.dg/modules/pr106761.h
new file mode 100644
index 0000000..9f22a22
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr106761.h
@@ -0,0 +1,22 @@
+// PR c++/106761
+
+template<class...>
+struct __and_;
+
+template<class, class>
+struct is_convertible;
+
+template<class... Ts>
+struct _TupleConstraints {
+ template<class... Us>
+ using __constructible = __and_<is_convertible<Ts, Us>...>;
+};
+
+template<class... Ts>
+struct tuple {
+ template<class... Us>
+ using __constructible
+ = typename _TupleConstraints<Ts...>::template __constructible<Us...>;
+};
+
+tuple<int, int> t;
diff --git a/gcc/testsuite/g++.dg/modules/pr106761_a.H b/gcc/testsuite/g++.dg/modules/pr106761_a.H
new file mode 100644
index 0000000..8ad1164
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr106761_a.H
@@ -0,0 +1,5 @@
+// PR c++/106761
+// { dg-additional-options -fmodule-header }
+
+// { dg-module-cmi {} }
+#include "pr106761.h"
diff --git a/gcc/testsuite/g++.dg/modules/pr106761_b.C b/gcc/testsuite/g++.dg/modules/pr106761_b.C
new file mode 100644
index 0000000..418991b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/pr106761_b.C
@@ -0,0 +1,7 @@
+// PR c++/106761
+// { dg-additional-options -fmodules-ts }
+
+#include "pr106761.h"
+import "pr106761_a.H";
+
+tuple<int, int> u = t;
diff --git a/gcc/testsuite/g++.dg/vect/pr106794.cc b/gcc/testsuite/g++.dg/vect/pr106794.cc
new file mode 100644
index 0000000..f056563
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr106794.cc
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-additional-options "-march=bdver2" { target x86_64-*-* i?86-*-* } } */
+
+template <class T> struct Vector3 {
+ Vector3();
+ Vector3(T, T, T);
+ T length() const;
+ T x, y, z;
+};
+template <class T>
+Vector3<T>::Vector3(T _x, T _y, T _z) : x(_x), y(_y), z(_z) {}
+Vector3<float> cross(Vector3<float> a, Vector3<float> b) {
+ return Vector3<float>(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z,
+ a.x * b.y - a.y * b.x);
+}
+template <class T> T Vector3<T>::length() const { return z; }
+int generateNormals_i;
+float generateNormals_p2_0, generateNormals_p0_0;
+struct SphereMesh {
+ void generateNormals();
+ float vertices;
+};
+void SphereMesh::generateNormals() {
+ Vector3<float> *faceNormals = new Vector3<float>;
+ for (int j; j; j++) {
+ float *p0 = &vertices + 3, *p1 = &vertices + j * 3, *p2 = &vertices + 3,
+ *p3 = &vertices + generateNormals_i + j * 3;
+ Vector3<float> v0(p1[0] - generateNormals_p0_0, p1[1] - 1, p1[2] - 2),
+ v1(0, 1, 2);
+ if (v0.length())
+ v1 = Vector3<float>(p3[0] - generateNormals_p2_0, p3[1] - p2[1],
+ p3[2] - p2[2]);
+ else
+ v1 = Vector3<float>(generateNormals_p0_0 - p3[0], p0[1] - p3[1],
+ p0[2] - p3[2]);
+ Vector3<float> faceNormal = cross(v0, v1);
+ faceNormals[j] = faceNormal;
+ }
+}
diff --git a/gcc/testsuite/g++.dg/warn/Wclass-memaccess.C b/gcc/testsuite/g++.dg/warn/Wclass-memaccess.C
index 1dc23df..87aaa79 100644
--- a/gcc/testsuite/g++.dg/warn/Wclass-memaccess.C
+++ b/gcc/testsuite/g++.dg/warn/Wclass-memaccess.C
@@ -1144,7 +1144,7 @@ void test (HasVolRefAssign *p, const HasVolRefAssign &x,
/* HasVirtuals should only be manipulated by the special member functions
and not by bzero, memcpy, or any other raw memory function. Doing
- otherwse might corrupt the the vtable pointer. */
+ otherwse might corrupt the vtable pointer. */
struct HasVirtuals { int i; virtual void foo (); };
void test (HasVirtuals *p, const HasVirtuals &x,
diff --git a/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C b/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C
index 7386525..404c066 100644
--- a/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C
+++ b/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C
@@ -4,7 +4,7 @@
// Before the fix that came with this test, we'd output an error for
// the __INT_MAX__ token. That token has a BUILTINS_LOCATION
-// location, so the the location prefix in the warning message would
+// location, so the location prefix in the warning message would
// be:
// <built-in>:0:0: warning: conversion to 'float' alters 'int' constant value
//
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr106970.c b/gcc/testsuite/gcc.dg/tree-ssa/pr106970.c
new file mode 100644
index 0000000..cda9bd4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr106970.c
@@ -0,0 +1,9 @@
+// { dg-do compile }
+// { dg-options "-O1 -fno-signed-zeros" }
+
+void
+foo (double x, double y)
+{
+ if (!x == !y * -1.0)
+ __builtin_trap ();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr106914.c b/gcc/testsuite/gcc.dg/vect/pr106914.c
new file mode 100644
index 0000000..9d9b3e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr106914.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fprofile-generate" } */
+/* { dg-additional-options "-mavx512vl" { target x86_64-*-* i?86-*-* } } */
+
+int *mask_slp_int64_t_8_2_x, *mask_slp_int64_t_8_2_y, *mask_slp_int64_t_8_2_z;
+
+void
+__attribute__mask_slp_int64_t_8_2() {
+ for (int i; i; i += 8) {
+ mask_slp_int64_t_8_2_x[i + 6] =
+ mask_slp_int64_t_8_2_y[i + 6] ? mask_slp_int64_t_8_2_z[i] : 1;
+ mask_slp_int64_t_8_2_x[i + 7] =
+ mask_slp_int64_t_8_2_y[i + 7] ? mask_slp_int64_t_8_2_z[i + 7] : 2;
+ }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-5.c b/gcc/testsuite/gcc.dg/vect/vect-gather-5.c
new file mode 100644
index 0000000..8b5074b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-5.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+#ifdef __aarch64__
+#pragma GCC target "+sve"
+#endif
+
+long a[100], b[100], c[100];
+
+void g1 ()
+{
+ for (int i = 0; i < 100; i += 2)
+ {
+ c[i] += a[b[i]] + 1;
+ c[i + 1] += a[b[i + 1]] + 2;
+ }
+}
+
+long g2 ()
+{
+ long res = 0;
+ for (int i = 0; i < 100; i += 2)
+ {
+ res += a[b[i + 1]];
+ res += a[b[i]];
+ }
+ return res;
+}
+
+long g3 ()
+{
+ long res = 0;
+ for (int i = 0; i < 100; i += 2)
+ {
+ res += a[b[i]];
+ res += a[b[i + 1]];
+ }
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-times {add new stmt[^\n]*GATHER_LOAD} 3 "vect" { target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump-not {add new stmt[^\n]*VEC_PERM_EXPR} "vect" { target aarch64*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c b/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c
index 2b79145..d7d3ad7 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-extract-1.c
@@ -4,7 +4,7 @@
/* Test to make sure VEXTU{B,H,W}{L,R}X is generated for various vector extract
operations for ISA 3.0 (-mcpu=power9). In addition, make sure that neither
- of the the the old methods of doing vector extracts are done either by
+ of the old methods of doing vector extracts are done either by
explict stores to the stack or by using direct move instructions. */
#include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/s390/s390.exp b/gcc/testsuite/gcc.target/s390/s390.exp
index 0c44070..cae7ea6 100644
--- a/gcc/testsuite/gcc.target/s390/s390.exp
+++ b/gcc/testsuite/gcc.target/s390/s390.exp
@@ -30,7 +30,7 @@ load_lib target-supports.exp
load_lib gfortran-dg.exp
load_lib atomic-dg.exp
-# Return 1 if the the assembler understands .machine and .machinemode. The
+# Return 1 if the assembler understands .machine and .machinemode. The
# target attribute needs that feature to work.
proc check_effective_target_target_attribute { } {
if { ![check_runtime s390_check_machine_machinemode [subst {
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
index 09a15eb..c4e75f5 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
@@ -1,5 +1,5 @@
/* Similiar to vec-cmp-1.c but requires that
- s390_canonicalize_comparison is able to merge the the two nested
+ s390_canonicalize_comparison is able to merge the two nested
compares. */
/* { dg-do compile { target { s390*-*-* } } } */
diff --git a/gcc/testsuite/gdc.dg/torture/simd_store.d b/gcc/testsuite/gdc.dg/torture/simd_store.d
index b96ed42..234c020 100644
--- a/gcc/testsuite/gdc.dg/torture/simd_store.d
+++ b/gcc/testsuite/gdc.dg/torture/simd_store.d
@@ -23,7 +23,7 @@ void main()
// store `v` to location pointed to by `d`
storeUnaligned(cast(T*)d, v);
- // check that the the data was stored correctly
+ // check that the data was stored correctly
foreach (j; 0..T.sizeof)
assert(ptrToV[j] == d[j]);
}
diff --git a/gcc/testsuite/gfortran.dg/actual_array_offset_1.f90 b/gcc/testsuite/gfortran.dg/actual_array_offset_1.f90
index a78f546..53dbc2e 100644
--- a/gcc/testsuite/gfortran.dg/actual_array_offset_1.f90
+++ b/gcc/testsuite/gfortran.dg/actual_array_offset_1.f90
@@ -155,7 +155,7 @@ program test_quicksort
call quicksort( array )
-! Check the the array is correctly ordered
+! Check the array is correctly ordered
if (.not.check (array)) STOP 2
contains
logical function check (arg)
diff --git a/gcc/testsuite/gfortran.dg/pdt_15.f03 b/gcc/testsuite/gfortran.dg/pdt_15.f03
index 30c7f18..4ae1983 100644
--- a/gcc/testsuite/gfortran.dg/pdt_15.f03
+++ b/gcc/testsuite/gfortran.dg/pdt_15.f03
@@ -1,7 +1,7 @@
! { dg-do compile }
! { dg-options "-fdump-tree-original" }
!
-! Test the fix for PR82375. This is a wrinkle on the the allocatable
+! Test the fix for PR82375. This is a wrinkle on the allocatable
! version of pdt_13.f03, pdt_14.f03, whereby 'root' is now declared
! in a subroutine so that it should be cleaned up automatically. This
! is best tested with valgrind or its like.
diff --git a/gcc/testsuite/gfortran.dg/pointer_array_8.f90 b/gcc/testsuite/gfortran.dg/pointer_array_8.f90
index 3bb2a1b..1cc1787 100644
--- a/gcc/testsuite/gfortran.dg/pointer_array_8.f90
+++ b/gcc/testsuite/gfortran.dg/pointer_array_8.f90
@@ -30,7 +30,7 @@
select type (cptr)
type is (integer)
- if (any (cptr .ne. [1,2,3])) STOP 3! Check the the scalarizer works.
+ if (any (cptr .ne. [1,2,3])) STOP 3! Check the scalarizer works.
if (cptr(2) .ne. 2) STOP 4! Check ordinary array indexing.
end select
@@ -63,7 +63,7 @@ contains
addr = loc(arg)
select type (arg)
type is (integer)
- if (any (arg .ne. [1,2,3])) STOP 11! Check the the scalarizer works.
+ if (any (arg .ne. [1,2,3])) STOP 11! Check the scalarizer works.
if (arg(2) .ne. 2) STOP 12! Check ordinary array indexing.
end select
end subroutine
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index b279a82..e03b504 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4151,6 +4151,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
/* Don't include the conversion if the target is happy with
the current offset type. */
if (use_ifn_p
+ && TREE_CODE (off) == SSA_NAME
&& !POINTER_TYPE_P (TREE_TYPE (off))
&& vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
masked_p, vectype, memory_type,
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ca3422c..229f266 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4494,7 +4494,8 @@ vect_optimize_slp_pass::internal_node_cost (slp_tree node, int in_layout_i,
stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
if (rep
&& STMT_VINFO_DATA_REF (rep)
- && DR_IS_READ (STMT_VINFO_DATA_REF (rep)))
+ && DR_IS_READ (STMT_VINFO_DATA_REF (rep))
+ && SLP_TREE_LOAD_PERMUTATION (node).exists ())
{
auto_load_permutation_t tmp_perm;
tmp_perm.safe_splice (SLP_TREE_LOAD_PERMUTATION (node));
@@ -4569,8 +4570,12 @@ vect_optimize_slp_pass::start_choosing_layouts ()
if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
{
/* If splitting out a SLP_TREE_LANE_PERMUTATION can make the node
- unpermuted, record a layout that reverses this permutation. */
- gcc_assert (partition.layout == 0);
+ unpermuted, record a layout that reverses this permutation.
+
+ We would need more work to cope with loads that are internally
+ permuted and also have inputs (such as masks for
+ IFN_MASK_LOADs). */
+ gcc_assert (partition.layout == 0 && !m_slpg->vertices[node_i].succ);
if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
continue;
dr_stmt = DR_GROUP_FIRST_ELEMENT (dr_stmt);
@@ -4684,12 +4689,21 @@ vect_optimize_slp_pass::start_choosing_layouts ()
vertex.weight = vect_slp_node_weight (node);
/* We do not handle stores with a permutation, so all
- incoming permutations must have been materialized. */
+ incoming permutations must have been materialized.
+
+ We also don't handle masked grouped loads, which lack a
+ permutation vector. In this case the memory locations
+ form an implicit second input to the loads, on top of the
+ explicit mask input, and the memory input's layout cannot
+ be changed.
+
+ On the other hand, we do support permuting gather loads and
+ masked gather loads, where each scalar load is independent
+ of the others. This can be useful if the address/index input
+ benefits from permutation. */
if (STMT_VINFO_DATA_REF (rep)
- && DR_IS_WRITE (STMT_VINFO_DATA_REF (rep)))
- /* ??? We're forcing materialization in place
- of the child here, we'd need special handling
- in materialization to leave layout -1 here. */
+ && STMT_VINFO_GROUPED_ACCESS (rep)
+ && !SLP_TREE_LOAD_PERMUTATION (node).exists ())
partition.layout = 0;
/* We cannot change the layout of an operation that is