aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Krebbel <krebbel@linux.ibm.com>2024-06-10 09:09:10 +0200
committerJakub Jelinek <jakub@redhat.com>2024-06-20 13:04:31 +0200
commit166c9f99a2376b2255021de52a563e1268584c12 (patch)
tree2a5e469f6a78582d1da0fab93d5b30d847d2a2e2
parentf79e909a11672f2c5b04239d8d9376b900c5b295 (diff)
downloadgcc-166c9f99a2376b2255021de52a563e1268584c12.zip
gcc-166c9f99a2376b2255021de52a563e1268584c12.tar.gz
gcc-166c9f99a2376b2255021de52a563e1268584c12.tar.bz2
IBM Z: Fix ICE in expand_perm_as_replicate
The current implementation assumes that it is always invoked with register operands. For memory operands, however, we even have a dedicated instruction (vlrep). With this patch we try that instruction first and, only if that fails, force the input into a register and continue.

vec_splats generation fails for single-element 128-bit types, which are allowed for vec_splat. This is probably something to sort out in a separate patch.

gcc/ChangeLog:
* config/s390/s390.cc (expand_perm_as_replicate): Handle memory operands.
* config/s390/vx-builtins.md (vec_splats<mode>): Turn into parameterized expander.
(@vec_splats<mode>): New expander.

gcc/testsuite/ChangeLog:
* g++.dg/torture/vshuf-mem.C: New test.

(cherry picked from commit 21fd8c67ad297212e3cb885883cc8df8611f3040)
-rw-r--r--gcc/config/s390/s390.cc17
-rw-r--r--gcc/config/s390/vx-builtins.md2
-rw-r--r--gcc/testsuite/g++.dg/torture/vshuf-mem.C27
3 files changed, 43 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index fa517bd..ec836ec 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17940,7 +17940,8 @@ expand_perm_as_replicate (const struct expand_vec_perm_d &d)
unsigned char i;
unsigned char elem;
rtx base = d.op0;
- rtx insn;
+ rtx insn = NULL_RTX;
+
/* Needed to silence maybe-uninitialized warning. */
gcc_assert (d.nelt > 0);
elem = d.perm[0];
@@ -17954,7 +17955,19 @@ expand_perm_as_replicate (const struct expand_vec_perm_d &d)
base = d.op1;
elem -= d.nelt;
}
- insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+ if (memory_operand (base, d.vmode))
+ {
+ /* Try to use vector load and replicate. */
+ rtx new_base = adjust_address (base, GET_MODE_INNER (d.vmode),
+ elem * GET_MODE_UNIT_SIZE (d.vmode));
+ insn = maybe_gen_vec_splats (d.vmode, d.target, new_base);
+ }
+ if (insn == NULL_RTX)
+ {
+ base = force_reg (d.vmode, base);
+ insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+ }
+
if (insn == NULL_RTX)
return false;
emit_insn (insn);
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 93c0d40..bb271c0 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -145,7 +145,7 @@
DONE;
})
-(define_expand "vec_splats<mode>"
+(define_expand "@vec_splats<mode>"
[(set (match_operand:VEC_HW 0 "register_operand" "")
(vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand" "")))]
"TARGET_VX")
diff --git a/gcc/testsuite/g++.dg/torture/vshuf-mem.C b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
new file mode 100644
index 0000000..5f1ebf6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
@@ -0,0 +1,27 @@
+// { dg-options "-std=c++11" }
+// { dg-do run }
+// { dg-additional-options "-march=z14" { target s390*-*-* } }
+
+/* This used to trigger (2024-05-28) the vectorize_vec_perm_const
+ backend hook to be invoked with a MEM source operand. Extracted
+ from onnxruntime's mlas library. */
+
+typedef float V4SF __attribute__((vector_size (16)));
+typedef int V4SI __attribute__((vector_size (16)));
+
+template < unsigned I0, unsigned I1, unsigned I2, unsigned I3 > V4SF
+MlasShuffleFloat32x4 (V4SF Vector)
+{
+ return __builtin_shuffle (Vector, Vector, V4SI{I0, I1, I2, I3});
+}
+
+int
+main ()
+{
+ V4SF f = { 1.0f, 2.0f, 3.0f, 4.0f };
+ if (MlasShuffleFloat32x4 < 1, 1, 1, 1 > (f)[3] != 2.0f)
+ __builtin_abort ();
+ if (MlasShuffleFloat32x4 < 3, 3, 3, 3 > (f)[1] != 4.0f)
+ __builtin_abort ();
+ return 0;
+}