aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2023-05-24 10:07:36 +0200
committer Richard Biener <rguenther@suse.de> 2023-05-24 14:45:12 +0200
commit affee7dcfa1ee272d43ac7cb68cf423dbd956fd8 (patch)
tree 6a5c61413b4decc607d4c1e2d8eddd4e519f8db3
parent 257c2be7ff8dfdc610202a1e1f5a8a668b939bdb (diff)
download gcc-affee7dcfa1ee272d43ac7cb68cf423dbd956fd8.zip
gcc-affee7dcfa1ee272d43ac7cb68cf423dbd956fd8.tar.gz
gcc-affee7dcfa1ee272d43ac7cb68cf423dbd956fd8.tar.bz2
target/109944 - avoid STLF fail for V16QImode CTOR expansion
The following dispatches to V2DImode CTOR expansion instead of using sets of (subreg:DI (reg:V16QI 146) [08]) which causes LRA to spill DImode and reload V16QImode. The same applies for V8QImode or V4HImode construction from SImode parts which happens during 32bit libgcc build.

PR target/109944
* config/i386/i386-expand.cc (ix86_expand_vector_init_general): Perform final vector composition using ix86_expand_vector_init_general instead of setting the highpart and lowpart which causes spilling.

* gcc.target/i386/pr109944-1.c: New testcase.
* gcc.target/i386/pr109944-2.c: Likewise.
-rw-r--r-- gcc/config/i386/i386-expand.cc 11
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr109944-1.c 30
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr109944-2.c 17
3 files changed, 53 insertions, 5 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ff3d382..19acd9c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -16367,11 +16367,12 @@ quarter:
emit_move_insn (target, gen_lowpart (mode, words[0]));
else if (n_words == 2)
{
- rtx tmp = gen_reg_rtx (mode);
- emit_clobber (tmp);
- emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]);
- emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]);
- emit_move_insn (target, tmp);
+ gcc_assert (tmp_mode == DImode || tmp_mode == SImode);
+ machine_mode concat_mode = tmp_mode == DImode ? V2DImode : V2SImode;
+ rtx tmp = gen_reg_rtx (concat_mode);
+ vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words));
+ ix86_expand_vector_init_general (false, concat_mode, tmp, vals);
+ emit_move_insn (target, gen_lowpart (mode, tmp));
}
else if (n_words == 4)
{
diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c b/gcc/testsuite/gcc.target/i386/pr109944-1.c
new file mode 100644
index 0000000..d82214d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void foo (char * __restrict a, char *b)
+{
+ a[0] = b[0];
+ a[1] = b[16];
+ a[2] = b[32];
+ a[3] = b[48];
+ a[4] = b[64];
+ a[5] = b[80];
+ a[6] = b[96];
+ a[7] = b[112];
+ a[8] = b[128];
+ a[9] = b[144];
+ a[10] = b[160];
+ a[11] = b[176];
+ a[12] = b[192];
+ a[13] = b[208];
+ a[14] = b[224];
+ a[15] = b[240];
+}
+
+/* When the store is vectorized we do not want to generate a spill/reload like:
+ movq %rdx, -24(%rsp)
+...
+ movq %rax, -16(%rsp)
+ movdqa -24(%rsp), %xmm0
+ movups %xmm0, (%rdi) */
+/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c b/gcc/testsuite/gcc.target/i386/pr109944-2.c
new file mode 100644
index 0000000..318dfab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef char v16qi __attribute__((vector_size(16)));
+v16qi foo (char *b)
+{
+ return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112],
+ b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] };
+}
+
+/* We do not want to generate a spill/reload like:
+ movq %rdx, -24(%rsp)
+...
+ movq %rax, -16(%rsp)
+ movdqa -24(%rsp), %xmm0
+ movups %xmm0, (%rdi) */
+/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */