diff options
author | Richard Biener <rguenther@suse.de> | 2023-05-24 10:07:36 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2023-05-24 14:45:12 +0200 |
commit | affee7dcfa1ee272d43ac7cb68cf423dbd956fd8 (patch) | |
tree | 6a5c61413b4decc607d4c1e2d8eddd4e519f8db3 | |
parent | 257c2be7ff8dfdc610202a1e1f5a8a668b939bdb (diff) | |
download | gcc-affee7dcfa1ee272d43ac7cb68cf423dbd956fd8.zip gcc-affee7dcfa1ee272d43ac7cb68cf423dbd956fd8.tar.gz gcc-affee7dcfa1ee272d43ac7cb68cf423dbd956fd8.tar.bz2 |
target/109944 - avoid STLF fail for V16QImode CTOR expansion
This change dispatches to V2DImode CTOR expansion instead of
using sets of (subreg:DI (reg:V16QI 146) [08]), which cause
LRA to spill in DImode and reload in V16QImode, defeating
store-to-load forwarding. The same applies to V8QImode or V4HImode
construction from SImode parts, which happens during the 32-bit libgcc build.
PR target/109944
* config/i386/i386-expand.cc (ix86_expand_vector_init_general):
Perform final vector composition using
ix86_expand_vector_init_general instead of setting
the highpart and lowpart which causes spilling.
* gcc.target/i386/pr109944-1.c: New testcase.
* gcc.target/i386/pr109944-2.c: Likewise.
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr109944-1.c | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr109944-2.c | 17 |
3 files changed, 53 insertions, 5 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index ff3d382..19acd9c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -16367,11 +16367,12 @@ quarter: emit_move_insn (target, gen_lowpart (mode, words[0])); else if (n_words == 2) { - rtx tmp = gen_reg_rtx (mode); - emit_clobber (tmp); - emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]); - emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]); - emit_move_insn (target, tmp); + gcc_assert (tmp_mode == DImode || tmp_mode == SImode); + machine_mode concat_mode = tmp_mode == DImode ? V2DImode : V2SImode; + rtx tmp = gen_reg_rtx (concat_mode); + vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words)); + ix86_expand_vector_init_general (false, concat_mode, tmp, vals); + emit_move_insn (target, gen_lowpart (mode, tmp)); } else if (n_words == 4) { diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c b/gcc/testsuite/gcc.target/i386/pr109944-1.c new file mode 100644 index 0000000..d82214d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void foo (char * __restrict a, char *b) +{ + a[0] = b[0]; + a[1] = b[16]; + a[2] = b[32]; + a[3] = b[48]; + a[4] = b[64]; + a[5] = b[80]; + a[6] = b[96]; + a[7] = b[112]; + a[8] = b[128]; + a[9] = b[144]; + a[10] = b[160]; + a[11] = b[176]; + a[12] = b[192]; + a[13] = b[208]; + a[14] = b[224]; + a[15] = b[240]; +} + +/* We do not want to generate a spill/reload for when the store is vectorized. + movq %rdx, -24(%rsp) +... 
+ movq %rax, -16(%rsp) + movdqa -24(%rsp), %xmm0 + movups %xmm0, (%rdi) */ +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c b/gcc/testsuite/gcc.target/i386/pr109944-2.c new file mode 100644 index 0000000..318dfab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef char v16qi __attribute__((vector_size(16))); +v16qi foo (char *b) +{ + return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112], + b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] }; +} + +/* We do not want to generate a spill/reload + movq %rdx, -24(%rsp) +... + movq %rax, -16(%rsp) + movdqa -24(%rsp), %xmm0 + movups %xmm0, (%rdi) */ +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */ |