aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2023-10-24 11:26:45 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2023-10-24 11:26:45 +0100
commit58de8e933d19516d1a9de66a617378493ffa29a5 (patch)
treed8096065ef71078612958eb93fd4e979646f6953 /gcc
parentcc4779559bea982f9307a8bc1cb017b8eda28d6d (diff)
downloadgcc-58de8e933d19516d1a9de66a617378493ffa29a5.zip
gcc-58de8e933d19516d1a9de66a617378493ffa29a5.tar.gz
gcc-58de8e933d19516d1a9de66a617378493ffa29a5.tar.bz2
i386: Avoid paradoxical subreg dests in vector zero_extend
For the V2HI -> V2SI zero extension in: typedef unsigned short v2hi __attribute__((vector_size(4))); typedef unsigned int v2si __attribute__((vector_size(8))); v2si f (v2hi x) { return (v2si) {x[0], x[1]}; } ix86_expand_sse_extend would generate: (set (reg:V2HI 102) (const_vector:V2HI [(const_int 0 [0]) (const_int 0 [0])])) (set (subreg:V8HI (reg:V2HI 101) 0) (vec_select:V8HI (vec_concat:V16HI (subreg:V8HI (reg/v:V2HI 99 [ x ]) 0) (subreg:V8HI (reg:V2HI 102) 0)) (parallel [(const_int 0 [0]) (const_int 8 [0x8]) (const_int 1 [0x1]) (const_int 9 [0x9]) (const_int 2 [0x2]) (const_int 10 [0xa]) (const_int 3 [0x3]) (const_int 11 [0xb])]))) (set (reg:V2SI 100) (subreg:V2SI (reg:V2HI 101) 0)) (expr_list:REG_EQUAL (zero_extend:V2SI (reg/v:V2HI 99 [ x ]))) But using (subreg:V8HI (reg:V2HI 101) 0) as the destination of the vec_select means that only the low 4 bytes of the destination are stored. Only the lower half of reg 100 is well-defined. Things tend to happen to work if the register allocator ties reg 101 to reg 100. But it caused problems with the upcoming late-combine pass because we propagated the set of reg 100 into its uses. gcc/ * config/i386/i386-expand.cc (ix86_split_mmx_punpck): Allow the destination to be wider than the sources. Take the mode from the first source. (ix86_expand_sse_extend): Pass the destination directly to ix86_split_mmx_punpck, rather than using a fresh register that is half the size.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386-expand.cc9
1 files changed, 5 insertions, 4 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 1eae9d7..2361ff7 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1110,7 +1110,9 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
ix86_move_vector_high_sse_to_mmx (op0);
}
-/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. This is also used
+ for a full unpack of OPERANDS[1] and OPERANDS[2] into a wider
+ OPERANDS[0]. */
void
ix86_split_mmx_punpck (rtx operands[], bool high_p)
@@ -1118,7 +1120,7 @@ ix86_split_mmx_punpck (rtx operands[], bool high_p)
rtx op0 = operands[0];
rtx op1 = operands[1];
rtx op2 = operands[2];
- machine_mode mode = GET_MODE (op0);
+ machine_mode mode = GET_MODE (op1);
rtx mask;
/* The corresponding SSE mode. */
machine_mode sse_mode, double_sse_mode;
@@ -5660,7 +5662,7 @@ ix86_expand_sse_extend (rtx dest, rtx src, bool unsigned_p)
gcc_unreachable ();
}
- ops[0] = gen_reg_rtx (imode);
+ ops[0] = dest;
ops[1] = force_reg (imode, src);
@@ -5671,7 +5673,6 @@ ix86_expand_sse_extend (rtx dest, rtx src, bool unsigned_p)
ops[1], pc_rtx, pc_rtx);
ix86_split_mmx_punpck (ops, false);
- emit_move_insn (dest, lowpart_subreg (GET_MODE (dest), ops[0], imode));
}
/* Unpack SRC into the next wider integer vector type. UNSIGNED_P is