aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Segher Boessenkool <segher@kernel.crashing.org> 2018-07-23 13:27:38 +0200
committer: Segher Boessenkool <segher@gcc.gnu.org> 2018-07-23 13:27:38 +0200
commit: 9fede15c4d5f7873ed906eb8cddee7cb35d2cec4 (patch)
tree: 4f8dd49a9f0622b8b149e9208bdc828ff65ba1b0 /gcc
parent: 268e16e89b2fe7ed6f9f03c877e05714464a704a (diff)
download: gcc-9fede15c4d5f7873ed906eb8cddee7cb35d2cec4.zip
          gcc-9fede15c4d5f7873ed906eb8cddee7cb35d2cec4.tar.gz
          gcc-9fede15c4d5f7873ed906eb8cddee7cb35d2cec4.tar.bz2
rs6000: Improve vsx_init_v4si
This changes vsx_init_v4si to be an expander.  That way, no special cases
are needed anymore for special arguments: the normal RTL passes can deal
with it.

	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
	* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
	* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
	the elements into a register.
	(rs6000_split_v4si_init_di_reg): Delete.
	(rs6000_split_v4si_init): Delete.
	* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
	(vsx_init_v4si): Rewrite as a define_expand.

From-SVN: r262930
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/rs6000/rs6000-p8swap.c 1
-rw-r--r-- gcc/config/rs6000/rs6000-protos.h 1
-rw-r--r-- gcc/config/rs6000/rs6000.c 92
-rw-r--r-- gcc/config/rs6000/vsx.md 49
5 files changed, 45 insertions, 109 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7dc5e0b..1bf8b46 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
+ * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
+ * config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
+ * config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
+ the elements into a register.
+ (rs6000_split_v4si_init_di_reg): Delete.
+ (rs6000_split_v4si_init): Delete.
+ * config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
+ (vsx_init_v4si): Rewrite as a define_expand.
+
+2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
+
* config/rs6000/rs6000.md (splitters for rldimi and rlwimi with the
zero_extend argument from memory): New.
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index 071bc0c..f32db38 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -772,7 +772,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VSX_EXTRACT:
case UNSPEC_VSX_SET:
case UNSPEC_VSX_SLDWI:
- case UNSPEC_VSX_VEC_INIT:
case UNSPEC_VSX_VSLO:
case UNSPEC_VUNPACK_HI_SIGN:
case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 714b8a8f..fc45aa5 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -61,7 +61,6 @@ extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
-extern void rs6000_split_v4si_init (rtx []);
extern void altivec_expand_vec_perm_le (rtx op[4]);
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index caa35e0..2b736d7 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6857,11 +6857,7 @@ rs6000_expand_vector_init (rtx target, rtx vals)
size_t i;
for (i = 0; i < 4; i++)
- {
- elements[i] = XVECEXP (vals, 0, i);
- if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
- elements[i] = copy_to_mode_reg (SImode, elements[i]);
- }
+ elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
elements[2], elements[3]));
@@ -7568,92 +7564,6 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
gcc_unreachable ();
}
-/* Helper function for rs6000_split_v4si_init to build up a DImode value from
- two SImode values. */
-
-static void
-rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
-{
- const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
-
- if (CONST_INT_P (si1) && CONST_INT_P (si2))
- {
- unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
- unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
-
- emit_move_insn (dest, GEN_INT (const1 | const2));
- return;
- }
-
- /* Put si1 into upper 32-bits of dest. */
- if (CONST_INT_P (si1))
- emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
- else
- {
- /* Generate RLDIC. */
- rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
- rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
- rtx mask_rtx = GEN_INT (mask_32bit << 32);
- rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
- gcc_assert (!reg_overlap_mentioned_p (dest, si1));
- emit_insn (gen_rtx_SET (dest, and_rtx));
- }
-
- /* Put si2 into the temporary. */
- gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
- if (CONST_INT_P (si2))
- emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
- else
- emit_insn (gen_zero_extendsidi2 (tmp, si2));
-
- /* Combine the two parts. */
- emit_insn (gen_iordi3 (dest, dest, tmp));
- return;
-}
-
-/* Split a V4SI initialization. */
-
-void
-rs6000_split_v4si_init (rtx operands[])
-{
- rtx dest = operands[0];
-
- /* Destination is a GPR, build up the two DImode parts in place. */
- if (REG_P (dest) || SUBREG_P (dest))
- {
- int d_regno = regno_or_subregno (dest);
- rtx scalar1 = operands[1];
- rtx scalar2 = operands[2];
- rtx scalar3 = operands[3];
- rtx scalar4 = operands[4];
- rtx tmp1 = operands[5];
- rtx tmp2 = operands[6];
-
- /* Even though we only need one temporary (plus the destination, which
- has an early clobber constraint, try to use two temporaries, one for
- each double word created. That way the 2nd insn scheduling pass can
- rearrange things so the two parts are done in parallel. */
- if (BYTES_BIG_ENDIAN)
- {
- rtx di_lo = gen_rtx_REG (DImode, d_regno);
- rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
- rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
- rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
- }
- else
- {
- rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
- rtx di_hi = gen_rtx_REG (DImode, d_regno);
- rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
- rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
- }
- return;
- }
-
- else
- gcc_unreachable ();
-}
-
/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
selects whether the alignment is abi mandated, optional, or
both abi and optional alignment. */
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 432aa1e..de2fa78 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -388,7 +388,6 @@
UNSPEC_VSX_VXSIG
UNSPEC_VSX_VIEXP
UNSPEC_VSX_VTSTDC
- UNSPEC_VSX_VEC_INIT
UNSPEC_VSX_VSIGNED2
UNSPEC_LXVL
@@ -2946,23 +2945,41 @@
}
[(set_attr "type" "vecperm")])
-;; V4SImode initialization splitter
-(define_insn_and_split "vsx_init_v4si"
- [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
- (unspec:V4SI
- [(match_operand:SI 1 "reg_or_cint_operand" "rn")
- (match_operand:SI 2 "reg_or_cint_operand" "rn")
- (match_operand:SI 3 "reg_or_cint_operand" "rn")
- (match_operand:SI 4 "reg_or_cint_operand" "rn")]
- UNSPEC_VSX_VEC_INIT))
- (clobber (match_scratch:DI 5 "=&r"))
- (clobber (match_scratch:DI 6 "=&r"))]
+;; Concatenate 4 SImode elements into a V4SImode reg.
+(define_expand "vsx_init_v4si"
+ [(use (match_operand:V4SI 0 "gpc_reg_operand"))
+ (use (match_operand:SI 1 "gpc_reg_operand"))
+ (use (match_operand:SI 2 "gpc_reg_operand"))
+ (use (match_operand:SI 3 "gpc_reg_operand"))
+ (use (match_operand:SI 4 "gpc_reg_operand"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
- "#"
- "&& reload_completed"
- [(const_int 0)]
{
- rs6000_split_v4si_init (operands);
+ rtx a = gen_reg_rtx (DImode);
+ rtx b = gen_reg_rtx (DImode);
+ rtx c = gen_reg_rtx (DImode);
+ rtx d = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendsidi2 (a, operands[1]));
+ emit_insn (gen_zero_extendsidi2 (b, operands[2]));
+ emit_insn (gen_zero_extendsidi2 (c, operands[3]));
+ emit_insn (gen_zero_extendsidi2 (d, operands[4]));
+ if (!BYTES_BIG_ENDIAN)
+ {
+ std::swap (a, b);
+ std::swap (c, d);
+ }
+
+ rtx aa = gen_reg_rtx (DImode);
+ rtx ab = gen_reg_rtx (DImode);
+ rtx cc = gen_reg_rtx (DImode);
+ rtx cd = gen_reg_rtx (DImode);
+ emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
+ emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
+ emit_insn (gen_iordi3 (ab, aa, b));
+ emit_insn (gen_iordi3 (cd, cc, d));
+
+ rtx abcd = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
+ emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
DONE;
})