aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-07-27 09:38:54 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2017-07-27 09:38:54 +0000
commit02d3ba0e000ad83dcb76ccccd1ea9882672d71b1 (patch)
tree63d75fb9aea209c71ea9ee3b902b48fcd381c5a7 /gcc
parent7d25ac209de8262e39e5551585da5094a4c5c317 (diff)
downloadgcc-02d3ba0e000ad83dcb76ccccd1ea9882672d71b1.zip
gcc-02d3ba0e000ad83dcb76ccccd1ea9882672d71b1.tar.gz
gcc-02d3ba0e000ad83dcb76ccccd1ea9882672d71b1.tar.bz2
[rs6000] Avoid rotates of floating-point modes
The little-endian VSX code uses rotates to swap the two 64-bit halves of
128-bit scalar modes. This is fine for TImode and V1TImode, but it isn't
really valid to use RTL rotates on floating-point modes like KFmode and
TFmode, and doing that triggered an assert added by the SVE series.
This patch uses bit-casts to V1TImode instead.

2017-07-27 Richard Sandiford <richard.sandiford@linaro.org>

gcc/
* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
* config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
(rs6000_emit_le_vsx_permute): ...this. Take the destination as input.
Emit instructions rather than returning an expression. Handle TFmode
and KFmode by casting to V1TImode.
(rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
(rs6000_emit_le_vsx_store): Likewise.
* config/rs6000/vsx.md (VSX_TI): New iterator.
(*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
(*vsx_le_undo_permute_<mode>): Likewise.
(*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to emit the
split sequence.
(*vsx_le_perm_store_<mode>): Likewise.

From-SVN: r250615
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/config/rs6000/rs6000-protos.h1
-rw-r--r--gcc/config/rs6000/rs6000.c49
-rw-r--r--gcc/config/rs6000/vsx.md64
4 files changed, 74 insertions, 56 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 345034d..7444943 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2017-07-27 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
+ * config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
+ (rs6000_emit_le_vsx_permute): ...this. Take the destination as input.
+ Emit instructions rather than returning an expression. Handle TFmode
+ and KFmode by casting to V1TImode.
+ (rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
+ (rs6000_emit_le_vsx_store): Likewise.
+ * config/rs6000/vsx.md (VSX_TI): New iterator.
+ (*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
+ (*vsx_le_undo_permute_<mode>): Likewise.
+ (*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
+ emit the split sequence.
+ (*vsx_le_perm_store_<mode>): Likewise.
+
2017-07-27 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/81555
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index aeec9b2..1b4932e 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
extern void rs6000_fatal_bad_address (rtx);
extern rtx create_TOC_reference (rtx, rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
extern bool valid_sf_si_move (rtx, rtx, machine_mode);
extern void rs6000_emit_move (rtx, rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cb2a30b..7461dec 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -10374,19 +10374,30 @@ rs6000_const_vec (machine_mode mode)
return v;
}
-/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
- for a VSX load or store operation. */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
+ store operation. */
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
- /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
- 128-bit integers if they are allowed in VSX registers. */
- if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
- return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+ /* Scalar permutations are easier to express in integer modes rather than
+ floating-point modes, so cast them here. We use V1TImode instead
+ of TImode to ensure that the values don't go through GPRs. */
+ if (FLOAT128_VECTOR_P (mode))
+ {
+ dest = gen_lowpart (V1TImode, dest);
+ source = gen_lowpart (V1TImode, source);
+ mode = V1TImode;
+ }
+
+ /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+ scalar. */
+ if (mode == TImode || mode == V1TImode)
+ emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+ GEN_INT (64))));
else
{
rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
- return gen_rtx_VEC_SELECT (mode, source, par);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
}
}
@@ -10396,8 +10407,6 @@ rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_mem, permute_reg;
-
/* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
@@ -10407,11 +10416,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
source = adjust_address (source, V2DImode, 0);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
- permute_mem = rs6000_gen_le_vsx_permute (source, mode);
- permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_mem));
- emit_insn (gen_rtx_SET (dest, permute_reg));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
@@ -10420,8 +10427,6 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_src, permute_tmp;
-
/* This should never be called during or after reload, because it does
not re-permute the source register. It is intended only for use
during expand. */
@@ -10436,11 +10441,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
source = gen_lowpart (V2DImode, source);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
- permute_src = rs6000_gen_le_vsx_permute (source, mode);
- permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_src));
- emit_insn (gen_rtx_SET (dest, permute_tmp));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e6b98e0..b2b85c1 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -37,6 +37,9 @@
(TI "TARGET_VSX_TIMODE")
V1TI])
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
@@ -756,9 +759,9 @@
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+ [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+ (rotate:VSX_TI
+ (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@
@@ -769,10 +772,10 @@
(set_attr "type" "vecperm,vecload,vecstore")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
- (rotate:VSX_LE_128
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+ [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+ (rotate:VSX_TI
+ (rotate:VSX_TI
+ (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
(const_int 64))
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -797,16 +800,15 @@
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
"
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
}
"
[(set_attr "type" "vecload")
@@ -824,15 +826,14 @@
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
})
;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -856,16 +857,13 @@
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))]
- "")
+ [(const_int 0)]
+{
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ DONE;
+})
;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.