about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author		Richard Sandiford <richard.sandiford@linaro.org>	2018-01-30 09:48:24 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2018-01-30 09:48:24 +0000
commit		8711e791deaf97590d68ee82ff7a0b81d54e944d (patch)
tree		74054d566ac7b36c615afb50fe7751bc16a75ae3 /gcc
parent		e89b01f2b1bb7b4a689502dd23775301ef36eb0d (diff)
download	gcc-8711e791deaf97590d68ee82ff7a0b81d54e944d.zip
		gcc-8711e791deaf97590d68ee82ff7a0b81d54e944d.tar.gz
		gcc-8711e791deaf97590d68ee82ff7a0b81d54e944d.tar.bz2
[AArch64] Fix sve/extract_[12].c for big-endian SVE
sve/extract_[12].c were relying on the target-independent optimisation
that removes a redundant vec_select, so that we don't end up with things
like:

	dup	v0.4s, v0.4s[0]
	...use s0...

But that optimisation rightly doesn't trigger for big-endian targets,
because GCC expects lane 0 to be in the high part of the register rather
than the low part.  SVE breaks this assumption -- see the comment at the
head of aarch64-sve.md for details -- so the optimisation is valid for
both endiannesses.  Long term, we probably need some kind of target hook
to make GCC aware of this.

But there's another problem with the current extract pattern: it doesn't
tell the register allocator how cheap an extraction of lane 0 is with
tied registers.  It seems better to split the lane 0 case out into its
own pattern and use tied operands for the FPR<-SIMD case, so that using
different registers has the cost of an extra reload.  I think we want
this for both endiannesses, regardless of the hook described above.

Also, the gen_lowpart in this pattern fails for aarch64_be due to
TARGET_CAN_CHANGE_MODE_CLASS restrictions, so the patch uses gen_rtx_REG
instead.  We're only creating this rtl in order to print it, so there's
no need for anything fancier.

2018-01-30  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/aarch64/aarch64-sve.md (*vec_extract<mode><Vel>_0): New
	pattern.
	(*vec_extract<mode><Vel>_v128): Require a nonzero lane number.
	Use gen_rtx_REG rather than gen_lowpart.

Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>

From-SVN: r257178
Diffstat (limited to 'gcc')
-rw-r--r--	gcc/ChangeLog				7
-rw-r--r--	gcc/config/aarch64/aarch64-sve.md	40
2 files changed, 44 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 771aa01..119be1b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
2018-01-30 Richard Sandiford <richard.sandiford@linaro.org>
+ * config/aarch64/aarch64-sve.md (*vec_extract<mode><Vel>_0): New
+ pattern.
+ (*vec_extract<mode><Vel>_v128): Require a nonzero lane number.
+ Use gen_rtx_REG rather than gen_lowpart.
+
+2018-01-30 Richard Sandiford <richard.sandiford@linaro.org>
+
* lra-constraints.c (match_reload): Use subreg_lowpart_offset
rather than 0 when creating partial subregs.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 8da10c1..ee942df 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -484,18 +484,52 @@
}
)
+;; Extract element zero. This is a special case because we want to force
+;; the registers to be the same for the second alternative, and then
+;; split the instruction into nothing after RA.
+(define_insn_and_split "*vec_extract<mode><Vel>_0"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
+ (parallel [(const_int 0)])))]
+ "TARGET_SVE"
+ {
+ operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
+ switch (which_alternative)
+ {
+ case 0:
+ return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
+ case 1:
+ return "#";
+ case 2:
+ return "st1\\t{%1.<Vetype>}[0], %0";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ "&& reload_completed
+ && REG_P (operands[0])
+ && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+ {
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+ }
+ [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
+)
+
;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
-;; want any chnage in lane number on big-endian targets.
+;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
(vec_select:<VEL>
(match_operand:SVE_ALL 1 "register_operand" "w, w, w")
(parallel [(match_operand:SI 2 "const_int_operand")])))]
"TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 15)"
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
{
- operands[1] = gen_lowpart (<V128>mode, operands[1]);
+ operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
switch (which_alternative)
{
case 0: