-rw-r--r--  gcc/ChangeLog                        |  16
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h  |   2
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md    |  26
-rw-r--r--  gcc/config/aarch64/aarch64.c         | 139
-rw-r--r--  gcc/config/aarch64/aarch64.md        |   1
-rw-r--r--  gcc/config/aarch64/predicates.md     |   4
6 files changed, 184 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index db763a6..b655476 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,21 @@
 2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
 
+	* config/aarch64/aarch64-protos.h (aarch64_split_sve_subreg_move)
+	(aarch64_maybe_expand_sve_subreg_move): Declare.
+	* config/aarch64/aarch64.md (UNSPEC_REV_SUBREG): New unspec.
+	* config/aarch64/predicates.md (aarch64_any_register_operand): New
+	predicate.
+	* config/aarch64/aarch64-sve.md (mov<mode>): Optimize subreg moves
+	that are semantically a reverse operation.
+	(*aarch64_sve_mov<mode>_subreg_be): New pattern.
+	* config/aarch64/aarch64.c (aarch64_maybe_expand_sve_subreg_move):
+	(aarch64_replace_reg_mode, aarch64_split_sve_subreg_move): New
+	functions.
+	(aarch64_can_change_mode_class): For big-endian, forbid changes
+	between two SVE modes if they have different element sizes.
+
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
 	* config/aarch64/aarch64.c (aarch64_expand_sve_const_vector): Prefer
 	the TImode handling for big-endian targets.
 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ef1b0bc..cda2895 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -447,6 +447,8 @@ void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
 void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
+bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
+void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
 void aarch64_expand_prologue (void);
 void aarch64_expand_vector_init (rtx, rtx);
 void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 068fd8c..9140862 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -84,6 +84,32 @@
                                       gen_vec_duplicate<mode>);
         DONE;
       }
+
+    /* Optimize subregs on big-endian targets: we can use REV[BHW]
+       instead of going through memory.  */
+    if (BYTES_BIG_ENDIAN
+        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
+      DONE;
+  }
+)
+
+;; A pattern for optimizing SUBREGs that have a reinterpreting effect
+;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
+;; for details.  We use a special predicate for operand 2 to reduce
+;; the number of patterns.
+(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
+  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
+        (unspec:SVE_ALL
+          [(match_operand:VNx16BI 1 "register_operand" "Upl")
+           (match_operand 2 "aarch64_any_register_operand" "w")]
+          UNSPEC_REV_SUBREG))]
+  "TARGET_SVE && BYTES_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+    DONE;
   }
 )
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6296ffe..7b34bdf 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3074,6 +3074,120 @@ aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
   aarch64_emit_sve_pred_move (dest, ptrue, src);
 }
 
+/* Called only on big-endian targets.  See whether an SVE vector move
+   from SRC to DEST is effectively a REV[BHW] instruction, because at
+   least one operand is a subreg of an SVE vector that has wider or
+   narrower elements.  Return true and emit the instruction if so.
+
+   For example:
+
+     (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
+
+   represents a VIEW_CONVERT between the following vectors, viewed
+   in memory order:
+
+     R2: { [0].high, [0].low,  [1].high, [1].low, ... }
+     R1: { [0],      [1],      [2],      [3],     ... }
+
+   The high part of lane X in R2 should therefore correspond to lane X*2
+   of R1, but the register representations are:
+
+         msb                                      lsb
+     R2: ...... [1].high  [1].low   [0].high  [0].low
+     R1: ...... [3]       [2]       [1]       [0]
+
+   where the low part of lane X in R2 corresponds to lane X*2 in R1.
+   We therefore need a reverse operation to swap the high and low values
+   around.
+
+   This is purely an optimization.  Without it we would spill the
+   subreg operand to the stack in one mode and reload it in the
+   other mode, which has the same effect as the REV.  */
+
+bool
+aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
+{
+  gcc_assert (BYTES_BIG_ENDIAN);
+  if (GET_CODE (dest) == SUBREG)
+    dest = SUBREG_REG (dest);
+  if (GET_CODE (src) == SUBREG)
+    src = SUBREG_REG (src);
+
+  /* The optimization handles two single SVE REGs with different element
+     sizes.  */
+  if (!REG_P (dest)
+      || !REG_P (src)
+      || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
+      || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
+      || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
+          == GET_MODE_UNIT_SIZE (GET_MODE (src))))
+    return false;
+
+  /* Generate *aarch64_sve_mov<mode>_subreg_be.  */
+  rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
+  rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
+                               UNSPEC_REV_SUBREG);
+  emit_insn (gen_rtx_SET (dest, unspec));
+  return true;
+}
+
+/* Return a copy of X with mode MODE, without changing its other
+   attributes.  Unlike gen_lowpart, this doesn't care whether the
+   mode change is valid.  */
+
+static rtx
+aarch64_replace_reg_mode (rtx x, machine_mode mode)
+{
+  if (GET_MODE (x) == mode)
+    return x;
+
+  x = shallow_copy_rtx (x);
+  set_mode_and_regno (x, mode, REGNO (x));
+  return x;
+}
+
+/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
+   operands.  */
+
+void
+aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
+{
+  /* Decide which REV operation we need.  The mode with narrower elements
+     determines the mode of the operands and the mode with the wider
+     elements determines the reverse width.  */
+  machine_mode mode_with_wider_elts = GET_MODE (dest);
+  machine_mode mode_with_narrower_elts = GET_MODE (src);
+  if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
+      < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
+    std::swap (mode_with_wider_elts, mode_with_narrower_elts);
+
+  unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
+  unsigned int unspec;
+  if (wider_bytes == 8)
+    unspec = UNSPEC_REV64;
+  else if (wider_bytes == 4)
+    unspec = UNSPEC_REV32;
+  else if (wider_bytes == 2)
+    unspec = UNSPEC_REV16;
+  else
+    gcc_unreachable ();
+  machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
+
+  /* Emit:
+
+       (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
+                  UNSPEC_MERGE_PTRUE))
+
+     with the appropriate modes.  */
+  ptrue = gen_lowpart (pred_mode, ptrue);
+  dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
+  src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
+  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
+  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
+                        UNSPEC_MERGE_PTRUE);
+  emit_insn (gen_rtx_SET (dest, src));
+}
+
 static bool
 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                                  tree exp ATTRIBUTE_UNUSED)
@@ -17197,10 +17311,27 @@ static bool
 aarch64_can_change_mode_class (machine_mode from,
                               machine_mode to, reg_class_t)
 {
-  /* See the comment at the head of aarch64-sve.md for details.  */
-  if (BYTES_BIG_ENDIAN
-      && (aarch64_sve_data_mode_p (from) != aarch64_sve_data_mode_p (to)))
-    return false;
+  if (BYTES_BIG_ENDIAN)
+    {
+      bool from_sve_p = aarch64_sve_data_mode_p (from);
+      bool to_sve_p = aarch64_sve_data_mode_p (to);
+
+      /* Don't allow changes between SVE data modes and non-SVE modes.
+         See the comment at the head of aarch64-sve.md for details.  */
+      if (from_sve_p != to_sve_p)
+        return false;
+
+      /* Don't allow changes in element size: lane 0 of the new vector
+         would not then be lane 0 of the old vector.  See the comment
+         above aarch64_maybe_expand_sve_subreg_move for a more detailed
+         description.
+
+         In the worst case, this forces a register to be spilled in
+         one mode and reloaded in the other, which handles the
+         endianness correctly.  */
+      if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
+        return false;
+    }
   return true;
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 49095f8..5a2a930 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -168,6 +168,7 @@
     UNSPEC_INSR
     UNSPEC_CLASTB
     UNSPEC_FADDA
+    UNSPEC_REV_SUBREG
 ])
 
 (define_c_enum "unspecv" [
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 159e74a..804be16 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -617,3 +617,7 @@
 (define_predicate "aarch64_gather_scale_operand_d"
   (and (match_code "const_int")
        (match_test "INTVAL (op) == 1 || INTVAL (op) == 8")))
+
+;; A special predicate that doesn't match a particular mode.
+(define_special_predicate "aarch64_any_register_operand"
+  (match_code "reg"))
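
For reference, a minimal standalone C sketch of the granule selection that aarch64_split_sve_subreg_move performs in the hunk above.  It is not part of the patch, and the helper name rev_unspec_for is invented for this illustration: the wider of the two element sizes picks the REV unspec (and the predicate element size), while the mode with the narrower elements supplies the operand mode, i.e. the units that are reversed within each wider element.

/* Illustration only -- not GCC code.  Mirrors how the split chooses the
   reversal granule for a big-endian SVE subreg move between vectors with
   element sizes A and B in bytes (unequal, each 1, 2, 4 or 8).  */

#include <stdio.h>

static const char *
rev_unspec_for (unsigned int a, unsigned int b)
{
  unsigned int wider = a > b ? a : b;
  if (wider == 8)
    return "UNSPEC_REV64";   /* Reverse narrower units in 64-bit chunks.  */
  if (wider == 4)
    return "UNSPEC_REV32";   /* Reverse narrower units in 32-bit chunks.  */
  if (wider == 2)
    return "UNSPEC_REV16";   /* Reverse bytes in 16-bit chunks.  */
  /* Unreachable for valid, unequal SVE element sizes; the real split
     uses gcc_unreachable () here.  */
  return NULL;
}

int
main (void)
{
  /* VNx16QI <-> VNx8HI: bytes swapped within each 16-bit element.  */
  printf ("QI<->HI: %s\n", rev_unspec_for (1, 2));
  /* VNx8HI <-> VNx2DI: halfwords reversed within each 64-bit element.  */
  printf ("HI<->DI: %s\n", rev_unspec_for (2, 8));
  /* VNx4SI <-> VNx2DI: words reversed within each 64-bit element.  */
  printf ("SI<->DI: %s\n", rev_unspec_for (4, 8));
  return 0;
}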
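The comment above aarch64_maybe_expand_sve_subreg_move argues that, on big-endian targets, reinterpreting between SVE modes with different element sizes permutes the register bytes by reversing the narrower units within each wider element.  The standalone model below (again not GCC code; reg_byte_for_mem_byte and the 32-byte vector length VL are assumptions made purely for illustration) encodes the lane layout described in that comment and checks the claim exhaustively for every pair of element sizes.

/* Illustration only -- not GCC code.  Registers are modelled as byte
   arrays indexed from the least significant byte: for element size ELT,
   lane I occupies register bytes I*ELT .. I*ELT+ELT-1, and on big-endian
   its memory image stores the most significant byte first, matching the
   diagrams in the comment above aarch64_maybe_expand_sve_subreg_move.  */

#include <assert.h>
#include <stdio.h>

#define VL 32  /* One possible SVE vector length in bytes (assumed).  */

/* Register byte (counted from the least significant end) that holds
   memory byte M of the vector when the element size is ELT bytes.  */
static unsigned int
reg_byte_for_mem_byte (unsigned int m, unsigned int elt)
{
  return (m / elt) * elt + (elt - 1 - m % elt);
}

int
main (void)
{
  static const unsigned int sizes[] = { 1, 2, 4, 8 };
  for (unsigned int i = 0; i < 4; i++)
    for (unsigned int j = 0; j < 4; j++)
      {
        unsigned int a = sizes[i], b = sizes[j];
        if (a == b)
          continue;
        unsigned int narrow = a < b ? a : b;
        unsigned int wide = a < b ? b : a;
        /* A VIEW_CONVERT keeps the memory image fixed, so register byte
           R of the destination (element size B) must come from register
           byte f (f (R, B), A) of the source (element size A), where f is
           reg_byte_for_mem_byte.  Check that this composition is exactly
           "reverse NARROW-byte units within each WIDE-byte element".  */
        for (unsigned int r = 0; r < VL; r++)
          {
            unsigned int src
              = reg_byte_for_mem_byte (reg_byte_for_mem_byte (r, b), a);
            unsigned int base = (r / wide) * wide;
            unsigned int unit = (r % wide) / narrow;
            unsigned int units = wide / narrow;
            unsigned int expect
              = base + (units - 1 - unit) * narrow + r % narrow;
            assert (src == expect);
          }
      }
  printf ("big-endian SVE subreg == REV within wider elements: OK\n");
  return 0;
}

Building this with any C99 compiler and running it prints a single OK line; the point is that the composed big-endian layouts reduce to exactly the REV-style permutation that the split emits.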