diff options
| field | value | date |
|---|---|---|
| author | Tamar Christina <tamar.christina@arm.com> | 2019-01-16 18:29:00 +0000 |
| committer | Tamar Christina <tnfchris@gcc.gnu.org> | 2019-01-16 18:29:00 +0000 |
| commit | 33b5a38c2dc961e9dd3e28ffd535d81c40d2b7bb (patch) | |
| tree | 97ae3064583c6ca6839ea87941b7f2de97780fdf | |
| parent | 4e7cbff5c2a391f025b29cc0ce05e1d7d01b5d59 (diff) | |
| download | gcc-33b5a38c2dc961e9dd3e28ffd535d81c40d2b7bb.zip gcc-33b5a38c2dc961e9dd3e28ffd535d81c40d2b7bb.tar.gz gcc-33b5a38c2dc961e9dd3e28ffd535d81c40d2b7bb.tar.bz2 | |
aarch64-builtins.c (aarch64_simd_expand_args): Use correct max nunits for endian swap.
2019-01-16 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use
correct max nunits for endian swap.
(aarch64_expand_fcmla_builtin): Correct subreg code.
* config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>,
aarch64_fcmla_laneq<rot>v4hf, aarch64_fcmlaq_lane<rot><mode>): Correct
lane endianness.
From-SVN: r267983
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | gcc/ChangeLog | 9 |
| -rw-r--r-- | gcc/config/aarch64/aarch64-builtins.c | 26 |
| -rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 17 |
3 files changed, 40 insertions, 12 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c0a7e23..9460ad2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2019-01-16 Tamar Christina <tamar.christina@arm.com> + + * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use + correct max nunits for endian swap. + (aarch64_expand_fcmla_builtin): Correct subreg code. + * config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>, + aarch64_fcmla_laneq<rot>v4hf, aarch64_fcmlaq_lane<rot><mode>): Correct + lane endianness. + 2019-01-16 Uroš Bizjak <ubizjak@gmail.com> * config/alpha/alpha.c (alpha_gimplify_va_arg): diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 04063e5..c8f5a55 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1197,7 +1197,9 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, = GET_MODE_NUNITS (vmode).to_constant (); aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp); /* Keep to GCC-vector-extension lane indices in the RTL. */ - op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); + int lane = INTVAL (op[opc]); + op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), + SImode); } /* Fall through - if the lane index isn't a constant then the next case will error. */ @@ -1443,14 +1445,12 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) int nunits = GET_MODE_NUNITS (quadmode).to_constant (); aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp); - /* Keep to GCC-vector-extension lane indices in the RTL. */ - lane_idx = aarch64_endian_lane_rtx (quadmode, INTVAL (lane_idx)); - /* Generate the correct register and mode. 
*/ int lane = INTVAL (lane_idx); if (lane < nunits / 4) - op2 = simplify_gen_subreg (d->mode, op2, quadmode, 0); + op2 = simplify_gen_subreg (d->mode, op2, quadmode, + subreg_lowpart_offset (d->mode, quadmode)); else { /* Select the upper 64 bits, either a V2SF or V4HF, this however @@ -1460,15 +1460,24 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) gen_highpart_mode generates code that isn't optimal. */ rtx temp1 = gen_reg_rtx (d->mode); rtx temp2 = gen_reg_rtx (DImode); - temp1 = simplify_gen_subreg (d->mode, op2, quadmode, 0); + temp1 = simplify_gen_subreg (d->mode, op2, quadmode, + subreg_lowpart_offset (d->mode, quadmode)); temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0); - emit_insn (gen_aarch64_get_lanev2di (temp2, temp1 , const1_rtx)); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx)); + else + emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx)); op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0); /* And recalculate the index. */ lane -= nunits / 4; } + /* Keep to GCC-vector-extension lane indices in the RTL, only nunits / 4 + (max nunits in range check) are valid. Which means only 0-1, so we + only need to know the order in a V2mode. 
*/ + lane_idx = aarch64_endian_lane_rtx (V2DImode, lane); + if (!target) target = gen_reg_rtx (d->mode); else @@ -1477,8 +1486,7 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) rtx pat = NULL_RTX; if (d->lane) - pat = GEN_FCN (d->icode) (target, op0, op1, op2, - gen_int_mode (lane, SImode)); + pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx); else pat = GEN_FCN (d->icode) (target, op0, op1, op2); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index be6c27d..805d7a8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -455,7 +455,10 @@ (match_operand:SI 4 "const_int_operand" "n")] FCMLA)))] "TARGET_COMPLEX" - "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>" +{ + operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4])); + return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>"; +} [(set_attr "type" "neon_fcmla")] ) @@ -467,7 +470,10 @@ (match_operand:SI 4 "const_int_operand" "n")] FCMLA)))] "TARGET_COMPLEX" - "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>" +{ + operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); + return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>"; +} [(set_attr "type" "neon_fcmla")] ) @@ -479,7 +485,12 @@ (match_operand:SI 4 "const_int_operand" "n")] FCMLA)))] "TARGET_COMPLEX" - "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>" +{ + int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant (); + operands[4] + = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode); + return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>"; +} [(set_attr "type" "neon_fcmla")] ) |