diff options
author | Joseph Myers <joseph@codesourcery.com> | 2008-08-07 17:58:29 +0100 |
---|---|---|
committer | Joseph Myers <jsm28@gcc.gnu.org> | 2008-08-07 17:58:29 +0100 |
commit | 874d42b93e5926d5b225b97be7608a8c8836614b (patch) | |
tree | 1bd392ba1acdeacf8eb608a911fce0a4c62b053d /gcc/config/arm | |
parent | 058514b381c9f8225cfd63731d90006c37a83139 (diff) | |
download | gcc-874d42b93e5926d5b225b97be7608a8c8836614b.zip gcc-874d42b93e5926d5b225b97be7608a8c8836614b.tar.gz gcc-874d42b93e5926d5b225b97be7608a8c8836614b.tar.bz2 |
arm.c (output_move_neon): Update comment describing big-endian vector layout.
* config/arm/arm.c (output_move_neon): Update comment describing
big-endian vector layout.
(arm_assemble_integer): Do not handle big-endian NEON vectors
specially.
* config/arm/neon.md (vec_set<mode>_internal, vec_extract<mode>,
neon_vget_lane<mode>_sext_internal,
neon_vget_lane<mode>_zext_internal, neon_vget_lane<mode>): Adjust
element indices for big-endian.
From-SVN: r138847
Diffstat (limited to 'gcc/config/arm')
-rw-r--r-- | gcc/config/arm/arm.c | 46 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 67 |
2 files changed, 77 insertions, 36 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 7d1840a..a3a49f7 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -10335,30 +10335,28 @@ output_move_vfp (rtx *operands) } /* Output a Neon quad-word load or store, or a load or store for - larger structure modes. We could also support post-modify forms using - VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that - yet. - WARNING: The ordering of elements in memory is weird in big-endian mode, - because we use VSTM instead of VST1, to make it easy to make vector stores - via ARM registers write values in the same order as stores direct from Neon - registers. For example, the byte ordering of a quadword vector with 16-byte - elements like this: + larger structure modes. - [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first) + WARNING: The ordering of elements is weird in big-endian mode, + because we use VSTM, as required by the EABI. GCC RTL defines + element ordering based on in-memory order. This can be differ + from the architectural ordering of elements within a NEON register. + The intrinsics defined in arm_neon.h use the NEON register element + ordering, not the GCC RTL element ordering. - will be (with lowest address first, h = most-significant byte, - l = least-significant byte of element): + For example, the in-memory ordering of a big-endian a quadword + vector with 16-bit elements when stored from register pair {d0,d1} + will be (lowest address first, d0[N] is NEON register element N): - [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l, - e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l] + [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]] - When necessary, quadword registers (dN, dN+1) are moved to ARM registers from - rN in the order: + When necessary, quadword registers (dN, dN+1) are moved to ARM + registers from rN in the order: dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2) - So that STM/LDM can be used on vectors in ARM registers, and the same memory - layout will result as if VSTM/VLDM were used. */ + So that STM/LDM can be used on vectors in ARM registers, and the + same memory layout will result as if VSTM/VLDM were used. */ const char * output_move_neon (rtx *operands) @@ -13326,28 +13324,16 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p) if (arm_vector_mode_supported_p (mode)) { int i, units; - unsigned int invmask = 0, parts_per_word; gcc_assert (GET_CODE (x) == CONST_VECTOR); units = CONST_VECTOR_NUNITS (x); size = GET_MODE_SIZE (GET_MODE_INNER (mode)); - /* For big-endian Neon vectors, we must permute the vector to the form - which, when loaded by a VLDR or VLDM instruction, will give a vector - with the elements in the right order. */ - if (TARGET_NEON && WORDS_BIG_ENDIAN) - { - parts_per_word = UNITS_PER_WORD / size; - /* FIXME: This might be wrong for 64-bit vector elements, but we don't - support those anywhere yet. */ - invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1; - } - if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) for (i = 0; i < units; i++) { - rtx elt = CONST_VECTOR_ELT (x, i ^ invmask); + rtx elt = CONST_VECTOR_ELT (x, i); assemble_integer (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); } diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 0c312e7..8d10c1e 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -735,7 +735,10 @@ (match_operand:SI 2 "immediate_operand" "i")))] "TARGET_NEON" { - operands[2] = GEN_INT (ffs ((int) INTVAL (operands[2]) - 1)); + int elt = ffs ((int) INTVAL (operands[2]) - 1); + if (BYTES_BIG_ENDIAN) + elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; + operands[2] = GEN_INT (elt); return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1"; } @@ -757,6 +760,9 @@ int hi = (elem / half_elts) * 2; int regno = REGNO (operands[0]); + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi); operands[2] = GEN_INT (elt); @@ -804,7 +810,15 @@ (match_operand:VD 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] "TARGET_NEON" - "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"; +} [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_bp_simple")] ) @@ -821,6 +835,9 @@ int hi = (INTVAL (operands[2]) / half_elts) * 2; int regno = REGNO (operands[1]); + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi); operands[2] = GEN_INT (elt); @@ -2413,7 +2430,15 @@ (match_operand:VD 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" - "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]"; +} [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_bp_simple")] ) @@ -2425,7 +2450,15 @@ (match_operand:VD 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" - "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]"; +} [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_bp_simple")] ) @@ -2442,10 +2475,14 @@ int regno = REGNO (operands[1]); unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; ops[0] = operands[0]; ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); - ops[2] = GEN_INT (elt % halfelts); + ops[2] = GEN_INT (elt_adj); output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops); return ""; @@ -2466,10 +2503,14 @@ int regno = REGNO (operands[1]); unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; ops[0] = operands[0]; ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); - ops[2] = GEN_INT (elt % halfelts); + ops[2] = GEN_INT (elt_adj); output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops); return ""; @@ -2490,6 +2531,20 @@ neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode)); + if (BYTES_BIG_ENDIAN) + { + /* The intrinsics are defined in terms of a model where the + element ordering in memory is vldm order, whereas the generic + RTL is defined in terms of a model where the element ordering + in memory is array order. Convert the lane number to conform + to this model. */ + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32) insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]); else |