author      Richard Sandiford <richard.sandiford@arm.com>    2019-11-14 15:31:25 +0000
committer   Richard Sandiford <rsandifo@gcc.gnu.org>         2019-11-14 15:31:25 +0000
commit      9c437a108a14b9bdc44659c131b0da944e5ffeab (patch)
tree        e28c10ce8b60ec8052a44093e947d44c2db4e240 /gcc
parent      05101d1b575a57ca26e4275e971da85a0dd1d52a (diff)
Vectorise conversions between differently-sized integer vectors
This patch adds AArch64 patterns for converting between 64-bit and
128-bit integer vectors, and makes the vectoriser and expand pass
use them.
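As an illustration only (not part of the commit), the sketch below is modelled on the new gcc.target/aarch64/vect_mixed_sizes_6.c test added by this patch: a loop that mixes 16-bit loads with 32-bit arithmetic can now be vectorised with the new extension pattern, using a 64-bit input vector that is sign-extended to a 128-bit result, instead of being rejected because of the mixed vector sizes.

```c
/* Illustrative sketch, modelled on the vect_mixed_sizes_6.c test added
   by this patch; compile on AArch64 with -O2 -ftree-vectorize.  */
#include <stdint.h>

void
f (int32_t *x, int32_t *y, int16_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      /* Widening conversion: a 64-bit vector of int16_t is sign-extended
         to a 128-bit vector of int32_t, matching the new
         <optab><Vnarrowq><mode>2 pattern (e.g. "sxtl v0.4s, v1.4h").  */
      x[i] = z[i];
      /* 128-bit int32_t arithmetic in the same loop body.  */
      y[i] += y[i - 4];
    }
}
```

The narrowing direction is handled analogously by the new trunc<mode><Vnarrowq>2 pattern, which the vect_mixed_sizes_11.c to vect_mixed_sizes_13.c tests check via XTN.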
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-cfg.c (verify_gimple_assign_unary): Handle conversions
between vector types.
* tree-vect-stmts.c (vectorizable_conversion): Extend the
non-widening and non-narrowing path to handle standard
conversion codes, if the target supports them.
* expr.c (convert_move): Try using the extend and truncate optabs
for vectors.
* optabs-tree.c (supportable_convert_operation): Likewise.
* config/aarch64/iterators.md (Vnarrowq): New mode attribute.
* config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
(trunc<mode><Vnarrowq>2): New patterns.
gcc/testsuite/
* gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
to fail for aarch64 targets.
* gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
on aarch64 targets.
* gcc.dg/vect/vect-double-reduc-5.c: Likewise.
* gcc.dg/vect/vect-outer-4e.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
* gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.
From-SVN: r278245
Diffstat (limited to 'gcc')
21 files changed, 280 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6bb0a80..2798f15 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
 2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* tree-cfg.c (verify_gimple_assign_unary): Handle conversions
+	between vector types.
+	* tree-vect-stmts.c (vectorizable_conversion): Extend the
+	non-widening and non-narrowing path to handle standard
+	conversion codes, if the target supports them.
+	* expr.c (convert_move): Try using the extend and truncate optabs
+	for vectors.
+	* optabs-tree.c (supportable_convert_operation): Likewise.
+	* config/aarch64/iterators.md (Vnarrowq): New mode attribute.
+	* config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
+	(trunc<mode><Vnarrowq>2): New patterns.
+
+2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* tree-vect-stmts.c (vect_get_vector_types_for_stmt): Don't
 	require vectype and nunits_vectype to have the same size;
 	instead assert that nunits_vectype has at least as many
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6f7fb1c..ad4676b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7007,3 +7007,21 @@
   "pmull2\\t%0.1q, %1.2d, %2.2d"
   [(set_attr "type" "crypto_pmull")]
 )
+
+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
+(define_insn "<optab><Vnarrowq><mode>2"
+  [(set (match_operand:VQN 0 "register_operand" "=w")
+	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+;; Truncate a 128-bit integer vector to a 64-bit vector.
+(define_insn "trunc<mode><Vnarrowq>2"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "xtn\t%0.<Vntype>, %1.<Vtype>"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 2fd5467..3e37f80 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -927,6 +927,8 @@
 			    (V2DI "V2SI") (DI   "SI")
 			    (SI   "HI")  (HI   "QI")])
+(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi")
+			    (V2DI "v2si")])
 
 ;; Narrowed quad-modes for VQN (Used for XTN2).
 (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
diff --git a/gcc/expr.c b/gcc/expr.c
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -250,6 +250,31 @@ convert_move (rtx to, rtx from, int unsignedp)
 
   if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
     {
+      if (GET_MODE_UNIT_PRECISION (to_mode)
+	  > GET_MODE_UNIT_PRECISION (from_mode))
+	{
+	  optab op = unsignedp ? zext_optab : sext_optab;
+	  insn_code icode = convert_optab_handler (op, to_mode, from_mode);
+	  if (icode != CODE_FOR_nothing)
+	    {
+	      emit_unop_insn (icode, to, from,
+			      unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
+	      return;
+	    }
+	}
+
+      if (GET_MODE_UNIT_PRECISION (to_mode)
+	  < GET_MODE_UNIT_PRECISION (from_mode))
+	{
+	  insn_code icode = convert_optab_handler (trunc_optab,
+						   to_mode, from_mode);
+	  if (icode != CODE_FOR_nothing)
+	    {
+	      emit_unop_insn (icode, to, from, TRUNCATE);
+	      return;
+	    }
+	}
+
       gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
 			    GET_MODE_BITSIZE (to_mode)));
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index a5ecbf0..97cc592 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -303,6 +303,20 @@ supportable_convert_operation (enum tree_code code,
       return true;
     }
 
+  if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
+      && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
+    {
+      *code1 = code;
+      return true;
+    }
+
+  if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
+      && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
+    {
+      *code1 = code;
+      return true;
+    }
+
   /* Now check for builtin.  */
   if (targetm.vectorize.builtin_conversion
       && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c1fa465..29da1f3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,23 @@
 2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
+	to fail for aarch64 targets.
+	* gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
+	on aarch64 targets.
+	* gcc.dg/vect/vect-double-reduc-5.c: Likewise.
+	* gcc.dg/vect/vect-outer-4e.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
+	* gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
+	* gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.
+
+2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* gcc.dg/vect/vect-outer-4f.c: Expect the test to pass
 	on aarch64 targets.
 	* gcc.dg/vect/vect-outer-4g.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
index 85f9a02..813b1af 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2)
 }
 
 /* Disable for SVE because for long or variable-length vectors we don't
-   get an unrolled epilogue loop.  */
-/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */
+   get an unrolled epilogue loop.  Also disable for AArch64 Advanced SIMD,
+   because there we can vectorize the epilogue using mixed vector sizes.  */
+/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
index e4202b1..b5f8c3c 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
@@ -46,4 +46,4 @@ int main (void)
 }
 
 /* Until we support multiple types in the inner loop  */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
index 0ba3389..079704c 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
@@ -52,5 +52,5 @@ int main ()
 
 /* Vectorization of loops with multiple types and double reduction is not
    supported yet.  */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
index 13238db..e65a092 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
@@ -23,4 +23,4 @@ foo (){
   return;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
new file mode 100644
index 0000000..81e77a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int16_t *y, uint8_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 8];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
new file mode 100644
index 0000000..d9da6c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int64_t *y, int64_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
new file mode 100644
index 0000000..80dab8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int32_t *y, int32_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 4];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
new file mode 100644
index 0000000..655fa7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int8_t *x, int16_t *y, int16_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 8];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
new file mode 100644
index 0000000..ca8a65a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int64_t *x, int64_t *y, int32_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
new file mode 100644
index 0000000..6c09b5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int32_t *y, int16_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 4];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
new file mode 100644
index 0000000..94a66c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int16_t *y, int8_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 8];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
new file mode 100644
index 0000000..9531966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int64_t *x, int64_t *y, uint32_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
new file mode 100644
index 0000000..de8f698
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int32_t *y, uint16_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 4];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index b75fdb2..eb268e3 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3553,6 +3553,24 @@ verify_gimple_assign_unary (gassign *stmt)
     {
     CASE_CONVERT:
       {
+	/* Allow conversions between vectors with the same number of elements,
+	   provided that the conversion is OK for the element types too.  */
+	if (VECTOR_TYPE_P (lhs_type)
+	    && VECTOR_TYPE_P (rhs1_type)
+	    && known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
+			 TYPE_VECTOR_SUBPARTS (rhs1_type)))
+	  {
+	    lhs_type = TREE_TYPE (lhs_type);
+	    rhs1_type = TREE_TYPE (rhs1_type);
+	  }
+	else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
+	  {
+	    error ("invalid vector types in nop conversion");
+	    debug_generic_expr (lhs_type);
+	    debug_generic_expr (rhs1_type);
+	    return true;
+	  }
+
 	/* Allow conversions from pointer type to integral type only if
 	   there is no sign or zero extension involved.
 	   For targets were the precision of ptrofftype doesn't match that
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 5f30d1e..a65b4cb 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4861,7 +4861,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   switch (modifier)
     {
     case NONE:
-      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
+      if (code != FIX_TRUNC_EXPR
+	  && code != FLOAT_EXPR
+	  && !CONVERT_EXPR_CODE_P (code))
 	return false;
       if (supportable_convert_operation (code, vectype_out, vectype_in,
 					 &decl1, &code1))