diff options
author | Michael Collison <collison@gcc.gnu.org> | 2016-04-25 05:03:09 +0000 |
---|---|---|
committer | Michael Collison <collison@gcc.gnu.org> | 2016-04-25 05:03:09 +0000 |
commit | 93c590ee1aa41d3df7298d7dd1596994163f6d74 (patch) | |
tree | 232a7b2f09a3eaab8167683bda4524742de85802 /gcc | |
parent | 151a16073f59ef1445d9e66a0f65e1ced9060416 (diff) | |
download | gcc-93c590ee1aa41d3df7298d7dd1596994163f6d74.zip gcc-93c590ee1aa41d3df7298d7dd1596994163f6d74.tar.gz gcc-93c590ee1aa41d3df7298d7dd1596994163f6d74.tar.bz2 |
neon.md (widen_<us>sum<mode>): New patterns where mode is VQI to improve mixed mode vectorization.
2016-04-25 Michael Collison <michael.collison@linaro.org>
* config/arm/neon.md (widen_<us>sum<mode>): New patterns where
mode is VQI to improve mixed mode vectorization.
* config/arm/neon.md (vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3): New
define_insn to match low half of signed vaddw.
* config/arm/neon.md (vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3): New
define_insn to match high half of signed vaddw.
* config/arm/neon.md (vec_sel_widen_usum_lo<VQI:mode><VW:mode>3): New
define_insn to match low half of unsigned vaddw.
* config/arm/neon.md (vec_sel_widen_usum_hi<VQI:mode><VW:mode>3): New
define_insn to match high half of unsigned vaddw.
* config/arm/arm.c (arm_simd_vect_par_cnst_half): New function.
(arm_simd_check_vect_par_cnst_half_p): Likewise.
* config/arm/arm-protos.h (arm_simd_vect_par_cnst_half): Prototype
for new function.
(arm_simd_check_vect_par_cnst_half_p): Likewise.
* config/arm/predicates.md (vect_par_constant_high): Support
big endian and simplify by calling
arm_simd_check_vect_par_cnst_half_p.
(vect_par_constant_low): Likewise.
* testsuite/gcc.target/arm/neon-vaddws16.c: New test.
* testsuite/gcc.target/arm/neon-vaddws32.c: New test.
* testsuite/gcc.target/arm/neon-vaddwu16.c: New test.
* testsuite/gcc.target/arm/neon-vaddwu32.c: New test.
* testsuite/gcc.target/arm/neon-vaddwu8.c: New test.
* testsuite/lib/target-supports.exp
(check_effective_target_vect_widen_sum_hi_to_si_pattern): Indicate
that ARM NEON supports vector widen sum of HImode to SImode.
From-SVN: r235402
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 22 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 76 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 123 | ||||
-rw-r--r-- | gcc/config/arm/predicates.md | 50 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddws16.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddws32.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddwu16.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddwu32.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/neon-vaddwu8.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 2 |
12 files changed, 328 insertions, 52 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8e7059e..4b7607a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2016-04-25 Michael Collison <michael.collison@linaro.org> + + * config/arm/neon.md (widen_<us>sum<mode>): New patterns where + mode is VQI to improve mixed mode vectorization. + * config/arm/neon.md (vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3): New + define_insn to match low half of signed vaddw. + * config/arm/neon.md (vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3): New + define_insn to match high half of signed vaddw. + * config/arm/neon.md (vec_sel_widen_usum_lo<VQI:mode><VW:mode>3): New + define_insn to match low half of unsigned vaddw. + * config/arm/neon.md (vec_sel_widen_usum_hi<VQI:mode><VW:mode>3): New + define_insn to match high half of unsigned vaddw. + * config/arm/arm.c (arm_simd_vect_par_cnst_half): New function. + (arm_simd_check_vect_par_cnst_half_p): Likewise. + * config/arm/arm-protos.h (arm_simd_vect_par_cnst_half): Prototype + for new function. + (arm_simd_check_vect_par_cnst_half_p): Likewise. + * config/arm/predicates.md (vect_par_constant_high): Support + big endian and simplify by calling + arm_simd_check_vect_par_cnst_half + (vect_par_constant_low): Likewise. 
+ 2016-04-25 Uros Bizjak <ubizjak@gmail.com> * config/i386/i386.md (*lea<mode>_general_4): Use const_0_to_3_operand diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 0083673..d8179c4 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -50,7 +50,9 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED); extern void arm_init_builtins (void); extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); - +extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high); +extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode, + bool high); #ifdef RTX_CODE extern bool arm_vector_mode_supported_p (machine_mode); extern bool arm_small_register_classes_for_mode_p (machine_mode); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4d9f6f1..71b5143 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -30302,4 +30302,80 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, return; } + +/* Construct and return a PARALLEL RTX vector with elements numbering the + lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of + the vector - from the perspective of the architecture. This does not + line up with GCC's perspective on lane numbers, so we end up with + different masks depending on our target endian-ness. The diagram + below may help. We must draw the distinction when building masks + which select one half of the vector. An instruction selecting + architectural low-lanes for a big-endian target, must be described using + a mask selecting GCC high-lanes. 
+ + Big-Endian Little-Endian + +GCC 0 1 2 3 3 2 1 0 + | x | x | x | x | | x | x | x | x | +Architecture 3 2 1 0 3 2 1 0 + +Low Mask: { 2, 3 } { 0, 1 } +High Mask: { 0, 1 } { 2, 3 } +*/ + +rtx +arm_simd_vect_par_cnst_half (machine_mode mode, bool high) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits / 2); + int high_base = nunits / 2; + int low_base = 0; + int base; + rtx t1; + int i; + + if (BYTES_BIG_ENDIAN) + base = high ? low_base : high_base; + else + base = high ? high_base : low_base; + + for (i = 0; i < nunits / 2; i++) + RTVEC_ELT (v, i) = GEN_INT (base + i); + + t1 = gen_rtx_PARALLEL (mode, v); + return t1; +} + +/* Check OP for validity as a PARALLEL RTX vector with elements + numbering the lanes of either the high (HIGH == TRUE) or low lanes, + from the perspective of the architecture. See the diagram above + arm_simd_vect_par_cnst_half_p for more details. */ + +bool +arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode, + bool high) +{ + rtx ideal = arm_simd_vect_par_cnst_half (mode, high); + HOST_WIDE_INT count_op = XVECLEN (op, 0); + HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0); + int i = 0; + + if (!VECTOR_MODE_P (mode)) + return false; + + if (count_op != count_ideal) + return false; + + for (i = 0; i < count_ideal; i++) + { + rtx elt_op = XVECEXP (op, 0, i); + rtx elt_ideal = XVECEXP (ideal, 0, i); + + if (!CONST_INT_P (elt_op) + || INTVAL (elt_ideal) != INTVAL (elt_op)) + return false; + } + return true; +} + #include "gt-arm.h" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 879c07c..6b4896d 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1204,16 +1204,133 @@ ;; Widening operations +(define_expand "widen_ssum<mode>3" + [(set (match_operand:<V_double_width> 0 "s_register_operand" "") + (plus:<V_double_width> + (sign_extend:<V_double_width> + (match_operand:VQI 1 "s_register_operand" "")) + (match_operand:<V_double_width> 2 "s_register_operand" "")))] + "TARGET_NEON" + { 
+ machine_mode mode = GET_MODE (operands[1]); + rtx p1, p2; + + p1 = arm_simd_vect_par_cnst_half (mode, false); + p2 = arm_simd_vect_par_cnst_half (mode, true); + + if (operands[0] != operands[2]) + emit_move_insn (operands[0], operands[2]); + + emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0], + operands[1], + p1, + operands[0])); + emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0], + operands[1], + p2, + operands[0])); + DONE; + } +) + +(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3" + [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") + (plus:<VW:V_widen> + (sign_extend:<VW:V_widen> + (vec_select:VW + (match_operand:VQI 1 "s_register_operand" "%w") + (match_operand:VQI 2 "vect_par_constant_low" ""))) + (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] + "TARGET_NEON" +{ + return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" : + "vaddw.<V_s_elem>\t%q0, %q3, %e1"; +} + [(set_attr "type" "neon_add_widen")]) + +(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3" + [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") + (plus:<VW:V_widen> + (sign_extend:<VW:V_widen> + (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w") + (match_operand:VQI 2 "vect_par_constant_high" ""))) + (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] + "TARGET_NEON" +{ + return BYTES_BIG_ENDIAN ? 
"vaddw.<V_s_elem>\t%q0, %q3, %e1" : + "vaddw.<V_s_elem>\t%q0, %q3, %f1"; +} + [(set_attr "type" "neon_add_widen")]) + (define_insn "widen_ssum<mode>3" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") - (plus:<V_widen> (sign_extend:<V_widen> - (match_operand:VW 1 "s_register_operand" "%w")) - (match_operand:<V_widen> 2 "s_register_operand" "w")))] + (plus:<V_widen> + (sign_extend:<V_widen> + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand:<V_widen> 2 "s_register_operand" "w")))] "TARGET_NEON" "vaddw.<V_s_elem>\t%q0, %q2, %P1" [(set_attr "type" "neon_add_widen")] ) +(define_expand "widen_usum<mode>3" + [(set (match_operand:<V_double_width> 0 "s_register_operand" "") + (plus:<V_double_width> + (zero_extend:<V_double_width> + (match_operand:VQI 1 "s_register_operand" "")) + (match_operand:<V_double_width> 2 "s_register_operand" "")))] + "TARGET_NEON" + { + machine_mode mode = GET_MODE (operands[1]); + rtx p1, p2; + + p1 = arm_simd_vect_par_cnst_half (mode, false); + p2 = arm_simd_vect_par_cnst_half (mode, true); + + if (operands[0] != operands[2]) + emit_move_insn (operands[0], operands[2]); + + emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0], + operands[1], + p1, + operands[0])); + emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0], + operands[1], + p2, + operands[0])); + DONE; + } +) + +(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3" + [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") + (plus:<VW:V_widen> + (zero_extend:<VW:V_widen> + (vec_select:VW + (match_operand:VQI 1 "s_register_operand" "%w") + (match_operand:VQI 2 "vect_par_constant_low" ""))) + (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] + "TARGET_NEON" +{ + return BYTES_BIG_ENDIAN ? 
"vaddw.<V_u_elem>\t%q0, %q3, %f1" : + "vaddw.<V_u_elem>\t%q0, %q3, %e1"; +} + [(set_attr "type" "neon_add_widen")]) + +(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3" + [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") + (plus:<VW:V_widen> + (zero_extend:<VW:V_widen> + (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w") + (match_operand:VQI 2 "vect_par_constant_high" ""))) + (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] + "TARGET_NEON" +{ + return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" : + "vaddw.<V_u_elem>\t%q0, %q3, %f1"; +} + [(set_attr "type" "neon_add_widen")]) + (define_insn "widen_usum<mode>3" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") (plus:<V_widen> (zero_extend:<V_widen> diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index b1cd556..ad92f6c 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -612,59 +612,13 @@ (define_special_predicate "vect_par_constant_high" (match_code "parallel") { - HOST_WIDE_INT count = XVECLEN (op, 0); - int i; - int base = GET_MODE_NUNITS (mode); - - if ((count < 1) - || (count != base/2)) - return false; - - if (!VECTOR_MODE_P (mode)) - return false; - - for (i = 0; i < count; i++) - { - rtx elt = XVECEXP (op, 0, i); - int val; - - if (!CONST_INT_P (elt)) - return false; - - val = INTVAL (elt); - if (val != (base/2) + i) - return false; - } - return true; + return arm_simd_check_vect_par_cnst_half_p (op, mode, true); }) (define_special_predicate "vect_par_constant_low" (match_code "parallel") { - HOST_WIDE_INT count = XVECLEN (op, 0); - int i; - int base = GET_MODE_NUNITS (mode); - - if ((count < 1) - || (count != base/2)) - return false; - - if (!VECTOR_MODE_P (mode)) - return false; - - for (i = 0; i < count; i++) - { - rtx elt = XVECEXP (op, 0, i); - int val; - - if (!CONST_INT_P (elt)) - return false; - - val = INTVAL (elt); - if (val != i) - return false; - } - return true; + return 
arm_simd_check_vect_par_cnst_half_p (op, mode, false); }) (define_predicate "const_double_vcvt_power_of_two_reciprocal" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 82e814a..d49f189 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2016-04-25 Michael Collison <michael.collison@arm.com> + + * testsuite/gcc.target/arm/neon-vaddws16.c: New test. + * testsuite/gcc.target/arm/neon-vaddws32.c: New test. + * testsuite/gcc.target/arm/neon-vaddwu16.c: New test. + * testsuite/gcc.target/arm/neon-vaddwu32.c: New test. + * testsuite/gcc.target/arm/neon-vaddwu8.c: New test. + * testsuite/lib/target-supports.exp + (check_effective_target_vect_widen_sum_hi_to_si_pattern): Indicate + that arm neon support vector widen sum of HImode TO SImode. + 2016-04-23 Jakub Jelinek <jakub@redhat.com> PR sanitizer/70712 diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws16.c b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c new file mode 100644 index 0000000..8281134 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_neon } */ + + + +int +t6 (int len, void * dummy, short * __restrict x) +{ + len = len & ~31; + int result = 0; + __asm volatile (""); + for (int i = 0; i < len; i++) + result += x[i]; + return result; +} + +/* { dg-final { scan-assembler "vaddw\.s16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws32.c b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c new file mode 100644 index 0000000..8c18691 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_neon } */ + + +int +t6 (int len, void * dummy, int * __restrict x) +{ + len = len & ~31; + long long result = 0; + __asm volatile (""); + for (int i = 0; i < 
len; i++) + result += x[i]; + return result; +} + +/* { dg-final { scan-assembler "vaddw\.s32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c new file mode 100644 index 0000000..580bb06 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_neon } */ + + +int +t6 (int len, void * dummy, unsigned short * __restrict x) +{ + len = len & ~31; + unsigned int result = 0; + __asm volatile (""); + for (int i = 0; i < len; i++) + result += x[i]; + return result; +} + +/* { dg-final { scan-assembler "vaddw.u16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c new file mode 100644 index 0000000..21b0633 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_neon } */ + + +int +t6 (int len, void * dummy, unsigned int * __restrict x) +{ + len = len & ~31; + unsigned long long result = 0; + __asm volatile (""); + for (int i = 0; i < len; i++) + result += x[i]; + return result; +} + +/* { dg-final { scan-assembler "vaddw\.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c new file mode 100644 index 0000000..d350ed5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_neon } */ + + + +int +t6 (int len, void * dummy, char * __restrict x) +{ + len = len & ~31; + unsigned short result = 0; + __asm volatile (""); + for (int i = 0; i < len; i++) + result += x[i]; + return result; +} + +/* { dg-final { scan-assembler "vaddw\.u8" } } */ 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3d44e17..422bbab 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4348,6 +4348,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } { set et_vect_widen_sum_hi_to_si_pattern_saved 0 if { [istarget powerpc*-*-*] || [istarget aarch64*-*-*] + || ([istarget arm*-*-*] && + [check_effective_target_arm_neon_ok]) || [istarget ia64-*-*] } { set et_vect_widen_sum_hi_to_si_pattern_saved 1 } |