diff options
author | Alan Lawrence <alan.lawrence@arm.com> | 2014-09-05 10:50:04 +0000 |
---|---|---|
committer | Alan Lawrence <alalaw01@gcc.gnu.org> | 2014-09-05 10:50:04 +0000 |
commit | ddeabd3e6669f209c8c6aa831cbdb5e6dcf62d41 (patch) | |
tree | 77c1a64665a09ccb722416e639e91de379244c7c /gcc/config | |
parent | e625e71548596d96bd4005194fc5d8f70966ad71 (diff) | |
download | gcc-ddeabd3e6669f209c8c6aa831cbdb5e6dcf62d41.zip gcc-ddeabd3e6669f209c8c6aa831cbdb5e6dcf62d41.tar.gz gcc-ddeabd3e6669f209c8c6aa831cbdb5e6dcf62d41.tar.bz2 |
[PATCH AArch64 1/2] Improve codegen of vector compares inc. tst instruction
gcc/:
* config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
TYPES_TST): Define.
(aarch64_fold_builtin): Update pattern for cmtst.
* config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p):
Declare.
* config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers.
* config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>):
Switch operands, separate out more cases, refactor.
(aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1).
* config/aarch64/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
argument; rename old version to...
(aarch64_const_vec_all_same_in_range_p): ...this.
(aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.
* config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.
gcc/testsuite/:
* gcc.target/aarch64/simd/int_comparisons.x: New file.
* gcc.target/aarch64/simd/int_comparisons_1.c: New test.
* gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.
From-SVN: r214948
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.c | 7 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 90 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 68 | ||||
-rw-r--r-- | gcc/config/aarch64/predicates.md | 6 |
6 files changed, 114 insertions, 60 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 3d2ffa0..32e902d 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -144,6 +144,11 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; #define TYPES_BINOP (aarch64_types_binop_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_internal, qualifier_internal }; +#define TYPES_TST (aarch64_types_cmtst_qualifiers) +static enum aarch64_type_qualifiers aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_none, qualifier_none }; #define TYPES_BINOPV (aarch64_types_binopv_qualifiers) @@ -1285,7 +1290,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, BUILTIN_VALLDI (BINOP, cmeq, 0) return fold_build2 (EQ_EXPR, type, args[0], args[1]); break; - BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + BUILTIN_VSDQ_I_DI (TST, cmtst, 0) { tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); tree vec_zero_node = build_zero_cst (type); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 6878f7d..a041cad 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -180,6 +180,7 @@ bool aarch64_cannot_change_mode_class (enum machine_mode, enum reg_class); enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); +bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_constant_address_p (rtx); bool aarch64_expand_movmem (rtx *); bool aarch64_float_const_zero_rtx_p (rtx); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index aa19130..9bde1ac 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ 
b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -246,7 +246,7 @@ /* Implemented by aarch64_cm<cmp><mode>. */ BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) - BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + BUILTIN_VSDQ_I_DI (TST, cmtst, 0) /* Implemented by reduc_<sur>plus_<mode>. */ BUILTIN_VALL (UNOP, reduc_splus_, 10) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8ffc75e..6a45e91 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1912,58 +1912,94 @@ (match_operand:VDQ 2 "nonmemory_operand")))] "TARGET_SIMD" { - int inverse = 0, has_zero_imm_form = 0; rtx op1 = operands[1]; rtx op2 = operands[2]; rtx mask = gen_reg_rtx (<MODE>mode); + enum rtx_code code = GET_CODE (operands[3]); + + /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn), + and desirable for other comparisons if it results in FOO ? -1 : 0 + (this allows direct use of the comparison result without a bsl). */ + if (code == NE + || (code != EQ + && op1 == CONST0_RTX (<V_cmp_result>mode) + && op2 == CONSTM1_RTX (<V_cmp_result>mode))) + { + op1 = operands[2]; + op2 = operands[1]; + switch (code) + { + case LE: code = GT; break; + case LT: code = GE; break; + case GE: code = LT; break; + case GT: code = LE; break; + /* No case EQ. */ + case NE: code = EQ; break; + case LTU: code = GEU; break; + case LEU: code = GTU; break; + case GTU: code = LEU; break; + case GEU: code = LTU; break; + default: gcc_unreachable (); + } + } - switch (GET_CODE (operands[3])) + /* Make sure we can handle the last operand. */ + switch (code) { + case NE: + /* Normalized to EQ above. */ + gcc_unreachable (); + case LE: case LT: - case NE: - inverse = 1; - /* Fall through. */ case GE: case GT: case EQ: - has_zero_imm_form = 1; - break; - case LEU: - case LTU: - inverse = 1; - break; + /* These instructions have a form taking an immediate zero. 
*/ + if (operands[5] == CONST0_RTX (<MODE>mode)) + break; + /* Fall through, as may need to load into register. */ default: + if (!REG_P (operands[5])) + operands[5] = force_reg (<MODE>mode, operands[5]); break; } - if (!REG_P (operands[5]) - && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form)) - operands[5] = force_reg (<MODE>mode, operands[5]); - - switch (GET_CODE (operands[3])) + switch (code) { case LT: + emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5])); + break; + case GE: emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5])); break; case LE: + emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5])); + break; + case GT: emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5])); break; case LTU: + emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4])); + break; + case GEU: emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5])); break; case LEU: + emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4])); + break; + case GTU: emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5])); break; - case NE: + /* NE has been normalized to EQ above. */ case EQ: emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5])); break; @@ -1972,12 +2008,6 @@ gcc_unreachable (); } - if (inverse) - { - op1 = operands[2]; - op2 = operands[1]; - } - /* If we have (a = (b CMP c) ? -1 : 0); Then we can simply move the generated mask. */ @@ -3932,14 +3962,22 @@ ;; cmtst +;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, +;; we don't have any insns using ne, and aarch64_vcond_internal outputs +;; not (neg (eq (and x y) 0)) +;; which is rewritten by simplify_rtx as +;; plus (eq (and x y) 0) -1. 
+ (define_insn "aarch64_cmtst<mode>" [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") - (neg:<V_cmp_result> - (ne:<V_cmp_result> + (plus:<V_cmp_result> + (eq:<V_cmp_result> (and:VDQ (match_operand:VDQ 1 "register_operand" "w") (match_operand:VDQ 2 "register_operand" "w")) - (vec_duplicate:<V_cmp_result> (const_int 0)))))] + (match_operand:VDQ 3 "aarch64_simd_imm_zero")) + (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one"))) + ] "TARGET_SIMD" "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" [(set_attr "type" "neon_tst<q>")] diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 81ea0dc..023f9fd 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -137,9 +137,6 @@ static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); static bool aarch64_vector_mode_supported_p (enum machine_mode); static unsigned bit_count (unsigned HOST_WIDE_INT); -static bool aarch64_const_vec_all_same_int_p (rtx, - HOST_WIDE_INT, HOST_WIDE_INT); - static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool); @@ -3576,6 +3573,36 @@ aarch64_get_condition_code (rtx x) } } +bool +aarch64_const_vec_all_same_in_range_p (rtx x, + HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) +{ + HOST_WIDE_INT firstval; + int count, i; + + if (GET_CODE (x) != CONST_VECTOR + || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) + return false; + + firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); + if (firstval < minval || firstval > maxval) + return false; + + count = CONST_VECTOR_NUNITS (x); + for (i = 1; i < count; i++) + if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) + return false; + + return true; +} + +bool +aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val) +{ + return aarch64_const_vec_all_same_in_range_p (x, val, val); +} + static unsigned 
bit_count (unsigned HOST_WIDE_INT value) { @@ -3827,9 +3854,10 @@ aarch64_print_operand (FILE *f, rtx x, char code) case CONST_VECTOR: if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) { - gcc_assert (aarch64_const_vec_all_same_int_p (x, - HOST_WIDE_INT_MIN, - HOST_WIDE_INT_MAX)); + gcc_assert ( + aarch64_const_vec_all_same_in_range_p (x, + HOST_WIDE_INT_MIN, + HOST_WIDE_INT_MAX)); asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0))); } else if (aarch64_simd_imm_zero_p (x, GET_MODE (x))) @@ -7732,39 +7760,15 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, #undef CHECK } -static bool -aarch64_const_vec_all_same_int_p (rtx x, - HOST_WIDE_INT minval, - HOST_WIDE_INT maxval) -{ - HOST_WIDE_INT firstval; - int count, i; - - if (GET_CODE (x) != CONST_VECTOR - || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) - return false; - - firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); - if (firstval < minval || firstval > maxval) - return false; - - count = CONST_VECTOR_NUNITS (x); - for (i = 1; i < count; i++) - if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) - return false; - - return true; -} - /* Check of immediate shift constants are within range. 
*/ bool aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left) { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; if (left) - return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1); + return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); else - return aarch64_const_vec_all_same_int_p (x, 1, bit_width); + return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); } /* Return true if X is a uniform vector where all elements diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index c1510ca..8191169 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -273,3 +273,9 @@ { return aarch64_simd_imm_zero_p (op, mode); }) + +(define_special_predicate "aarch64_simd_imm_minus_one" + (match_code "const_vector") +{ + return aarch64_const_vec_all_same_int_p (op, -1); +}) |