diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c | 5 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 17 | ||||
-rw-r--r-- | gcc/tree-vectorizer.c | 19 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 10 |
7 files changed, 55 insertions, 11 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c index 2c06564..c9a8b82 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c @@ -1,5 +1,5 @@ -/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O3 --save-temps" } */ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ /* ** f10: @@ -21,3 +21,4 @@ void f10(double * restrict z, double * restrict w, double * restrict x, double * /* { dg-final { scan-assembler-not {\tbic\t} } } */ /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, #0} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c index 0c3b78d..8d78f47 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c @@ -1,5 +1,5 @@ -/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O3 --save-temps" } */ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ /* ** f11: @@ -21,3 +21,4 @@ void f11(double * restrict z, double * restrict w, double * restrict x, double * /* { dg-final { scan-assembler-not {\tbic\t} } } */ /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, #0.0} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c index 248f8ab..38fbfef 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c @@ -1,5 +1,5 @@ -/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O3 --save-temps" } */ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ /* ** f12: @@ -19,3 +19,4 @@ void f12(int * restrict z, int * restrict w, int * restrict x, int * restrict y, /* { dg-final { scan-assembler-not {\tbic\t} } } */ /* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, #0} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c index 9620030..0001dd3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c @@ -1,5 +1,5 @@ -/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O3 --save-temps" } */ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ #include <math.h> @@ -12,3 +12,4 @@ void f13(double * restrict z, double * restrict w, double * restrict x, double * /* { dg-final { scan-assembler-not {\tbic\t} } } */ /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d} 1 } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 03cc726..2284ad0 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10344,6 +10344,7 @@ vectorizable_condition (vec_info *vinfo, else { bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0)); + tree_code orig_code = cond.code; cond.code = invert_tree_comparison (cond.code, honor_nans); if (loop_vinfo->scalar_cond_masked_set.contains (cond)) { @@ -10351,6 +10352,22 @@ vectorizable_condition (vec_info *vinfo, cond_code = cond.code; swap_cond_operands = true; } + else + { + /* Try the inverse of the current mask. We check if the + inverse mask is live and if so we generate a negate of + the current mask such that we still honor NaNs. */ + cond.inverted_p = true; + cond.code = orig_code; + if (loop_vinfo->scalar_cond_masked_set.contains (cond)) + { + bitop1 = orig_code; + bitop2 = BIT_NOT_EXPR; + masks = &LOOP_VINFO_MASKS (loop_vinfo); + cond_code = cond.code; + swap_cond_operands = true; + } + } } } } diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 3247c9a..f493d63 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1678,6 +1678,7 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t) this->code = TREE_CODE (t); this->op0 = TREE_OPERAND (t, 0); this->op1 = TREE_OPERAND (t, 1); + this->inverted_p = false; return; } @@ -1690,13 +1691,31 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t) this->code = code; this->op0 = gimple_assign_rhs1 (stmt); this->op1 = gimple_assign_rhs2 (stmt); + this->inverted_p = false; return; } + else if (code == BIT_NOT_EXPR) + { + tree n_op = gimple_assign_rhs1 (stmt); + if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op)))) + { + code = gimple_assign_rhs_code (stmt); + if (TREE_CODE_CLASS (code) == tcc_comparison) + { + this->code = code; + this->op0 = gimple_assign_rhs1 (stmt); + this->op1 = gimple_assign_rhs2 (stmt); + this->inverted_p = true; + return; + } + } + } } this->code = NE_EXPR; this->op0 = t; this->op1 = build_zero_cst (TREE_TYPE (t)); + this->inverted_p = false; } /* See the comment above the declaration for details. */ diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f8f3064..bd6f334 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -266,6 +266,7 @@ struct scalar_cond_masked_key void get_cond_ops_from_tree (tree); unsigned ncopies; + bool inverted_p; tree_code code; tree op0; tree op1; @@ -285,6 +286,7 @@ struct default_hash_traits<scalar_cond_masked_key> inchash::add_expr (v.op0, h, 0); inchash::add_expr (v.op1, h, 0); h.add_int (v.ncopies); + h.add_flag (v.inverted_p); return h.end (); } @@ -292,9 +294,10 @@ struct default_hash_traits<scalar_cond_masked_key> equal (value_type existing, value_type candidate) { return (existing.ncopies == candidate.ncopies - && existing.code == candidate.code - && operand_equal_p (existing.op0, candidate.op0, 0) - && operand_equal_p (existing.op1, candidate.op1, 0)); + && existing.code == candidate.code + && existing.inverted_p == candidate.inverted_p + && operand_equal_p (existing.op0, candidate.op0, 0) + && operand_equal_p (existing.op1, candidate.op1, 0)); } static const bool empty_zero_p = true; @@ -303,6 +306,7 @@ struct default_hash_traits<scalar_cond_masked_key> mark_empty (value_type &v) { v.ncopies = 0; + v.inverted_p = false; } static inline bool |