author     Richard Sandiford <richard.sandiford@arm.com>  2019-11-16 11:11:47 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>       2019-11-16 11:11:47 +0000
commit     217ccab8f46ca5b260319e7b71b421daec6d11c8 (patch)
tree       a3ef07b19bc12a62c9612d4bf2d8179469013b49 /gcc/config
parent     e58703e2c1b09d39714740e00933d32df965de32 (diff)
[AArch64] Pattern-match SVE extending loads
This patch pattern-matches a partial SVE load followed by a sign or zero
extension into an extending load.  (The partial load is already an
extending load; we just don't rely on the upper bits of the elements.)

Nothing yet uses the extra LDFF1 and LDNF1 combinations, but it seemed
more consistent to provide them, since I needed to update the pattern
to use a predicated extension anyway.

2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-sve.md
	(@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>)
	(@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
	(@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
	Combine into...
	(@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
	...this new pattern, handling extension to partial modes as well
	as full modes.  Describe the extension as a predicated rather than
	unpredicated extension.
	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>)
	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
	Combine into...
	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
	...this new pattern, handling extension to partial modes as well
	as full modes.  Describe the extension as a predicated rather than
	unpredicated extension.
	* config/aarch64/aarch64-sve-builtins.cc
	(function_expander::use_contiguous_load_insn): Add an extra
	predicate for extending loads.
	* config/aarch64/aarch64.c (aarch64_extending_load_p): New function.
	(aarch64_sve_adjust_stmt_cost): Likewise.
	(aarch64_add_stmt_cost): Use aarch64_sve_adjust_stmt_cost to
	adjust the cost of SVE vector stmts.

gcc/testsuite/
	* gcc.target/aarch64/sve/load_extend_1.c: New test.
	* gcc.target/aarch64/sve/load_extend_2.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_3.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_4.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_5.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_6.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_7.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_8.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_9.c: Likewise.
	* gcc.target/aarch64/sve/load_extend_10.c: Likewise.
	* gcc.target/aarch64/sve/reduc_4.c: Add --param
	aarch64-sve-compare-costs=0.

From-SVN: r278343
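
As a rough illustration of the kind of code this targets (a hand-written
sketch, not one of the new load_extend_*.c tests), the widening copy below
loads int16_t elements and sign-extends them to 64 bits; with this patch the
load and the extension are expected to combine into a single LD1SH under
typical SVE auto-vectorization:

    #include <stdint.h>

    /* Illustrative only: each int16_t element is loaded and sign-extended
       to 64 bits, which the combined pattern lets fold into one extending
       load instead of a load plus a separate SXTH.  */
    void
    copy_widen (int64_t *restrict dst, const int16_t *restrict src, int n)
    {
      for (int i = 0; i < n; ++i)
        dst[i] = src[i];
    }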
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc |   6
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md          | 107
-rw-r--r--  gcc/config/aarch64/aarch64.c               |  46
3 files changed, 85 insertions, 74 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 63d903d..27736b9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2790,7 +2790,9 @@ function_expander::use_vcond_mask_insn (insn_code icode,
}
/* Implement the call using instruction ICODE, which loads memory operand 1
- into register operand 0 under the control of predicate operand 2. */
+ into register operand 0 under the control of predicate operand 2.
+ Extending loads have a further predicate (operand 3) that nominally
+ controls the extension. */
rtx
function_expander::use_contiguous_load_insn (insn_code icode)
{
@@ -2799,6 +2801,8 @@ function_expander::use_contiguous_load_insn (insn_code icode)
add_output_operand (icode);
add_mem_operand (mem_mode, get_contiguous_base (mem_mode));
add_input_operand (icode, args[0]);
+ if (GET_MODE_UNIT_BITSIZE (mem_mode) < type_suffix (0).element_bits)
+ add_input_operand (icode, CONSTM1_RTX (VNx16BImode));
return generate_insn (icode);
}
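
For the ACLE built-ins, the extra operand only matters when the memory
elements are narrower than the vector elements.  A minimal use (an
illustrative sketch assuming an SVE-enabled toolchain and <arm_sve.h>; the
function name is made up):

    #include <arm_sve.h>

    /* svld1sb_s32 loads int8_t elements and sign-extends them to 32 bits,
       so use_contiguous_load_insn appends the all-true VNx16BI predicate
       as the extra operand 3 of the extending-load pattern.  */
    svint32_t
    load_sext (svbool_t pg, const int8_t *ptr)
    {
      return svld1sb_s32 (pg, ptr);
    }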
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 40aeb95..ce1bd58 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1189,39 +1189,22 @@
;; -------------------------------------------------------------------------
;; Predicated load and extend, with 8 elements per 128-bit block.
-(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>"
- [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w")
- (ANY_EXTEND:VNx8_WIDE
- (unspec:VNx8_NARROW
- [(match_operand:VNx8BI 2 "register_operand" "Upl")
- (match_operand:VNx8_NARROW 1 "memory_operand" "m")]
- UNSPEC_LD1_SVE)))]
- "TARGET_SVE"
- "ld1<ANY_EXTEND:s><VNx8_NARROW:Vesize>\t%0.<VNx8_WIDE:Vetype>, %2/z, %1"
-)
-
-;; Predicated load and extend, with 4 elements per 128-bit block.
-(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
- [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w")
- (ANY_EXTEND:VNx4_WIDE
- (unspec:VNx4_NARROW
- [(match_operand:VNx4BI 2 "register_operand" "Upl")
- (match_operand:VNx4_NARROW 1 "memory_operand" "m")]
- UNSPEC_LD1_SVE)))]
- "TARGET_SVE"
- "ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.<VNx4_WIDE:Vetype>, %2/z, %1"
-)
-
-;; Predicated load and extend, with 2 elements per 128-bit block.
-(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
- [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 2 "register_operand" "Upl")
- (match_operand:VNx2_NARROW 1 "memory_operand" "m")]
- UNSPEC_LD1_SVE)))]
- "TARGET_SVE"
- "ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.<VNx2_WIDE:Vetype>, %2/z, %1"
+(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
+ (ANY_EXTEND:SVE_HSDI
+ (unspec:SVE_PARTIAL_I
+ [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
+ UNSPEC_LD1_SVE))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
+ "&& !CONSTANT_P (operands[3])"
+ {
+ operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
+ }
)
;; -------------------------------------------------------------------------
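
The ANY_EXTEND iterator means the combined pattern covers both signedness
choices (LD1B/LD1SB, LD1H/LD1SH, LD1W/LD1SW).  A sign-extending sketch
(illustrative code, not taken from the testsuite):

    #include <stdint.h>

    /* Illustrative only: the int8_t elements are sign-extended to 32 bits,
       so the access is expected to use LD1SB rather than LD1B followed by
       a separate SXTB.  */
    void
    add_signed_bytes (int32_t *restrict dst, const int8_t *restrict src, int n)
    {
      for (int i = 0; i < n; ++i)
        dst[i] += src[i];
    }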
@@ -1268,46 +1251,24 @@
;; - LDNF1W
;; -------------------------------------------------------------------------
-;; Predicated first-faulting or non-faulting load and extend, with 8 elements
-;; per 128-bit block.
-(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>"
- [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w")
- (ANY_EXTEND:VNx8_WIDE
- (unspec:VNx8_NARROW
- [(match_operand:VNx8BI 2 "register_operand" "Upl")
- (match_operand:VNx8_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
- (reg:VNx16BI FFRT_REGNUM)]
- SVE_LDFF1_LDNF1)))]
- "TARGET_SVE"
- "ld<fn>f1<ANY_EXTEND:s><VNx8_NARROW:Vesize>\t%0.<VNx8_WIDE:Vetype>, %2/z, %1"
-)
-
-;; Predicated first-faulting or non-faulting load and extend, with 4 elements
-;; per 128-bit block.
-(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
- [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w")
- (ANY_EXTEND:VNx4_WIDE
- (unspec:VNx4_NARROW
- [(match_operand:VNx4BI 2 "register_operand" "Upl")
- (match_operand:VNx4_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
- (reg:VNx16BI FFRT_REGNUM)]
- SVE_LDFF1_LDNF1)))]
- "TARGET_SVE"
- "ld<fn>f1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.<VNx4_WIDE:Vetype>, %2/z, %1"
-)
-
-;; Predicated first-faulting or non-faulting load and extend, with 2 elements
-;; per 128-bit block.
-(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
- [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w")
- (ANY_EXTEND:VNx2_WIDE
- (unspec:VNx2_NARROW
- [(match_operand:VNx2BI 2 "register_operand" "Upl")
- (match_operand:VNx2_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
- (reg:VNx16BI FFRT_REGNUM)]
- SVE_LDFF1_LDNF1)))]
- "TARGET_SVE"
- "ld<fn>f1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.<VNx2_WIDE:Vetype>, %2/z, %1"
+;; Predicated first-faulting or non-faulting load and extend.
+(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
+ [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
+ (ANY_EXTEND:SVE_HSDI
+ (unspec:SVE_PARTIAL_I
+ [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
+ (reg:VNx16BI FFRT_REGNUM)]
+ SVE_LDFF1_LDNF1))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
+ "&& !CONSTANT_P (operands[3])"
+ {
+ operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
+ }
)
;; -------------------------------------------------------------------------
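
The first-faulting and non-faulting forms share the same combined shape.
As the commit message notes, nothing yet uses the extra partial-mode
LDFF1/LDNF1 combinations, but the full-mode forms are already reachable
through the ACLE built-ins; an illustrative sketch, again assuming an
SVE-enabled toolchain:

    #include <arm_sve.h>

    /* svldff1ub_u32 is a first-faulting load of uint8_t elements,
       zero-extended to 32 bits; it is expected to map onto the new
       combined LDFF1 extending-load pattern above.  */
    svuint32_t
    ldff_zext (svbool_t pg, const uint8_t *ptr)
    {
      return svldff1ub_u32 (pg, ptr);
    }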
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d175e1f..305c6da 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12879,6 +12879,49 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
+/* Return true if STMT_INFO extends the result of a load. */
+static bool
+aarch64_extending_load_p (stmt_vec_info stmt_info)
+{
+ gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+ return false;
+
+ tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+ tree rhs_type = TREE_TYPE (rhs);
+ if (!INTEGRAL_TYPE_P (lhs_type)
+ || !INTEGRAL_TYPE_P (rhs_type)
+ || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
+ return false;
+
+ stmt_vec_info def_stmt_info = stmt_info->vinfo->lookup_def (rhs);
+ return (def_stmt_info
+ && STMT_VINFO_DATA_REF (def_stmt_info)
+ && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
+}
+
+/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
+ for STMT_INFO, which has cost kind KIND. Adjust the cost as necessary
+ for SVE targets. */
+static unsigned int
+aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+ unsigned int stmt_cost)
+{
+ /* Unlike vec_promote_demote, vector_stmt conversions do not change the
+ vector register size or number of units. Integer promotions of this
+ type therefore map to SXT[BHW] or UXT[BHW].
+
+ Most loads have extending forms that can do the sign or zero extension
+ on the fly. Optimistically assume that a load followed by an extension
+ will fold to this form during combine, and that the extension therefore
+ comes for free. */
+ if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
+ stmt_cost = 0;
+
+ return stmt_cost;
+}
+
/* Implement targetm.vectorize.add_stmt_cost. */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
@@ -12894,6 +12937,9 @@ aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
int stmt_cost =
aarch64_builtin_vectorization_cost (kind, vectype, misalign);
+ if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
+ stmt_cost = aarch64_sve_adjust_stmt_cost (kind, stmt_info, stmt_cost);
+
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
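
To see the effect of the cost adjustment, consider a widening reduction (an
illustrative sketch in the spirit of the reduc_4.c change, not copied from
it): the uint16_t-to-uint32_t conversion statement feeds the vector
accumulator, and aarch64_sve_adjust_stmt_cost now costs it as free on the
assumption that it folds into the extending load during combine.

    #include <stdint.h>

    /* Illustrative only: the conversion of each uint16_t element to
       uint32_t is expected to fold into an LD1H that zero-extends on
       the fly, so the SVE cost model no longer charges for it.  */
    uint32_t
    sum_widen (const uint16_t *x, int n)
    {
      uint32_t res = 0;
      for (int i = 0; i < n; ++i)
        res += x[i];
      return res;
    }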