author	Richard Sandiford <richard.sandiford@arm.com>	2019-08-13 21:33:51 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2019-08-13 21:33:51 +0000
commit	0b1fe8cf6f1dde656c505dde6d27279dff388962 (patch)
tree	a3eff991a44ccd4c0437b4c3b39690ba2caa10a3 /gcc/config
parent	abb1d111f99fa2b480923e55dd74a974a62c9624 (diff)
Optimise constant IFN_WHILE_ULTs
This patch is a combination of two changes that have to be committed
as a single unit:

(1) Try to fold IFN_WHILE_ULTs with constant arguments to a VECTOR_CST
    (which is always possible for fixed-length vectors but is not
    necessarily so for variable-length vectors)

(2) Make the SVE port recognise constants that map to PTRUE VLn,
    which includes those generated by the new fold.

(2) can't be tested without (1) and (1) would be a significant
pessimisation without (2).

The target-specific parts also start moving towards doing predicate
manipulation in a canonical VNx16BImode form, using rtx_vector_builders.

2019-08-13  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree.h (build_vector_a_then_b): Declare.
	* tree.c (build_vector_a_then_b): New function.
	* fold-const-call.c (fold_while_ult): Likewise.
	(fold_const_call): Use it to handle IFN_WHILE_ULT.
	* config/aarch64/aarch64-protos.h (AARCH64_FOR_SVPATTERN): New macro.
	(aarch64_svpattern): New enum.
	* config/aarch64/aarch64-sve.md (mov<PRED_ALL:mode>): Pass constants
	through aarch64_expand_mov_immediate.
	(*aarch64_sve_mov<PRED_ALL:mode>): Use aarch64_mov_operand rather
	than general_operand as the predicate for operand 1.
	(while_ult<GPI:mode><PRED_ALL:mode>): Add a '@' marker.
	* config/aarch64/aarch64.c (simd_immediate_info::PTRUE): New
	insn_type.
	(simd_immediate_info::simd_immediate_info): New overload that
	takes a scalar_int_mode and an svpattern.
	(simd_immediate_info::u): Add a "pattern" field.
	(svpattern_token): New function.
	(aarch64_get_sve_pred_bits, aarch64_widest_sve_pred_elt_size)
	(aarch64_partial_ptrue_length, aarch64_svpattern_for_vl)
	(aarch64_sve_move_pred_via_while): New functions.
	(aarch64_expand_mov_immediate): Try using
	aarch64_sve_move_pred_via_while for predicates that contain
	N ones followed by M zeros but that do not correspond to a
	VLnnn pattern.
	(aarch64_sve_pred_valid_immediate): New function.
	(aarch64_simd_valid_immediate): Use it instead of dealing
	directly with PTRUE and PFALSE.
	(aarch64_output_sve_mov_immediate): Handle new simd_immediate_info
	forms.

gcc/testsuite/
	* gcc.target/aarch64/sve/spill_2.c: Increase iteration counts
	beyond the range of a PTRUE.
	* gcc.target/aarch64/sve/while_6.c: New test.
	* gcc.target/aarch64/sve/while_7.c: Likewise.
	* gcc.target/aarch64/sve/while_8.c: Likewise.
	* gcc.target/aarch64/sve/while_9.c: Likewise.
	* gcc.target/aarch64/sve/while_10.c: Likewise.

From-SVN: r274402
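
As an illustrative aside (an invented example, not taken from the new
while_*.c tests, whose contents are not shown here): a fully-masked SVE
loop whose constant trip count fits within the minimum vector length can
now have its governing predicate folded to a constant and emitted as a
single PTRUE with a VLn pattern instead of a run-time WHILELO.

/* Hypothetical example.  With SVE fully-masked vectorisation, the
   predicate for this 7-iteration byte loop comes from
   IFN_WHILE_ULT (0, 7); the new fold turns that into a constant with
   7 leading ones, which the SVE port can emit as "ptrue pN.b, vl7".  */
void
f (unsigned char *__restrict a, unsigned char *__restrict b)
{
  for (int i = 0; i < 7; ++i)
    a[i] += b[i];
}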
Diffstat (limited to 'gcc/config')
-rw-r--r--	gcc/config/aarch64/aarch64-protos.h	27
-rw-r--r--	gcc/config/aarch64/aarch64-sve.md	10
-rw-r--r--	gcc/config/aarch64/aarch64.c	275
3 files changed, 287 insertions, 25 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ad818a4..86d53c5 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -406,6 +406,33 @@ extern enum aarch64_key_type aarch64_ra_sign_key;
extern struct tune_params aarch64_tune_params;
+/* The available SVE predicate patterns, known in the ACLE as "svpattern". */
+#define AARCH64_FOR_SVPATTERN(T) \
+ T (POW2, pow2, 0) \
+ T (VL1, vl1, 1) \
+ T (VL2, vl2, 2) \
+ T (VL3, vl3, 3) \
+ T (VL4, vl4, 4) \
+ T (VL5, vl5, 5) \
+ T (VL6, vl6, 6) \
+ T (VL7, vl7, 7) \
+ T (VL8, vl8, 8) \
+ T (VL16, vl16, 9) \
+ T (VL32, vl32, 10) \
+ T (VL64, vl64, 11) \
+ T (VL128, vl128, 12) \
+ T (VL256, vl256, 13) \
+ T (MUL4, mul4, 29) \
+ T (MUL3, mul3, 30) \
+ T (ALL, all, 31)
+
+#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE,
+enum aarch64_svpattern {
+ AARCH64_FOR_SVPATTERN (AARCH64_SVENUM)
+ AARCH64_NUM_SVPATTERNS
+};
+#undef AARCH64_SVENUM
+
void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
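
The explicit values in the new table above follow the encoding of the
PTRUE pattern field, and the X-macro lets a single list expand into both
the enum here and the assembly-token lookup in svpattern_token in
aarch64.c.  A minimal standalone sketch of the idiom, with invented
names (illustration only, not part of the patch):

/* One table, two expansions: an enum and a name lookup.  */
#define FOR_COLOR(T) T (RED, red, 0) T (GREEN, green, 1) T (BLUE, blue, 2)

#define ENUM_ENTRY(UPPER, LOWER, VALUE) COLOR_##UPPER = VALUE,
enum color { FOR_COLOR (ENUM_ENTRY) NUM_COLORS };
#undef ENUM_ENTRY

static const char *
color_token (enum color c)
{
  switch (c)
    {
#define CASE(UPPER, LOWER, VALUE) case COLOR_##UPPER: return #LOWER;
      FOR_COLOR (CASE)
#undef CASE
    case NUM_COLORS:
      break;
    }
  return "?";
}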
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 950f397..53d93a3 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -481,12 +481,18 @@
{
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (CONSTANT_P (operands[1]))
+ {
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ }
}
)
(define_insn "*aarch64_sve_mov<mode>"
[(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
- (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dn"))]
+ (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
"TARGET_SVE
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
@@ -2923,7 +2929,7 @@
;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
;; with the comparison being unsigned.
-(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
+(define_insn "@while_ult<GPI:mode><PRED_ALL:mode>"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
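
The comment above this pattern defines the WHILE_ULT semantics; a small
scalar model (illustration only, not part of the patch) makes the
cumulative "for all J" condition concrete:

#include <stdbool.h>
#include <stdint.h>

/* Scalar model: element I of the predicate is set iff base + J < limit
   (unsigned) for every J in [0, I].  */
static void
model_while_ult (uint64_t base, uint64_t limit, bool *pred, unsigned nelts)
{
  bool active = true;
  for (unsigned i = 0; i < nelts; ++i)
    {
      active = active && base + i < limit;
      pred[i] = active;
    }
}

/* e.g. base 0, limit 7, nelts 16 gives 7 leading true elements: exactly
   the kind of constant that the new fold can now produce.  */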
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fe96845..2b3ea9f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -83,7 +83,7 @@
/* Information about a legitimate vector immediate operand. */
struct simd_immediate_info
{
- enum insn_type { MOV, MVN, INDEX };
+ enum insn_type { MOV, MVN, INDEX, PTRUE };
enum modifier_type { LSL, MSL };
simd_immediate_info () {}
@@ -92,6 +92,7 @@ struct simd_immediate_info
insn_type = MOV, modifier_type = LSL,
unsigned int = 0);
simd_immediate_info (scalar_mode, rtx, rtx);
+ simd_immediate_info (scalar_int_mode, aarch64_svpattern);
/* The mode of the elements. */
scalar_mode elt_mode;
@@ -120,6 +121,9 @@ struct simd_immediate_info
subsequent element. */
rtx base, step;
} index;
+
+ /* For PTRUE. */
+ aarch64_svpattern pattern;
} u;
};
@@ -159,6 +163,16 @@ inline simd_immediate_info
u.index.step = step_in;
}
+/* Construct a predicate that controls elements of mode ELT_MODE_IN
+ and has PTRUE pattern PATTERN_IN. */
+inline simd_immediate_info
+::simd_immediate_info (scalar_int_mode elt_mode_in,
+ aarch64_svpattern pattern_in)
+ : elt_mode (elt_mode_in), insn (PTRUE)
+{
+ u.pattern = pattern_in;
+}
+
/* The current code model. */
enum aarch64_code_model aarch64_cmodel;
@@ -1334,6 +1348,22 @@ static const char *const aarch64_sve_condition_codes[] =
"pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv"
};
+/* Return the assembly token for svpattern value VALUE. */
+
+static const char *
+svpattern_token (enum aarch64_svpattern pattern)
+{
+ switch (pattern)
+ {
+#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER;
+ AARCH64_FOR_SVPATTERN (CASE)
+#undef CASE
+ case AARCH64_NUM_SVPATTERNS:
+ break;
+ }
+ gcc_unreachable ();
+}
+
/* Generate code to enable conditional branches in functions over 1 MiB. */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
@@ -2529,6 +2559,146 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
}
}
+/* Return true if predicate value X is a constant in which every element
+ is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI
+ value, i.e. as a predicate in which all bits are significant. */
+
+static bool
+aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x)
+{
+ if (GET_CODE (x) != CONST_VECTOR)
+ return false;
+
+ unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode),
+ GET_MODE_NUNITS (GET_MODE (x)));
+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor;
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+ builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern);
+
+ unsigned int nelts = const_vector_encoded_nelts (x);
+ for (unsigned int i = 0; i < nelts; ++i)
+ {
+ rtx elt = CONST_VECTOR_ENCODED_ELT (x, i);
+ if (!CONST_INT_P (elt))
+ return false;
+
+ builder.quick_push (elt);
+ for (unsigned int j = 1; j < factor; ++j)
+ builder.quick_push (const0_rtx);
+ }
+ builder.finalize ();
+ return true;
+}
+
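/* Aside, not part of the patch: a standalone sketch of the bit-spreading
   done by the function above.  A narrower predicate mode keeps one
   significant bit per element; viewed as VNx16BI, that bit is followed
   by FACTOR - 1 padding zeros.  E.g. a VNx4BI constant 1,1,1,0,...
   (factor 4) becomes 1,0,0,0, 1,0,0,0, 1,0,0,0, 0,0,0,0, ...  */
static void
spread_pred_bits (const unsigned char *src, unsigned int nelts,
		  unsigned int factor, unsigned char *dst)
{
  for (unsigned int i = 0; i < nelts; ++i)
    {
      dst[i * factor] = src[i];
      for (unsigned int j = 1; j < factor; ++j)
	dst[i * factor + j] = 0;
    }
}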
+/* BUILDER contains a predicate constant of mode VNx16BI. Return the
+ widest predicate element size it can have (that is, the largest size
+ for which each element would still be 0 or 1). */
+
+unsigned int
+aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder)
+{
+ /* Start with the most optimistic assumption: that we only need
+ one bit per pattern. This is what we will use if only the first
+ bit in each pattern is ever set. */
+ unsigned int mask = GET_MODE_SIZE (DImode);
+ mask |= builder.npatterns ();
+
+ /* Look for set bits. */
+ unsigned int nelts = builder.encoded_nelts ();
+ for (unsigned int i = 1; i < nelts; ++i)
+ if (INTVAL (builder.elt (i)) != 0)
+ {
+ if (i & 1)
+ return 1;
+ mask |= i;
+ }
+ return mask & -mask;
+}
+
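/* Aside, not part of the patch: a scalar model of the computation above.
   The result is the lowest set bit of MASK, i.e. the largest power of
   two that divides DImode's size in bytes, the number of patterns and
   the index of every set bit.  For example, npatterns == 4 with set
   bits only at indices 0 and 4 gives mask == 8 | 4 | 4 == 0xc, so the
   result is 4: one significant bit per 4-byte (.s) element suffices.  */
static unsigned int
model_widest_pred_elt_size (const unsigned char *bits, unsigned int nelts,
			    unsigned int npatterns)
{
  unsigned int mask = 8 | npatterns;
  for (unsigned int i = 1; i < nelts; ++i)
    if (bits[i])
      {
	if (i & 1)
	  return 1;
	mask |= i;
      }
  return mask & -mask;
}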
+/* BUILDER is a predicate constant of mode VNx16BI. Consider the value
+ that the constant would have with predicate element size ELT_SIZE
+ (ignoring the upper bits in each element) and return:
+
+ * -1 if all bits are set
+ * N if the predicate has N leading set bits followed by all clear bits
+ * 0 if the predicate does not have any of these forms. */
+
+int
+aarch64_partial_ptrue_length (rtx_vector_builder &builder,
+ unsigned int elt_size)
+{
+ /* If nelts_per_pattern is 3, we have set bits followed by clear bits
+ followed by set bits. */
+ if (builder.nelts_per_pattern () == 3)
+ return 0;
+
+ /* Skip over leading set bits. */
+ unsigned int nelts = builder.encoded_nelts ();
+ unsigned int i = 0;
+ for (; i < nelts; i += elt_size)
+ if (INTVAL (builder.elt (i)) == 0)
+ break;
+ unsigned int vl = i / elt_size;
+
+ /* Check for the all-true case. */
+ if (i == nelts)
+ return -1;
+
+ /* If nelts_per_pattern is 1, then either VL is zero, or we have a
+ repeating pattern of set bits followed by clear bits. */
+ if (builder.nelts_per_pattern () != 2)
+ return 0;
+
+ /* We have a "foreground" value and a duplicated "background" value.
+ If the background might repeat and the last set bit belongs to it,
+ we might have set bits followed by clear bits followed by set bits. */
+ if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ()))
+ return 0;
+
+ /* Make sure that the rest are all clear. */
+ for (; i < nelts; i += elt_size)
+ if (INTVAL (builder.elt (i)) != 0)
+ return 0;
+
+ return vl;
+}
+
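/* Aside, not part of the patch: a simplified scalar model of the
   tri-state result above, ignoring the variable-length encoding details
   that the real function has to respect.  -1 means all bits set, N means
   N leading set bits followed only by clear bits, and 0 means neither
   form applies.  */
static int
model_partial_ptrue_length (const unsigned char *elts, unsigned int nelts)
{
  unsigned int vl = 0;
  while (vl < nelts && elts[vl])
    ++vl;
  if (vl == nelts)
    return -1;
  for (unsigned int i = vl; i < nelts; ++i)
    if (elts[i])
      return 0;
  return vl;
}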
+/* See if there is an svpattern that encodes an SVE predicate of mode
+ PRED_MODE in which the first VL bits are set and the rest are clear.
+ Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
+ A VL of -1 indicates an all-true vector. */
+
+aarch64_svpattern
+aarch64_svpattern_for_vl (machine_mode pred_mode, int vl)
+{
+ if (vl < 0)
+ return AARCH64_SV_ALL;
+
+ if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode)))
+ return AARCH64_NUM_SVPATTERNS;
+
+ if (vl >= 1 && vl <= 8)
+ return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1));
+
+ if (vl >= 16 && vl <= 256 && pow2p_hwi (vl))
+ return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4));
+
+ int max_vl;
+ if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl))
+ {
+ if (vl == (max_vl / 3) * 3)
+ return AARCH64_SV_MUL3;
+ /* These would only trigger for non-power-of-2 lengths. */
+ if (vl == (max_vl & -4))
+ return AARCH64_SV_MUL4;
+ if (vl == (1 << floor_log2 (max_vl)))
+ return AARCH64_SV_POW2;
+ if (vl == max_vl)
+ return AARCH64_SV_ALL;
+ }
+ return AARCH64_NUM_SVPATTERNS;
+}
+
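/* Worked examples of the mapping above (illustration only, not part of
   the patch):
   - vl == -1 always maps to AARCH64_SV_ALL.
   - vl == 7 with mode VNx16BI (16 + 16x elements) maps to
     AARCH64_SV_VL7, since 7 can never exceed the element count.
   - vl == 7 with mode VNx4BI (4 + 4x elements) returns
     AARCH64_NUM_SVPATTERNS, because 7 may exceed the runtime element
     count; the caller then falls back to
     aarch64_sve_move_pred_via_while.
   - Larger VLn values need the element count to be provably at least
     vl: with -msve-vector-bits=256 (32 .b elements), vl == 32 maps to
     AARCH64_SV_VL32, while the same vl in a length-agnostic build
     returns AARCH64_NUM_SVPATTERNS.  */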
/* Return an all-true predicate register of mode MODE. */
rtx
@@ -3447,6 +3617,17 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
return target;
}
+/* Use WHILE to set predicate register DEST so that the first VL bits
+ are set and the rest are clear. */
+
+static void
+aarch64_sve_move_pred_via_while (rtx dest, unsigned int vl)
+{
+ rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
+ emit_insn (gen_while_ult (DImode, GET_MODE (dest),
+ dest, const0_rtx, limit));
+}
+
/* Set DEST to immediate IMM. */
void
@@ -3580,6 +3761,19 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
+ rtx_vector_builder builder;
+ if (GET_MODE_CLASS (GET_MODE (imm)) == MODE_VECTOR_BOOL
+ && aarch64_get_sve_pred_bits (builder, imm))
+ {
+ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
+ int vl = aarch64_partial_ptrue_length (builder, elt_size);
+ if (vl > 0)
+ {
+ aarch64_sve_move_pred_via_while (dest, vl);
+ return;
+ }
+ }
+
if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
{
@@ -14776,6 +14970,44 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
return false;
}
+/* Return true if X is a valid SVE predicate. If INFO is nonnull, use
+ it to describe valid immediates. */
+
+static bool
+aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
+{
+ if (x == CONST0_RTX (GET_MODE (x)))
+ {
+ if (info)
+ *info = simd_immediate_info (DImode, 0);
+ return true;
+ }
+
+ /* Analyze the value as a VNx16BImode. This should be relatively
+ efficient, since rtx_vector_builder has enough built-in capacity
+ to store all VLA predicate constants without needing the heap. */
+ rtx_vector_builder builder;
+ if (!aarch64_get_sve_pred_bits (builder, x))
+ return false;
+
+ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
+ if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
+ {
+ machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
+ aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl);
+ if (pattern != AARCH64_NUM_SVPATTERNS)
+ {
+ if (info)
+ {
+ scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
+ *info = simd_immediate_info (int_mode, pattern);
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
/* Return true if OP is a valid SIMD immediate for the operation
described by WHICH. If INFO is nonnull, use it to describe valid
immediates. */
@@ -14788,6 +15020,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
return false;
+ if (vec_flags & VEC_SVE_PRED)
+ return aarch64_sve_pred_valid_immediate (op, info);
+
scalar_mode elt_mode = GET_MODE_INNER (mode);
rtx base, step;
unsigned int n_elts;
@@ -14812,21 +15047,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
else
return false;
- /* Handle PFALSE and PTRUE. */
- if (vec_flags & VEC_SVE_PRED)
- {
- if (op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode))
- {
- if (info)
- {
- scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
- *info = simd_immediate_info (int_mode, op == CONSTM1_RTX (mode));
- }
- return true;
- }
- return false;
- }
-
scalar_float_mode elt_float_mode;
if (n_elts == 1
&& is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
@@ -16570,14 +16790,23 @@ aarch64_output_sve_mov_immediate (rtx const_vector)
if (aarch64_sve_pred_mode_p (vec_mode))
{
static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
- unsigned int total_bytes;
- if (info.u.mov.value == const0_rtx)
- snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
- else if (BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
- snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
- total_bytes / GET_MODE_SIZE (info.elt_mode));
+ if (info.insn == simd_immediate_info::MOV)
+ {
+ gcc_assert (info.u.mov.value == const0_rtx);
+ snprintf (buf, sizeof (buf), "pfalse\t%%0.b");
+ }
else
- snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", element_char);
+ {
+ gcc_assert (info.insn == simd_immediate_info::PTRUE);
+ unsigned int total_bytes;
+ if (info.u.pattern == AARCH64_SV_ALL
+ && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes))
+ snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char,
+ total_bytes / GET_MODE_SIZE (info.elt_mode));
+ else
+ snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char,
+ svpattern_token (info.u.pattern));
+ }
return buf;
}