-rw-r--r--  gcc/ChangeLog                                         79
-rw-r--r--  gcc/config/aarch64/aarch64-modes.def                   8
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h                    1
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                    133
-rw-r--r--  gcc/config/aarch64/aarch64.c                         230
-rw-r--r--  gcc/config/aarch64/iterators.md                      176
-rw-r--r--  gcc/testsuite/ChangeLog                                8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/mixed_size_1.c   39
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/mixed_size_2.c   41
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/mixed_size_3.c   41
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/mixed_size_4.c   43
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/mixed_size_5.c   42
12 files changed, 674 insertions, 167 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b7e46cf..afb995f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,84 @@
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+ * config/aarch64/aarch64-modes.def: Define partial SVE vector
+ float modes.
+ * config/aarch64/aarch64-protos.h (aarch64_sve_pred_mode): New
+ function.
+ * config/aarch64/aarch64.c (aarch64_classify_vector_mode): Handle the
+ new vector float modes.
+ (aarch64_sve_container_bits): New function.
+ (aarch64_sve_pred_mode): Likewise.
+ (aarch64_get_mask_mode): Use it.
+ (aarch64_sve_element_int_mode): Handle structure modes and partial
+ modes.
+ (aarch64_sve_container_int_mode): New function.
+ (aarch64_vectorize_related_mode): Return SVE modes when given
+ SVE modes. Handle partial modes, taking the preferred number
+ of units from the size of the given mode.
+ (aarch64_hard_regno_mode_ok): Allow partial modes to be stored
+ in registers.
+ (aarch64_expand_sve_ld1rq): Use the mode form of aarch64_sve_pred_mode.
+ (aarch64_expand_sve_const_vector): Handle partial SVE vectors.
+ (aarch64_split_sve_subreg_move): Use the mode form of
+ aarch64_sve_pred_mode.
+ (aarch64_secondary_reload): Handle partial modes in the same way
+ as full big-endian vectors.
+ (aarch64_vector_mode_supported_p): Allow partial SVE vectors.
+ (aarch64_autovectorize_vector_modes): Try unpacked SVE vectors,
+ merging with the Advanced SIMD modes. If two modes have the
+ same size, try the Advanced SIMD mode first.
+ (aarch64_simd_valid_immediate): Use the container rather than
+ the element mode for INDEX constants.
+ (aarch64_simd_vector_alignment): Make the alignment of partial
+ SVE vector modes the same as their minimum size.
+ (aarch64_evpc_sel): Use the mode form of aarch64_sve_pred_mode.
+ * config/aarch64/aarch64-sve.md (mov<SVE_FULL:mode>): Extend to...
+ (mov<SVE_ALL:mode>): ...this.
+ (movmisalign<SVE_FULL:mode>): Extend to...
+ (movmisalign<SVE_ALL:mode>): ...this.
+ (*aarch64_sve_mov<mode>_le): Rename to...
+ (*aarch64_sve_mov<mode>_ldr_str): ...this.
+ (*aarch64_sve_mov<SVE_FULL:mode>_be): Rename and extend to...
+ (*aarch64_sve_mov<SVE_ALL:mode>_no_ldr_str): ...this. Handle
+ partial modes regardless of endianness.
+ (aarch64_sve_reload_be): Rename to...
+ (aarch64_sve_reload_mem): ...this and enable for little-endian.
+ Use aarch64_sve_pred_mode to get the appropriate predicate mode.
+ (@aarch64_pred_mov<SVE_FULL:mode>): Extend to...
+ (@aarch64_pred_mov<SVE_ALL:mode>): ...this.
+ (*aarch64_sve_mov<SVE_FULL:mode>_subreg_be): Extend to...
+ (*aarch64_sve_mov<SVE_ALL:mode>_subreg_be): ...this.
+ (@aarch64_sve_reinterpret<SVE_FULL:mode>): Extend to...
+ (@aarch64_sve_reinterpret<SVE_ALL:mode>): ...this.
+ (*aarch64_sve_reinterpret<SVE_FULL:mode>): Extend to...
+ (*aarch64_sve_reinterpret<SVE_ALL:mode>): ...this.
+ (maskload<SVE_FULL:mode><vpred>): Extend to...
+ (maskload<SVE_ALL:mode><vpred>): ...this.
+ (maskstore<SVE_FULL:mode><vpred>): Extend to...
+ (maskstore<SVE_ALL:mode><vpred>): ...this.
+ (vec_duplicate<SVE_FULL:mode>): Extend to...
+ (vec_duplicate<SVE_ALL:mode>): ...this.
+ (*vec_duplicate<SVE_FULL:mode>_reg): Extend to...
+ (*vec_duplicate<SVE_ALL:mode>_reg): ...this.
+ (sve_ld1r<SVE_FULL:mode>): Extend to...
+ (sve_ld1r<SVE_ALL:mode>): ...this.
+ (vec_series<SVE_FULL_I:mode>): Extend to...
+ (vec_series<SVE_I:mode>): ...this.
+ (*vec_series<SVE_FULL_I:mode>_plus): Extend to...
+ (*vec_series<SVE_I:mode>_plus): ...this.
+ (@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Avoid
+ new VPRED ambiguity.
+ (@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Likewise.
+ (add<SVE_FULL_I:mode>3): Extend to...
+ (add<SVE_I:mode>3): ...this.
+ * config/aarch64/iterators.md (SVE_ALL, SVE_I): New mode iterators.
+ (Vetype, Vesize, VEL, Vel, vwcore): Handle partial SVE vector modes.
+ (VPRED, vpred): Likewise.
+ (Vctype): New iterator.
+ (vw): Remove SVE modes.
+
+2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+
* config/aarch64/iterators.md (SVE_PARTIAL): Rename to...
(SVE_PARTIAL_I): ...this.
* config/aarch64/aarch64-sve.md: Apply the above renaming throughout.
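For reference, here is a hedged standalone sketch of the kind of loop this patch targets, mirroring the uint64_t/uint32_t case of the new gcc.target/aarch64/sve/mixed_size_1.c test further down; the function name is illustrative and the expected code generation in the comments is taken from that test's scan-assembler patterns.

#include <stdint.h>

/* With unpacked vectors, the uint32_t stream can be carried one 32-bit
   element per 64-bit container (VNx2SImode), giving it the same number
   of lanes as the uint64_t stream, so both statements can be governed
   by a single .d predicate.  */
void
f_u64_u32 (uint64_t *restrict dst1, uint64_t *restrict src1,
	   uint32_t *restrict dst2, uint32_t *restrict src2, int n)
{
  for (int i = 0; i < n; ++i)
    {
      dst1[i] += src1[i];	/* expected: ld1d/st1d z.d  */
      dst2[i] = src2[i];	/* expected: ld1w/st1w z.d  */
    }
}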
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index a9b1bce..3c698b6 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -123,13 +123,18 @@ SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1);
VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1);
VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 4, 1);
+VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8, 1);
ADJUST_NUNITS (VNx2QI, aarch64_sve_vg);
ADJUST_NUNITS (VNx2HI, aarch64_sve_vg);
ADJUST_NUNITS (VNx2SI, aarch64_sve_vg);
+ADJUST_NUNITS (VNx2HF, aarch64_sve_vg);
+ADJUST_NUNITS (VNx2SF, aarch64_sve_vg);
ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2);
+ADJUST_NUNITS (VNx4HF, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4);
@@ -139,8 +144,11 @@ ADJUST_ALIGNMENT (VNx8QI, 1);
ADJUST_ALIGNMENT (VNx2HI, 2);
ADJUST_ALIGNMENT (VNx4HI, 2);
+ADJUST_ALIGNMENT (VNx2HF, 2);
+ADJUST_ALIGNMENT (VNx4HF, 2);
ADJUST_ALIGNMENT (VNx2SI, 4);
+ADJUST_ALIGNMENT (VNx2SF, 4);
/* Quad float: 128-bit floating mode for long doubles. */
FLOAT_MODE (TF, 16, ieee_quad_format);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1d4f4fd..bcb3fd4 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -512,6 +512,7 @@ bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
machine_mode aarch64_sve_int_mode (machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
+machine_mode aarch64_sve_pred_mode (machine_mode);
opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64);
bool aarch64_sve_mode_p (machine_mode);
HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 5b71ab0..b43d4fb 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -546,8 +546,8 @@
;; -------------------------------------------------------------------------
(define_expand "mov<mode>"
- [(set (match_operand:SVE_FULL 0 "nonimmediate_operand")
- (match_operand:SVE_FULL 1 "general_operand"))]
+ [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
+ (match_operand:SVE_ALL 1 "general_operand"))]
"TARGET_SVE"
{
/* Use the predicated load and store patterns where possible.
@@ -576,8 +576,8 @@
)
(define_expand "movmisalign<mode>"
- [(set (match_operand:SVE_FULL 0 "nonimmediate_operand")
- (match_operand:SVE_FULL 1 "general_operand"))]
+ [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
+ (match_operand:SVE_ALL 1 "general_operand"))]
"TARGET_SVE"
{
/* Equivalent to a normal move for our purposes. */
@@ -586,10 +586,11 @@
}
)
-;; Unpredicated moves (bytes or little-endian). Only allow memory operations
-;; during and after RA; before RA we want the predicated load and store
-;; patterns to be used instead.
-(define_insn "*aarch64_sve_mov<mode>_le"
+;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
+;; little-endian ordering is acceptable. Only allow memory operations during
+;; and after RA; before RA we want the predicated load and store patterns to
+;; be used instead.
+(define_insn "*aarch64_sve_mov<mode>_ldr_str"
[(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
(match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
"TARGET_SVE
@@ -604,35 +605,37 @@
* return aarch64_output_sve_mov_immediate (operands[1]);"
)
-;; Unpredicated moves (non-byte big-endian). Memory accesses require secondary
-;; reloads.
-(define_insn "*aarch64_sve_mov<mode>_be"
- [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
- (match_operand:SVE_FULL 1 "aarch64_nonmemory_operand" "w, Dn"))]
- "TARGET_SVE && BYTES_BIG_ENDIAN && <MODE>mode != VNx16QImode"
+;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
+;; or vectors for which little-endian ordering isn't acceptable. Memory
+;; accesses require secondary reloads.
+(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
+ (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
+ "TARGET_SVE
+ && <MODE>mode != VNx16QImode
+ && (BYTES_BIG_ENDIAN
+ || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
"@
mov\t%0.d, %1.d
* return aarch64_output_sve_mov_immediate (operands[1]);"
)
-;; Handle big-endian memory reloads. We use byte PTRUE for all modes
-;; to try to encourage reuse.
-;; This pattern needs constraints due to TARGET_SECONDARY_RELOAD hook.
-(define_expand "aarch64_sve_reload_be"
+;; Handle memory reloads for modes that can't use LDR and STR. We use
+;; byte PTRUE for all modes to try to encourage reuse. This pattern
+;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
+(define_expand "aarch64_sve_reload_mem"
[(parallel
[(set (match_operand 0)
(match_operand 1))
(clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
- "TARGET_SVE && BYTES_BIG_ENDIAN"
+ "TARGET_SVE"
{
/* Create a PTRUE. */
emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
/* Refer to the PTRUE in the appropriate mode for this move. */
machine_mode mode = GET_MODE (operands[0]);
- machine_mode pred_mode
- = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
- rtx pred = gen_lowpart (pred_mode, operands[2]);
+ rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
/* Emit a predicated load or store. */
aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
@@ -644,18 +647,18 @@
;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
- [(set (match_operand:SVE_FULL 0 "nonimmediate_operand" "=w, w, m")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
+ (unspec:SVE_ALL
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (match_operand:SVE_FULL 2 "nonimmediate_operand" "w, m, w")]
+ (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
UNSPEC_PRED_X))]
"TARGET_SVE
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
#
- ld1<Vesize>\t%0.<Vetype>, %1/z, %2
- st1<Vesize>\t%2.<Vetype>, %1, %0"
+ ld1<Vesize>\t%0.<Vctype>, %1/z, %2
+ st1<Vesize>\t%2.<Vctype>, %1, %0"
"&& register_operand (operands[0], <MODE>mode)
&& register_operand (operands[2], <MODE>mode)"
[(set (match_dup 0) (match_dup 2))]
@@ -666,8 +669,8 @@
;; for details. We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
- [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
+ (unspec:SVE_ALL
[(match_operand:VNx16BI 1 "register_operand" "Upl")
(match_operand 2 "aarch64_any_register_operand" "w")]
UNSPEC_REV_SUBREG))]
@@ -685,8 +688,8 @@
;; This is equivalent to a subreg on little-endian targets but not for
;; big-endian; see the comment at the head of the file for details.
(define_expand "@aarch64_sve_reinterpret<mode>"
- [(set (match_operand:SVE_FULL 0 "register_operand")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (unspec:SVE_ALL
[(match_operand 1 "aarch64_any_register_operand")]
UNSPEC_REINTERPRET))]
"TARGET_SVE"
@@ -702,8 +705,8 @@
;; A pattern for handling type punning on big-endian targets. We use a
;; special predicate for operand 1 to reduce the number of patterns.
(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
- [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
[(match_operand 1 "aarch64_any_register_operand" "w")]
UNSPEC_REINTERPRET))]
"TARGET_SVE"
@@ -1141,13 +1144,13 @@
;; Predicated LD1.
(define_insn "maskload<mode><vpred>"
- [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_FULL 1 "memory_operand" "m")]
+ (match_operand:SVE_ALL 1 "memory_operand" "m")]
UNSPEC_LD1_SVE))]
"TARGET_SVE"
- "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
+ "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)
;; Unpredicated LD[234].
@@ -1940,14 +1943,14 @@
;; Predicated ST1.
(define_insn "maskstore<mode><vpred>"
- [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
+ (unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_FULL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
(match_dup 0)]
UNSPEC_ST1_SVE))]
"TARGET_SVE"
- "st1<Vesize>\t%1.<Vetype>, %2, %0"
+ "st1<Vesize>\t%1.<Vctype>, %2, %0"
)
;; Unpredicated ST[234]. This is always a full update, so the dependence
@@ -2283,8 +2286,8 @@
(define_expand "vec_duplicate<mode>"
[(parallel
- [(set (match_operand:SVE_FULL 0 "register_operand")
- (vec_duplicate:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (vec_duplicate:SVE_ALL
(match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
(clobber (scratch:VNx16BI))])]
"TARGET_SVE"
@@ -2304,8 +2307,8 @@
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
- [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w")
- (vec_duplicate:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
+ (vec_duplicate:SVE_ALL
(match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
(clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
"TARGET_SVE"
@@ -2364,12 +2367,12 @@
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
- [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
- (unspec:SVE_FULL
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
[(match_operand:<VPRED> 1 "register_operand" "Upl")
- (vec_duplicate:SVE_FULL
+ (vec_duplicate:SVE_ALL
(match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
- (match_operand:SVE_FULL 3 "aarch64_simd_imm_zero")]
+ (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
"ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
@@ -2431,29 +2434,29 @@
;; -------------------------------------------------------------------------
(define_insn "vec_series<mode>"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w")
- (vec_series:SVE_FULL_I
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
+ (vec_series:SVE_I
(match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
(match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
"TARGET_SVE"
"@
- index\t%0.<Vetype>, #%1, %<vw>2
- index\t%0.<Vetype>, %<vw>1, #%2
- index\t%0.<Vetype>, %<vw>1, %<vw>2"
+ index\t%0.<Vctype>, #%1, %<vwcore>2
+ index\t%0.<Vctype>, %<vwcore>1, #%2
+ index\t%0.<Vctype>, %<vwcore>1, %<vwcore>2"
)
;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
- (plus:SVE_FULL_I
- (vec_duplicate:SVE_FULL_I
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (plus:SVE_I
+ (vec_duplicate:SVE_I
(match_operand:<VEL> 1 "register_operand" "r"))
- (match_operand:SVE_FULL_I 2 "immediate_operand")))]
+ (match_operand:SVE_I 2 "immediate_operand")))]
"TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
{
operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
- return "index\t%0.<Vetype>, %<vw>1, #%2";
+ return "index\t%0.<Vctype>, %<vwcore>1, #%2";
}
)
@@ -2821,7 +2824,7 @@
(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
(sign_extend:SVE_FULL_HSDI
(truncate:SVE_PARTIAL_I
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
@@ -2834,7 +2837,7 @@
(define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
(unspec:SVE_FULL_HSDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
(sign_extend:SVE_FULL_HSDI
(truncate:SVE_PARTIAL_I
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
@@ -3386,10 +3389,10 @@
;; -------------------------------------------------------------------------
(define_insn "add<mode>3"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
- (plus:SVE_FULL_I
- (match_operand:SVE_FULL_I 1 "register_operand" "%0, 0, 0, w, w, w")
- (match_operand:SVE_FULL_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
+ (plus:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
+ (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
"TARGET_SVE"
"@
add\t%0.<Vetype>, %0.<Vetype>, #%D2
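To illustrate the vec_series change above (INDEX now uses the container suffix <Vctype> and the <vwcore> form of the scalar operands), here is a hedged standalone version of the uint64_t/uint32_t case from the new mixed_size_4.c test: the 32-bit induction values live one per 64-bit container, so the vector IV is expected to be built with an INDEX on .d lanes and stored with st1w z.d, while the element count is taken with CNTD rather than CNTW.

#include <stdint.h>

/* Mirrors f_uint64_t_uint32_t from mixed_size_4.c (names here are
   illustrative).  Expected per that test's scan patterns:
     index   z0.d, #0, #1     vector IV in 64-bit containers
     st1w    z0.d, ...        32-bit stores through .d containers
     cntd                     element-count step  */
void
f_u64_u32_iv (uint64_t *restrict dst1, uint64_t *restrict src1,
	      uint32_t *restrict dst2, uint32_t n)
{
  for (uint32_t i = 0; i < n; ++i)
    {
      dst1[i] += src1[i];
      dst2[i] = i;
    }
}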
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9ffe213..d175e1f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1625,6 +1625,11 @@ aarch64_classify_vector_mode (machine_mode mode)
case E_VNx4HImode:
/* Partial SVE SI vector. */
case E_VNx2SImode:
+ /* Partial SVE HF vectors. */
+ case E_VNx2HFmode:
+ case E_VNx4HFmode:
+ /* Partial SVE SF vector. */
+ case E_VNx2SFmode:
return TARGET_SVE ? VEC_SVE_DATA | VEC_PARTIAL : 0;
case E_VNx16QImode:
@@ -1753,6 +1758,22 @@ aarch64_array_mode_supported_p (machine_mode mode,
return false;
}
+/* MODE is some form of SVE vector mode. For data modes, return the number
+ of vector register bits that each element of MODE occupies, such as 64
+ for both VNx2DImode and VNx2SImode (where each 32-bit value is stored
+ in a 64-bit container). For predicate modes, return the number of
+ data bits controlled by each significant predicate bit. */
+
+static unsigned int
+aarch64_sve_container_bits (machine_mode mode)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ poly_uint64 vector_bits = (vec_flags & (VEC_PARTIAL | VEC_SVE_PRED)
+ ? BITS_PER_SVE_VECTOR
+ : GET_MODE_BITSIZE (mode));
+ return vector_element_size (vector_bits, GET_MODE_NUNITS (mode));
+}
+
/* Return the SVE predicate mode to use for elements that have
ELEM_NBYTES bytes, if such a mode exists. */
@@ -1773,6 +1794,16 @@ aarch64_sve_pred_mode (unsigned int elem_nbytes)
return opt_machine_mode ();
}
+/* Return the SVE predicate mode that should be used to control
+ SVE mode MODE. */
+
+machine_mode
+aarch64_sve_pred_mode (machine_mode mode)
+{
+ unsigned int bits = aarch64_sve_container_bits (mode);
+ return aarch64_sve_pred_mode (bits / BITS_PER_UNIT).require ();
+}
+
/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
static opt_machine_mode
@@ -1780,7 +1811,7 @@ aarch64_get_mask_mode (machine_mode mode)
{
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (vec_flags & VEC_SVE_DATA)
- return aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode));
+ return aarch64_sve_pred_mode (mode);
return default_get_mask_mode (mode);
}
@@ -1806,11 +1837,25 @@ aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
static scalar_int_mode
aarch64_sve_element_int_mode (machine_mode mode)
{
- unsigned int elt_bits = vector_element_size (BITS_PER_SVE_VECTOR,
+ poly_uint64 vector_bits = (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ ? BITS_PER_SVE_VECTOR
+ : GET_MODE_BITSIZE (mode));
+ unsigned int elt_bits = vector_element_size (vector_bits,
GET_MODE_NUNITS (mode));
return int_mode_for_size (elt_bits, 0).require ();
}
+/* Return an integer element mode that contains exactly
+ aarch64_sve_container_bits (MODE) bits. This is wider than
+ aarch64_sve_element_int_mode if MODE is a partial vector,
+ otherwise it's the same. */
+
+static scalar_int_mode
+aarch64_sve_container_int_mode (machine_mode mode)
+{
+ return int_mode_for_size (aarch64_sve_container_bits (mode), 0).require ();
+}
+
/* Return the integer vector mode associated with SVE mode MODE.
Unlike related_int_vector_mode, this can handle the case in which
MODE is a predicate (and thus has a different total size). */
@@ -1831,6 +1876,37 @@ aarch64_vectorize_related_mode (machine_mode vector_mode,
{
unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode);
+ /* If we're operating on SVE vectors, try to return an SVE mode. */
+ poly_uint64 sve_nunits;
+ if ((vec_flags & VEC_SVE_DATA)
+ && multiple_p (BYTES_PER_SVE_VECTOR,
+ GET_MODE_SIZE (element_mode), &sve_nunits))
+ {
+ machine_mode sve_mode;
+ if (maybe_ne (nunits, 0U))
+ {
+ /* Try to find a full or partial SVE mode with exactly
+ NUNITS units. */
+ if (multiple_p (sve_nunits, nunits)
+ && aarch64_sve_data_mode (element_mode,
+ nunits).exists (&sve_mode))
+ return sve_mode;
+ }
+ else
+ {
+ /* Take the preferred number of units from the number of bytes
+ that fit in VECTOR_MODE. We always start by "autodetecting"
+ a full vector mode with preferred_simd_mode, so vectors
+ chosen here will also be full vector modes. Then
+ autovectorize_vector_modes tries smaller starting modes
+ and thus smaller preferred numbers of units. */
+ sve_nunits = ordered_min (sve_nunits, GET_MODE_SIZE (vector_mode));
+ if (aarch64_sve_data_mode (element_mode,
+ sve_nunits).exists (&sve_mode))
+ return sve_mode;
+ }
+ }
+
/* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
if ((vec_flags & VEC_ADVSIMD)
&& known_eq (nunits, 0U)
@@ -1907,11 +1983,6 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
return mode == DImode;
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- /* At the moment, partial vector modes are only useful for memory
- references, but that could change in future. */
- if (vec_flags & VEC_PARTIAL)
- return false;
-
if (vec_flags & VEC_SVE_PRED)
return pr_or_ffr_regnum_p (regno);
@@ -4015,8 +4086,7 @@ aarch64_expand_sve_ld1rq (rtx dest, rtx src)
}
machine_mode mode = GET_MODE (dest);
- unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
- machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (mode);
rtx ptrue = aarch64_ptrue_reg (pred_mode);
emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue));
return true;
@@ -4037,7 +4107,26 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
scalar_mode elt_mode = GET_MODE_INNER (mode);
unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode);
- unsigned int encoded_bits = npatterns * nelts_per_pattern * elt_bits;
+ unsigned int container_bits = aarch64_sve_container_bits (mode);
+ unsigned int encoded_bits = npatterns * nelts_per_pattern * container_bits;
+
+ if (nelts_per_pattern == 1
+ && encoded_bits <= 128
+ && container_bits != elt_bits)
+ {
+ /* We have a partial vector mode and a constant whose full-vector
+ equivalent would occupy a repeating 128-bit sequence. Build that
+ full-vector equivalent instead, so that we have the option of
+ using LD1RQ and Advanced SIMD operations. */
+ unsigned int repeat = container_bits / elt_bits;
+ machine_mode full_mode = aarch64_full_sve_mode (elt_mode).require ();
+ rtx_vector_builder builder (full_mode, npatterns * repeat, 1);
+ for (unsigned int i = 0; i < npatterns; ++i)
+ for (unsigned int j = 0; j < repeat; ++j)
+ builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, i));
+ target = aarch64_target_reg (target, full_mode);
+ return aarch64_expand_sve_const_vector (target, builder.build ());
+ }
if (nelts_per_pattern == 1 && encoded_bits == 128)
{
@@ -4730,8 +4819,7 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
std::swap (mode_with_wider_elts, mode_with_narrower_elts);
unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts);
- unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
- machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (mode_with_wider_elts);
/* Get the operands in the appropriate modes and emit the instruction. */
ptrue = gen_lowpart (pred_mode, ptrue);
@@ -9971,19 +10059,21 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
machine_mode mode,
secondary_reload_info *sri)
{
- /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
- directly by the *aarch64_sve_mov<mode>_[lb]e move patterns. See the
- comment at the head of aarch64-sve.md for more details about the
- big-endian handling. */
- if (BYTES_BIG_ENDIAN
- && reg_class_subset_p (rclass, FP_REGS)
+ /* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
+ LDR and STR. See the comment at the head of aarch64-sve.md for
+ more details about the big-endian handling. */
+ if (reg_class_subset_p (rclass, FP_REGS)
&& !((REG_P (x) && HARD_REGISTER_P (x))
|| aarch64_simd_valid_immediate (x, NULL))
- && mode != VNx16QImode
- && aarch64_sve_data_mode_p (mode))
+ && mode != VNx16QImode)
{
- sri->icode = CODE_FOR_aarch64_sve_reload_be;
- return NO_REGS;
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if ((vec_flags & VEC_SVE_DATA)
+ && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
+ {
+ sri->icode = CODE_FOR_aarch64_sve_reload_mem;
+ return NO_REGS;
+ }
}
/* If we have to disable direct literal pool loads and stores because the
@@ -15837,7 +15927,7 @@ static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- return vec_flags != 0 && (vec_flags & (VEC_STRUCT | VEC_PARTIAL)) == 0;
+ return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
}
/* Return the full-width SVE vector mode for element mode MODE, if one
@@ -15938,29 +16028,72 @@ aarch64_preferred_simd_mode (scalar_mode mode)
static unsigned int
aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
{
- if (TARGET_SVE)
- modes->safe_push (VNx16QImode);
+ static const machine_mode sve_modes[] = {
+ /* Try using full vectors for all element types. */
+ VNx16QImode,
+
+ /* Try using 16-bit containers for 8-bit elements and full vectors
+ for wider elements. */
+ VNx8QImode,
+
+ /* Try using 32-bit containers for 8-bit and 16-bit elements and
+ full vectors for wider elements. */
+ VNx4QImode,
- /* Try using 128-bit vectors for all element types. */
- modes->safe_push (V16QImode);
+ /* Try using 64-bit containers for all element types. */
+ VNx2QImode
+ };
+
+ static const machine_mode advsimd_modes[] = {
+ /* Try using 128-bit vectors for all element types. */
+ V16QImode,
+
+ /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
+ for wider elements. */
+ V8QImode,
+
+ /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
+ for wider elements.
+
+ TODO: We could support a limited form of V4QImode too, so that
+ we use 32-bit vectors for 8-bit elements. */
+ V4HImode,
+
+ /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
+ for 64-bit elements.
- /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
- for wider elements. */
- modes->safe_push (V8QImode);
+ TODO: We could similarly support limited forms of V2QImode and V2HImode
+ for this case. */
+ V2SImode
+ };
- /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
- for wider elements.
+ /* Try using N-byte SVE modes only after trying N-byte Advanced SIMD mode.
+ This is because:
- TODO: We could support a limited form of V4QImode too, so that
- we use 32-bit vectors for 8-bit elements. */
- modes->safe_push (V4HImode);
+ - If we can't use N-byte Advanced SIMD vectors then the placement
+ doesn't matter; we'll just continue as though the Advanced SIMD
+ entry didn't exist.
- /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
- for 64-bit elements.
+ - If an SVE main loop with N bytes ends up being cheaper than an
+ Advanced SIMD main loop with N bytes then by default we'll replace
+ the Advanced SIMD version with the SVE one.
- TODO: We could similarly support limited forms of V2QImode and V2HImode
- for this case. */
- modes->safe_push (V2SImode);
+ - If an Advanced SIMD main loop with N bytes ends up being cheaper
+ than an SVE main loop with N bytes then by default we'll try to
+ use the SVE loop to vectorize the epilogue instead. */
+ unsigned int sve_i = TARGET_SVE ? 0 : ARRAY_SIZE (sve_modes);
+ unsigned int advsimd_i = 0;
+ while (advsimd_i < ARRAY_SIZE (advsimd_modes))
+ {
+ if (sve_i < ARRAY_SIZE (sve_modes)
+ && maybe_gt (GET_MODE_NUNITS (sve_modes[sve_i]),
+ GET_MODE_NUNITS (advsimd_modes[advsimd_i])))
+ modes->safe_push (sve_modes[sve_i++]);
+ else
+ modes->safe_push (advsimd_modes[advsimd_i++]);
+ }
+ while (sve_i < ARRAY_SIZE (sve_modes))
+ modes->safe_push (sve_modes[sve_i++]);
unsigned int flags = 0;
/* Consider enabling VECT_COMPARE_COSTS for SVE, both so that we
@@ -16507,7 +16640,14 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
return false;
if (info)
- *info = simd_immediate_info (elt_mode, base, step);
+ {
+ /* Get the corresponding container mode. E.g. an INDEX on V2SI
+ should yield two integer values per 128-bit block, meaning
+ that we need to treat it in the same way as V2DI and then
+ ignore the upper 32 bits of each element. */
+ elt_mode = aarch64_sve_container_int_mode (mode);
+ *info = simd_immediate_info (elt_mode, base, step);
+ }
return true;
}
else if (GET_CODE (op) == CONST_VECTOR
@@ -16976,9 +17116,9 @@ aarch64_simd_vector_alignment (const_tree type)
direct way we have of identifying real SVE predicate types. */
if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
return 16;
- if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
- return 128;
- return wi::umin (wi::to_wide (TYPE_SIZE (type)), 128).to_uhwi ();
+ widest_int min_size
+ = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
+ return wi::umin (min_size, 128).to_uhwi ();
}
/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
@@ -19154,7 +19294,7 @@ aarch64_evpc_sel (struct expand_vec_perm_d *d)
if (d->testing_p)
return true;
- machine_mode pred_mode = aarch64_sve_pred_mode (unit_size).require ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (vmode);
rtx_vector_builder builder (pred_mode, n_patterns, 2);
for (int i = 0; i < n_patterns * 2; i++)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fc27179..4c9035f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -344,6 +344,21 @@
VNx4HI VNx2HI
VNx2SI])
+;; All SVE vector modes.
+(define_mode_iterator SVE_ALL [VNx16QI VNx8QI VNx4QI VNx2QI
+ VNx8HI VNx4HI VNx2HI
+ VNx8HF VNx4HF VNx2HF
+ VNx4SI VNx2SI
+ VNx4SF VNx2SF
+ VNx2DI
+ VNx2DF])
+
+;; All SVE integer vector modes.
+(define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI
+ VNx8HI VNx4HI VNx2HI
+ VNx4SI VNx2SI
+ VNx2DI])
+
;; Modes involved in extending or truncating SVE data, for 8 elements per
;; 128-bit block.
(define_mode_iterator VNx8_NARROW [VNx8QI])
@@ -776,28 +791,37 @@
(HI "")])
;; Mode-to-individual element type mapping.
-(define_mode_attr Vetype [(V8QI "b") (V16QI "b") (VNx16QI "b") (VNx16BI "b")
- (V4HI "h") (V8HI "h") (VNx8HI "h") (VNx8BI "h")
- (V2SI "s") (V4SI "s") (VNx4SI "s") (VNx4BI "s")
- (V2DI "d") (VNx2DI "d") (VNx2BI "d")
- (V4HF "h") (V8HF "h") (VNx8HF "h")
- (V2SF "s") (V4SF "s") (VNx4SF "s")
- (V2DF "d") (VNx2DF "d")
- (HF "h")
- (SF "s") (DF "d")
- (QI "b") (HI "h")
- (SI "s") (DI "d")])
+(define_mode_attr Vetype [(V8QI "b") (V16QI "b")
+ (V4HI "h") (V8HI "h")
+ (V2SI "s") (V4SI "s")
+ (V2DI "d")
+ (V4HF "h") (V8HF "h")
+ (V2SF "s") (V4SF "s")
+ (V2DF "d")
+ (VNx16BI "b") (VNx8BI "h") (VNx4BI "s") (VNx2BI "d")
+ (VNx16QI "b") (VNx8QI "b") (VNx4QI "b") (VNx2QI "b")
+ (VNx8HI "h") (VNx4HI "h") (VNx2HI "h")
+ (VNx8HF "h") (VNx4HF "h") (VNx2HF "h")
+ (VNx4SI "s") (VNx2SI "s")
+ (VNx4SF "s") (VNx2SF "s")
+ (VNx2DI "d")
+ (VNx2DF "d")
+ (HF "h")
+ (SF "s") (DF "d")
+ (QI "b") (HI "h")
+ (SI "s") (DI "d")])
;; Like Vetype, but map to types that are a quarter of the element size.
(define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")])
;; Equivalent of "size" for a vector element.
-(define_mode_attr Vesize [(VNx16QI "b") (VNx8QI "b")
- (VNx4QI "b") (VNx2QI "b")
- (VNx8HI "h") (VNx4HI "h")
- (VNx2HI "h") (VNx8HF "h")
- (VNx4SI "w") (VNx2SI "w") (VNx4SF "w")
- (VNx2DI "d") (VNx2DF "d")
+(define_mode_attr Vesize [(VNx16QI "b") (VNx8QI "b") (VNx4QI "b") (VNx2QI "b")
+ (VNx8HI "h") (VNx4HI "h") (VNx2HI "h")
+ (VNx8HF "h") (VNx4HF "h") (VNx2HF "h")
+ (VNx4SI "w") (VNx2SI "w")
+ (VNx4SF "w") (VNx2SF "w")
+ (VNx2DI "d")
+ (VNx2DF "d")
(VNx32QI "b") (VNx48QI "b") (VNx64QI "b")
(VNx16HI "h") (VNx24HI "h") (VNx32HI "h")
(VNx16HF "h") (VNx24HF "h") (VNx32HF "h")
@@ -806,6 +830,16 @@
(VNx4DI "d") (VNx6DI "d") (VNx8DI "d")
(VNx4DF "d") (VNx6DF "d") (VNx8DF "d")])
+;; The Z register suffix for an SVE mode's element container, i.e. the
+;; Vetype of full SVE modes that have the same number of elements.
+(define_mode_attr Vctype [(VNx16QI "b") (VNx8QI "h") (VNx4QI "s") (VNx2QI "d")
+ (VNx8HI "h") (VNx4HI "s") (VNx2HI "d")
+ (VNx8HF "h") (VNx4HF "s") (VNx2HF "d")
+ (VNx4SI "s") (VNx2SI "d")
+ (VNx4SF "s") (VNx2SF "d")
+ (VNx2DI "d")
+ (VNx2DF "d")])
+
;; Vetype is used everywhere in scheduling type and assembly output,
;; sometimes they are not the same, for example HF modes on some
;; instructions. stype is defined to represent scheduling type
@@ -827,26 +861,40 @@
(SI "8b") (SF "8b")])
;; Define element mode for each vector mode.
-(define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI")
- (V4HI "HI") (V8HI "HI") (VNx8HI "HI")
- (V2SI "SI") (V4SI "SI") (VNx4SI "SI")
- (DI "DI") (V2DI "DI") (VNx2DI "DI")
- (V4HF "HF") (V8HF "HF") (VNx8HF "HF")
- (V2SF "SF") (V4SF "SF") (VNx4SF "SF")
- (DF "DF") (V2DF "DF") (VNx2DF "DF")
- (SI "SI") (HI "HI")
- (QI "QI")])
+(define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
+ (V4HI "HI") (V8HI "HI")
+ (V2SI "SI") (V4SI "SI")
+ (DI "DI") (V2DI "DI")
+ (V4HF "HF") (V8HF "HF")
+ (V2SF "SF") (V4SF "SF")
+ (DF "DF") (V2DF "DF")
+ (SI "SI") (HI "HI")
+ (QI "QI")
+ (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
+ (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
+ (VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
+ (VNx4SI "SI") (VNx2SI "SI")
+ (VNx4SF "SF") (VNx2SF "SF")
+ (VNx2DI "DI")
+ (VNx2DF "DF")])
;; Define element mode for each vector mode (lower case).
-(define_mode_attr Vel [(V8QI "qi") (V16QI "qi") (VNx16QI "qi")
- (V4HI "hi") (V8HI "hi") (VNx8HI "hi")
- (V2SI "si") (V4SI "si") (VNx4SI "si")
- (DI "di") (V2DI "di") (VNx2DI "di")
- (V4HF "hf") (V8HF "hf") (VNx8HF "hf")
- (V2SF "sf") (V4SF "sf") (VNx4SF "sf")
- (V2DF "df") (DF "df") (VNx2DF "df")
- (SI "si") (HI "hi")
- (QI "qi")])
+(define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
+ (V4HI "hi") (V8HI "hi")
+ (V2SI "si") (V4SI "si")
+ (DI "di") (V2DI "di")
+ (V4HF "hf") (V8HF "hf")
+ (V2SF "sf") (V4SF "sf")
+ (V2DF "df") (DF "df")
+ (SI "si") (HI "hi")
+ (QI "qi")
+ (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
+ (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
+ (VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
+ (VNx4SI "si") (VNx2SI "si")
+ (VNx4SF "sf") (VNx2SF "sf")
+ (VNx2DI "di")
+ (VNx2DF "df")])
;; Element mode with floating-point values replaced by like-sized integers.
(define_mode_attr VEL_INT [(VNx16QI "QI")
@@ -994,23 +1042,29 @@
(V4SF "2s")])
;; Define corresponding core/FP element mode for each vector mode.
-(define_mode_attr vw [(V8QI "w") (V16QI "w") (VNx16QI "w")
- (V4HI "w") (V8HI "w") (VNx8HI "w")
- (V2SI "w") (V4SI "w") (VNx4SI "w")
- (DI "x") (V2DI "x") (VNx2DI "x")
- (VNx8HF "h")
- (V2SF "s") (V4SF "s") (VNx4SF "s")
- (V2DF "d") (VNx2DF "d")])
+(define_mode_attr vw [(V8QI "w") (V16QI "w")
+ (V4HI "w") (V8HI "w")
+ (V2SI "w") (V4SI "w")
+ (DI "x") (V2DI "x")
+ (V2SF "s") (V4SF "s")
+ (V2DF "d")])
;; Corresponding core element mode for each vector mode. This is a
;; variation on <vw> mapping FP modes to GP regs.
-(define_mode_attr vwcore [(V8QI "w") (V16QI "w") (VNx16QI "w")
- (V4HI "w") (V8HI "w") (VNx8HI "w")
- (V2SI "w") (V4SI "w") (VNx4SI "w")
- (DI "x") (V2DI "x") (VNx2DI "x")
- (V4HF "w") (V8HF "w") (VNx8HF "w")
- (V2SF "w") (V4SF "w") (VNx4SF "w")
- (V2DF "x") (VNx2DF "x")])
+(define_mode_attr vwcore [(V8QI "w") (V16QI "w")
+ (V4HI "w") (V8HI "w")
+ (V2SI "w") (V4SI "w")
+ (DI "x") (V2DI "x")
+ (V4HF "w") (V8HF "w")
+ (V2SF "w") (V4SF "w")
+ (V2DF "x")
+ (VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "w")
+ (VNx8HI "w") (VNx4HI "w") (VNx2HI "w")
+ (VNx8HF "w") (VNx4HF "w") (VNx2HF "w")
+ (VNx4SI "w") (VNx2SI "w")
+ (VNx4SF "w") (VNx2SF "w")
+ (VNx2DI "x")
+ (VNx2DF "x")])
;; Double vector types for ALLX.
(define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
@@ -1248,10 +1302,14 @@
;; The predicate mode associated with an SVE data mode. For structure modes
;; this is equivalent to the <VPRED> of the subvector mode.
-(define_mode_attr VPRED [(VNx16QI "VNx16BI")
- (VNx8HI "VNx8BI") (VNx8HF "VNx8BI")
- (VNx4SI "VNx4BI") (VNx4SF "VNx4BI")
- (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")
+(define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI")
+ (VNx4QI "VNx4BI") (VNx2QI "VNx2BI")
+ (VNx8HI "VNx8BI") (VNx4HI "VNx4BI") (VNx2HI "VNx2BI")
+ (VNx8HF "VNx8BI") (VNx4HF "VNx4BI") (VNx2HF "VNx2BI")
+ (VNx4SI "VNx4BI") (VNx2SI "VNx2BI")
+ (VNx4SF "VNx4BI") (VNx2SF "VNx2BI")
+ (VNx2DI "VNx2BI")
+ (VNx2DF "VNx2BI")
(VNx32QI "VNx16BI")
(VNx16HI "VNx8BI") (VNx16HF "VNx8BI")
(VNx8SI "VNx4BI") (VNx8SF "VNx4BI")
@@ -1266,10 +1324,14 @@
(VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
;; ...and again in lower case.
-(define_mode_attr vpred [(VNx16QI "vnx16bi")
- (VNx8HI "vnx8bi") (VNx8HF "vnx8bi")
- (VNx4SI "vnx4bi") (VNx4SF "vnx4bi")
- (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")
+(define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
+ (VNx4QI "vnx4bi") (VNx2QI "vnx2bi")
+ (VNx8HI "vnx8bi") (VNx4HI "vnx4bi") (VNx2HI "vnx2bi")
+ (VNx8HF "vnx8bi") (VNx4HF "vnx4bi") (VNx2HF "vnx2bi")
+ (VNx4SI "vnx4bi") (VNx2SI "vnx2bi")
+ (VNx4SF "vnx4bi") (VNx2SF "vnx2bi")
+ (VNx2DI "vnx2bi")
+ (VNx2DF "vnx2bi")
(VNx32QI "vnx16bi")
(VNx16HI "vnx8bi") (VNx16HF "vnx8bi")
(VNx8SI "vnx4bi") (VNx8SF "vnx4bi")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8b48698..57505e9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,13 @@
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/mixed_size_1.c: New test.
+ * gcc.target/aarch64/sve/mixed_size_2.c: Likewise.
+ * gcc.target/aarch64/sve/mixed_size_3.c: Likewise.
+ * gcc.target/aarch64/sve/mixed_size_4.c: Likewise.
+ * gcc.target/aarch64/sve/mixed_size_5.c: Likewise.
+
+2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/clastb_8.c: Use assembly tests to
check for fully-masked loops.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_1.c
new file mode 100644
index 0000000..a5659b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_1.c
@@ -0,0 +1,39 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2) \
+ void \
+ f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst1, TYPE1 *restrict src1, \
+ TYPE2 *restrict dst2, TYPE2 *restrict src2, \
+ int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst1[i] += src1[i]; \
+ dst2[i] = src2[i]; \
+ } \
+ }
+
+#define TEST_ALL(T) \
+ T (uint16_t, uint8_t) \
+ T (uint32_t, uint16_t) \
+ T (uint32_t, _Float16) \
+ T (uint64_t, uint32_t) \
+ T (uint64_t, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_2.c
new file mode 100644
index 0000000..34b58e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_2.c
@@ -0,0 +1,41 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2) \
+ void \
+ f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst1, TYPE1 *restrict src1, \
+ TYPE2 *restrict dst2, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst1[i] += src1[i]; \
+ dst2[i] = 1; \
+ } \
+ }
+
+#define TEST_ALL(T) \
+ T (uint16_t, uint8_t) \
+ T (uint32_t, uint16_t) \
+ T (uint32_t, _Float16) \
+ T (uint64_t, uint32_t) \
+ T (uint64_t, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #1\.0} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1\.0} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_3.c
new file mode 100644
index 0000000..9ae3e7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_3.c
@@ -0,0 +1,41 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2) \
+ void \
+ f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst1, TYPE1 *restrict src1, \
+ TYPE2 *restrict dst2, TYPE2 src2, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst1[i] += src1[i]; \
+ dst2[i] = src2; \
+ } \
+ }
+
+#define TEST_ALL(T) \
+ T (uint16_t, uint8_t) \
+ T (uint32_t, uint16_t) \
+ T (uint32_t, _Float16) \
+ T (uint64_t, uint32_t) \
+ T (uint64_t, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, w3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, w3\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, h0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, s0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_4.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_4.c
new file mode 100644
index 0000000..4c475fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_4.c
@@ -0,0 +1,43 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2) \
+ void \
+ f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst1, TYPE1 *restrict src1, \
+ TYPE2 *restrict dst2, TYPE2 n) \
+ { \
+ for (TYPE2 i = 0; i < n; ++i) \
+ { \
+ dst1[i] += src1[i]; \
+ dst2[i] = i; \
+ } \
+ }
+
+#define TEST_ALL(T) \
+ T (uint16_t, uint8_t) \
+ T (uint32_t, uint16_t) \
+ T (uint64_t, uint32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.b,} } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #1\n} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tcntb\t} } } */
+/* { dg-final { scan-assembler-times {\tcnth\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tcntw\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tcntd\t} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_5.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_5.c
new file mode 100644
index 0000000..83be00f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_5.c
@@ -0,0 +1,42 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2) \
+ void \
+ f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst1, TYPE1 *restrict src1, \
+ TYPE2 *restrict dst2, TYPE2 *restrict src2, \
+ int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst1[i * 2] = src1[i * 2] + 1; \
+ dst1[i * 2 + 1] = src1[i * 2 + 1] + 1; \
+ dst2[i * 2] = 2; \
+ dst2[i * 2 + 1] = 3; \
+ } \
+ }
+
+#define TEST_ALL(T) \
+ T (uint16_t, uint8_t) \
+ T (uint32_t, uint16_t) \
+ T (uint32_t, _Float16) \
+ T (uint64_t, uint32_t) \
+ T (uint64_t, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rqw\tz[0-9]+\.s,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */