about summary refs log tree commit diff
path: root/gcc/config
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2019-11-16 11:07:23 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2019-11-16 11:07:23 +0000
commit e58703e2c1b09d39714740e00933d32df965de32 (patch)
tree 40e761cc115f68b373d1f519ff642533d4c20111 /gcc/config
parent cc68f7c2dabbf9d90ebe025a11f6d6ed5d49a113 (diff)
download gcc-e58703e2c1b09d39714740e00933d32df965de32.zip
gcc-e58703e2c1b09d39714740e00933d32df965de32.tar.gz
gcc-e58703e2c1b09d39714740e00933d32df965de32.tar.bz2
[AArch64] Add sign and zero extension for partial SVE modes
This patch adds support for extending from partial SVE modes to both full vector modes and wider partial modes. Some tests now need --param aarch64-sve-compare-costs=0 to force the original full-vector code. 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> gcc/ * config/aarch64/iterators.md (SVE_HSDI): New mode iterator. (narrower_mask): Handle VNx4HI, VNx2HI and VNx2SI. * config/aarch64/aarch64-sve.md (<ANY_EXTEND:optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2): New pattern. (*<ANY_EXTEND:optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2): Likewise. (@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Update comment. Avoid new narrower_mask ambiguity. (@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Likewise. (*cond_uxt<mode>_2): Update comment. (*cond_uxt<mode>_any): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/cost_model_1.c: Expect the loop to be vectorized with bytes stored in 32-bit containers. * gcc.target/aarch64/sve/extend_1.c: New test. * gcc.target/aarch64/sve/extend_2.c: New test. * gcc.target/aarch64/sve/extend_3.c: New test. * gcc.target/aarch64/sve/extend_4.c: New test. * gcc.target/aarch64/sve/load_const_offset_3.c: Add --param aarch64-sve-compare-costs=0. * gcc.target/aarch64/sve/mask_struct_store_1.c: Likewise. * gcc.target/aarch64/sve/mask_struct_store_1_run.c: Likewise. * gcc.target/aarch64/sve/mask_struct_store_2.c: Likewise. * gcc.target/aarch64/sve/mask_struct_store_2_run.c: Likewise. * gcc.target/aarch64/sve/unpack_unsigned_1.c: Likewise. * gcc.target/aarch64/sve/unpack_unsigned_1_run.c: Likewise. From-SVN: r278342
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/aarch64/aarch64-sve.md | 60
-rw-r--r-- gcc/config/aarch64/iterators.md | 12
2 files changed, 51 insertions, 21 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b43d4fb..40aeb95 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -71,8 +71,7 @@
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; ---- [INT] General unary arithmetic corresponding to unspecs
-;; ---- [INT] Sign extension
-;; ---- [INT] Zero extension
+;; ---- [INT] Sign and zero extension
;; ---- [INT] Logical inverse
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; ---- [FP] General unary arithmetic corresponding to unspecs
@@ -2812,15 +2811,44 @@
)
;; -------------------------------------------------------------------------
-;; ---- [INT] Sign extension
+;; ---- [INT] Sign and zero extension
;; -------------------------------------------------------------------------
;; Includes:
;; - SXTB
;; - SXTH
;; - SXTW
+;; - UXTB
+;; - UXTH
+;; - UXTW
;; -------------------------------------------------------------------------
-;; Predicated SXT[BHW].
+;; Unpredicated sign and zero extension from a narrower mode.
+(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_dup 2)
+ (ANY_EXTEND:SVE_HSDI
+ (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ {
+ operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
+ }
+)
+
+;; Predicated sign and zero extension from a narrower mode.
+(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
+ [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
+ (ANY_EXTEND:SVE_HSDI
+ (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
+ "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
+)
+
+;; Predicated truncate-and-sign-extend operations.
(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
@@ -2829,11 +2857,12 @@
(truncate:SVE_PARTIAL_I
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
UNSPEC_PRED_X))]
- "TARGET_SVE && (~<narrower_mask> & <self_mask>) == 0"
+ "TARGET_SVE
+ && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
"sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
)
-;; Predicated SXT[BHW] with merging.
+;; Predicated truncate-and-sign-extend operations with merging.
(define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
(unspec:SVE_FULL_HSDI
@@ -2843,7 +2872,8 @@
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
(match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
- "TARGET_SVE && (~<narrower_mask> & <self_mask>) == 0"
+ "TARGET_SVE
+ && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
"@
sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
@@ -2851,17 +2881,11 @@
[(set_attr "movprfx" "*,yes,yes")]
)
-;; -------------------------------------------------------------------------
-;; ---- [INT] Zero extension
-;; -------------------------------------------------------------------------
-;; Includes:
-;; - UXTB
-;; - UXTH
-;; - UXTW
-;; -------------------------------------------------------------------------
-
-;; Match UXT[BHW] as a conditional AND of a constant, merging with the
+;; Predicated truncate-and-zero-extend operations, merging with the
;; first input.
+;;
+;; The canonical form of this operation is an AND of a constant rather
+;; than (zero_extend (truncate ...)).
(define_insn "*cond_uxt<mode>_2"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
@@ -2878,7 +2902,7 @@
[(set_attr "movprfx" "*,yes")]
)
-;; Match UXT[BHW] as a conditional AND of a constant, merging with an
+;; Predicated truncate-and-zero-extend operations, merging with an
;; independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 4c9035f..06e91eb 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -359,6 +359,11 @@
VNx4SI VNx2SI
VNx2DI])
+;; SVE integer vector modes whose elements are 16 bits or wider.
+(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
+ VNx4SI VNx2SI
+ VNx2DI])
+
;; Modes involved in extending or truncating SVE data, for 8 elements per
;; 128-bit block.
(define_mode_iterator VNx8_NARROW [VNx8QI])
@@ -1364,9 +1369,10 @@
(VNx2HI "0x22")
(VNx2SI "0x24")])
-;; For full vector modes, the mask of narrower modes, encoded as above.
-(define_mode_attr narrower_mask [(VNx8HI "0x81")
- (VNx4SI "0x43")
+;; For SVE_HSDI vector modes, the mask of narrower modes, encoded as above.
+(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41")
+ (VNx2HI "0x21")
+ (VNx4SI "0x43") (VNx2SI "0x23")
(VNx2DI "0x27")])
;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.