aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndre Vieira <andre.simoesdiasvieira@arm.com>2021-10-13 16:42:47 +0100
committerAndre Vieira <andre.simoesdiasvieira@arm.com>2021-10-13 16:44:03 +0100
commit5efeaa0d29525fa28e189e6278c1b1651fb0d7bf (patch)
treeb295b1d5cae56f6685ce89fdcbd415f871366787 /gcc
parent20995e68c28c1091be915f5ab4fcfa45c1cd0bc5 (diff)
downloadgcc-5efeaa0d29525fa28e189e6278c1b1651fb0d7bf.zip
gcc-5efeaa0d29525fa28e189e6278c1b1651fb0d7bf.tar.gz
gcc-5efeaa0d29525fa28e189e6278c1b1651fb0d7bf.tar.bz2
[arm] Fix MVE addressing modes for VLDR[BHW] and VSTR[BHW]
The way we were previously dealing with addressing modes for MVE was preventing the use of pre, post and offset addressing modes for the normal loads and stores, including widening and narrowing. This patch fixes that and adds tests to ensure we are capable of using all the available addressing modes. gcc/ChangeLog: 2021-10-12 Andre Vieira <andre.simoesdiasvieira@arm.com> * config/arm/arm.c (thumb2_legitimate_address_p): Use VALID_MVE_MODE when checking mve addressing modes. (mve_vector_mem_operand): Fix the way we handle pre, post and offset addressing modes. (arm_print_operand): Fix printing of POST_ and PRE_MODIFY. * config/arm/mve.md: Use mve_memory_operand predicate everywhere where there is a single Ux constraint. gcc/testsuite/ChangeLog: 2021-10-12 Andre Vieira <andre.simoesdiasvieira@arm.com> * gcc.target/arm/mve/mve.exp: Make it test main directory. * gcc.target/arm/mve/mve_load_memory_modes.c: New test. * gcc.target/arm/mve/mve_store_memory_modes.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/arm.c63
-rw-r--r--gcc/config/arm/mve.md16
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/mve.exp3
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c357
-rw-r--r--gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c370
5 files changed, 767 insertions, 42 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 471128f..d8c5d2b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8531,8 +8531,7 @@ thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
bool use_ldrd;
enum rtx_code code = GET_CODE (x);
- if (TARGET_HAVE_MVE
- && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
+ if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
return mve_vector_mem_operand (mode, x, strict_p);
if (arm_address_register_rtx_p (x, strict_p))
@@ -13434,53 +13433,49 @@ mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
|| code == PRE_INC || code == POST_DEC)
{
reg_no = REGNO (XEXP (op, 0));
- return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
- ? reg_no <= LAST_LO_REGNUM
- :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
- }
- else if ((code == POST_MODIFY || code == PRE_MODIFY)
- && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
+ return ((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
+ ? reg_no <= LAST_LO_REGNUM
+ :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
+ || reg_no >= FIRST_PSEUDO_REGISTER;
+ }
+ else if (((code == POST_MODIFY || code == PRE_MODIFY)
+ && GET_CODE (XEXP (op, 1)) == PLUS
+ && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
+ && REG_P (XEXP (op, 0))
+ && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
+ /* Make sure to only accept PLUS after reload_completed, otherwise
+ this will interfere with auto_inc's pattern detection. */
+ || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
+ && GET_CODE (XEXP (op, 1)) == CONST_INT))
{
reg_no = REGNO (XEXP (op, 0));
- val = INTVAL (XEXP ( XEXP (op, 1), 1));
+ if (code == PLUS)
+ val = INTVAL (XEXP (op, 1));
+ else
+ val = INTVAL (XEXP(XEXP (op, 1), 1));
+
switch (mode)
{
case E_V16QImode:
- if (abs (val) <= 127)
- return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
- return FALSE;
- case E_V8HImode:
- case E_V8HFmode:
- if (abs (val) <= 255)
- return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
- return FALSE;
case E_V8QImode:
case E_V4QImode:
if (abs (val) <= 127)
- return (reg_no <= LAST_LO_REGNUM
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
+ return (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
+ || reg_no >= FIRST_PSEUDO_REGISTER;
return FALSE;
+ case E_V8HImode:
+ case E_V8HFmode:
case E_V4HImode:
case E_V4HFmode:
if (val % 2 == 0 && abs (val) <= 254)
- return (reg_no <= LAST_LO_REGNUM
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
+ return reg_no <= LAST_LO_REGNUM
+ || reg_no >= FIRST_PSEUDO_REGISTER;
return FALSE;
case E_V4SImode:
case E_V4SFmode:
if (val % 4 == 0 && abs (val) <= 508)
- return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
- return FALSE;
- case E_V2DImode:
- case E_V2DFmode:
- case E_TImode:
- if (val % 4 == 0 && val >= 0 && val <= 1020)
- return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
- || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
+ return (reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
+ || reg_no >= FIRST_PSEUDO_REGISTER;
return FALSE;
default:
return FALSE;
@@ -24277,7 +24272,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
else if (code == POST_MODIFY || code == PRE_MODIFY)
{
asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
- postinc_reg = XEXP ( XEXP (x, 1), 1);
+ postinc_reg = XEXP (XEXP (addr, 1), 1);
if (postinc_reg && CONST_INT_P (postinc_reg))
{
if (code == POST_MODIFY)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index e393518..a66af4d 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -7570,7 +7570,7 @@
;;
(define_insn "mve_vldrwq_fv4sf"
[(set (match_operand:V4SF 0 "s_register_operand" "=w")
- (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Ux")]
+ (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux")]
VLDRWQ_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
@@ -7589,7 +7589,7 @@
;;
(define_insn "mve_vldrwq_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Ux")]
+ (unspec:V4SI [(match_operand:V4SI 1 "mve_memory_operand" "Ux")]
VLDRWQ))
]
"TARGET_HAVE_MVE"
@@ -7608,7 +7608,7 @@
;;
(define_insn "mve_vldrwq_z_fv4sf"
[(set (match_operand:V4SF 0 "s_register_operand" "=w")
- (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Ux")
+ (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux")
(match_operand:HI 2 "vpr_register_operand" "Up")]
VLDRWQ_F))
]
@@ -7628,7 +7628,7 @@
;;
(define_insn "mve_vldrwq_z_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Ux")
+ (unspec:V4SI [(match_operand:V4SI 1 "mve_memory_operand" "Ux")
(match_operand:HI 2 "vpr_register_operand" "Up")]
VLDRWQ))
]
@@ -8282,7 +8282,7 @@
;; [vstrwq_f]
;;
(define_insn "mve_vstrwq_fv4sf"
- [(set (match_operand:V4SI 0 "memory_operand" "=Ux")
+ [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
(unspec:V4SI [(match_operand:V4SF 1 "s_register_operand" "w")]
VSTRWQ_F))
]
@@ -8301,7 +8301,7 @@
;; [vstrwq_p_f]
;;
(define_insn "mve_vstrwq_p_fv4sf"
- [(set (match_operand:V4SI 0 "memory_operand" "=Ux")
+ [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
(unspec:V4SI [(match_operand:V4SF 1 "s_register_operand" "w")
(match_operand:HI 2 "vpr_register_operand" "Up")]
VSTRWQ_F))
@@ -8321,7 +8321,7 @@
;; [vstrwq_p_s vstrwq_p_u]
;;
(define_insn "mve_vstrwq_p_<supf>v4si"
- [(set (match_operand:V4SI 0 "memory_operand" "=Ux")
+ [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
(match_operand:HI 2 "vpr_register_operand" "Up")]
VSTRWQ))
@@ -8341,7 +8341,7 @@
;; [vstrwq_s vstrwq_u]
;;
(define_insn "mve_vstrwq_<supf>v4si"
- [(set (match_operand:V4SI 0 "memory_operand" "=Ux")
+ [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")]
VSTRWQ))
]
diff --git a/gcc/testsuite/gcc.target/arm/mve/mve.exp b/gcc/testsuite/gcc.target/arm/mve/mve.exp
index d09fc27..a858e52 100644
--- a/gcc/testsuite/gcc.target/arm/mve/mve.exp
+++ b/gcc/testsuite/gcc.target/arm/mve/mve.exp
@@ -44,6 +44,9 @@ dg-init
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/intrinsics/*.\[cCS\]]] \
"" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+ "" $DEFAULT_CFLAGS
+
# All done.
set dg_runtest_extra_prunes ""
dg-finish
diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
new file mode 100644
index 0000000..e35eb11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_load_memory_modes.c
@@ -0,0 +1,357 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+/*
+**off_load8_0:
+** ...
+** vldrb.8 q0, \[r0, #16\]
+** ...
+*/
+int8x16_t off_load8_0 (int8_t * a)
+{
+ return vld1q_s8 (a + 16);
+}
+
+/*
+**off_load8_1:
+** ...
+** vldrb.u16 q0, \[r0, #1\]
+** ...
+*/
+uint16x8_t off_load8_1 (uint8_t * a)
+{
+ return vldrbq_u16 (a + 1);
+}
+
+/*
+**off_load8_2:
+** ...
+** vldrb.s32 q0, \[r0, #127\]
+** ...
+*/
+int32x4_t off_load8_2 (int8_t * a)
+{
+ return vldrbq_s32 (a + 127);
+}
+
+/*
+**off_load8_3:
+** ...
+** vldrb.8 q0, \[r0, #-127\]
+** ...
+*/
+uint8x16_t off_load8_3 (uint8_t * a)
+{
+ return vldrbq_u8 (a - 127);
+}
+
+/*
+**not_off_load8_0:
+** ...
+** vldrb.8 q0, \[r[0-9]+\]
+** ...
+*/
+int8x16_t not_off_load8_0 (int8_t * a)
+{
+ return vld1q_s8 (a + 128);
+}
+
+/*
+**off_loadfp16_0:
+** ...
+** vldrh.16 q0, \[r0, #-244\]
+** ...
+*/
+float16x8_t off_loadfp16_0 (float16_t *a)
+{
+ return vld1q_f16 (a - 122);
+}
+
+/*
+**off_load16_0:
+** ...
+** vldrh.16 q0, \[r0, #-2\]
+** ...
+*/
+uint16x8_t off_load16_0 (uint16_t * a)
+{
+ return vld1q_u16 (a - 1);
+}
+
+/*
+**off_load16_1:
+** ...
+** vldrh.u32 q0, \[r0, #254\]
+** ...
+*/
+uint32x4_t off_load16_1 (uint16_t * a)
+{
+ return vldrhq_u32 (a + 127);
+}
+
+/*
+**not_off_load16_0:
+** ...
+** vldrh.16 q0, \[r[0-9]+\]
+** ...
+*/
+int16x8_t not_off_load16_0 (int8_t * a)
+{
+ return vld1q_s16 ((int16_t *)(a + 1));
+}
+
+/*
+**not_off_load16_1:
+** ...
+** vldrh.u32 q0, \[r[0-9]+\]
+** ...
+*/
+uint32x4_t not_off_load16_1 (uint16_t * a)
+{
+ return vldrhq_u32 ((a - 128));
+}
+
+/*
+**off_loadfp32_0:
+** ...
+** vldrw.32 q0, \[r0, #24\]
+** ...
+*/
+float32x4_t off_loadfp32_0 (float32_t *a)
+{
+ return vld1q_f32 (a + 6);
+}
+
+/*
+**off_load32_0:
+** ...
+** vldrw.32 q0, \[r0, #4\]
+** ...
+*/
+uint32x4_t off_load32_0 (uint32_t * a)
+{
+ return vld1q_u32 (a + 1);
+}
+
+/*
+**off_load32_1:
+** ...
+** vldrw.32 q0, \[r0, #-508\]
+** ...
+*/
+int32x4_t off_load32_1 (int32_t * a)
+{
+ return vldrwq_s32 (a - 127);
+}
+/*
+**pre_load8_0:
+** ...
+** vldrb.8 q[0-9]+, \[r0, #16\]!
+** ...
+*/
+int8_t* pre_load8_0 (int8_t * a, int8x16_t *v)
+{
+ a += 16;
+ *v = vld1q_s8 (a);
+ return a;
+}
+
+/*
+**pre_load8_1:
+** ...
+** vldrb.u16 q[0-9]+, \[r0, #4\]!
+** ...
+*/
+uint8_t* pre_load8_1 (uint8_t * a, uint16x8_t *v)
+{
+ a += 4;
+ *v = vldrbq_u16 (a);
+ return a;
+}
+
+/*
+**pre_loadfp16_0:
+** ...
+** vldrh.16 q[0-9]+, \[r0, #128\]!
+** ...
+*/
+float16_t* pre_loadfp16_0 (float16_t *a, float16x8_t *v)
+{
+ a += 64;
+ *v = vld1q_f16 (a);
+ return a;
+}
+
+/*
+**pre_load16_0:
+** ...
+** vldrh.16 q[0-9]+, \[r0, #-254\]!
+** ...
+*/
+int16_t* pre_load16_0 (int16_t * a, int16x8_t *v)
+{
+ a -= 127;
+ *v = vldrhq_s16 (a);
+ return a;
+}
+
+/*
+**pre_load16_1:
+** ...
+** vldrh.s32 q[0-9]+, \[r0, #52\]!
+** ...
+*/
+int16_t* pre_load16_1 (int16_t * a, int32x4_t *v)
+{
+ a += 26;
+ *v = vldrhq_s32 (a);
+ return a;
+}
+
+/*
+**pre_loadfp32_0:
+** ...
+** vldrw.32 q[0-9]+, \[r0, #-72\]!
+** ...
+*/
+float32_t* pre_loadfp32_0 (float32_t *a, float32x4_t *v)
+{
+ a -= 18;
+ *v = vld1q_f32 (a);
+ return a;
+}
+
+
+/*
+**pre_load32_0:
+** ...
+** vldrw.32 q[0-9]+, \[r0, #-4\]!
+** ...
+*/
+uint32_t* pre_load32_0 (uint32_t * a, uint32x4_t *v)
+{
+ a -= 1;
+ *v = vld1q_u32 (a);
+ return a;
+}
+
+
+/*
+**post_load8_0:
+** ...
+** vldrb.8 q[0-9]+, \[r0\], #26
+** ...
+*/
+uint8_t* post_load8_0 (uint8_t * a, uint8x16_t *v)
+{
+ *v = vld1q_u8 (a);
+ a += 26;
+ return a;
+}
+
+/*
+**post_load8_1:
+** ...
+** vldrb.s16 q[0-9]+, \[r0\], #-1
+** ...
+*/
+int8_t* post_load8_1 (int8_t * a, int16x8_t *v)
+{
+ *v = vldrbq_s16 (a);
+ a--;
+ return a;
+}
+
+/*
+**post_load8_2:
+** ...
+** vldrb.8 q[0-9]+, \[r0\], #26
+** ...
+*/
+uint8_t* post_load8_2 (uint8_t * a, uint8x16_t *v)
+{
+ *v = vld1q_u8 (a);
+ a += 26;
+ return a;
+}
+
+/*
+**post_load8_3:
+** ...
+** vldrb.s16 q[0-9]+, \[r0\], #-1
+** ...
+*/
+int8_t* post_load8_3 (int8_t * a, int16x8_t *v)
+{
+ *v = vldrbq_s16 (a);
+ a--;
+ return a;
+}
+
+/*
+**post_loadfp16_0:
+** ...
+** vldrh.16 q[0-9]+, \[r0\], #-24
+** ...
+*/
+float16_t* post_loadfp16_0 (float16_t *a, float16x8_t *v)
+{
+ *v = vld1q_f16 (a);
+ a -= 12;
+ return a;
+}
+
+/*
+**post_load16_0:
+** ...
+** vldrh.16 q[0-9]+, \[r0\], #-126
+** ...
+*/
+uint16_t* post_load16_0 (uint16_t * a, uint16x8_t *v)
+{
+ *v = vldrhq_u16 (a);
+ a -= 63;
+ return a;
+}
+
+/*
+**post_load16_1:
+** ...
+** vldrh.u32 q[0-9]+, \[r0\], #16
+** ...
+*/
+uint16_t* post_load16_1 (uint16_t * a, uint32x4_t *v)
+{
+ *v = vldrhq_u32 (a);
+ a += 8;
+ return a;
+}
+
+/*
+**post_loadfp32_0:
+** ...
+** vldrw.32 q[0-9]+, \[r0\], #4
+** ...
+*/
+float32_t* post_loadfp32_0 (float32_t *a, float32x4_t *v)
+{
+ *v = vld1q_f32 (a);
+ a++;
+ return a;
+}
+
+/*
+**post_load32_0:
+** ...
+** vldrw.32 q[0-9]+, \[r0\], #-16
+** ...
+*/
+int32_t* post_load32_0 (int32_t * a, int32x4_t *v)
+{
+ *v = vld1q_s32 (a);
+ a -= 4;
+ return a;
+}
diff --git a/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
new file mode 100644
index 0000000..632f5b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/mve_store_memory_modes.c
@@ -0,0 +1,370 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_mve.h"
+/*
+**off_store8_0:
+** ...
+** vstrb.8 q0, \[r0, #16\]
+** ...
+*/
+uint8_t *off_store8_0 (uint8_t * a, uint8x16_t v)
+{
+ vst1q_u8 (a + 16, v);
+ return a;
+}
+
+/*
+**off_store8_1:
+** ...
+** vstrb.16 q0, \[r0, #-1\]
+** ...
+*/
+int8_t *off_store8_1 (int8_t * a, int16x8_t v)
+{
+ vstrbq_s16 (a - 1, v);
+ return a;
+}
+
+/*
+**off_store8_2:
+** ...
+** vstrb.32 q0, \[r0, #-127\]
+** ...
+*/
+uint8_t *off_store8_2 (uint8_t * a, uint32x4_t v)
+{
+ vstrbq_u32 (a - 127, v);
+ return a;
+}
+
+/*
+**off_store8_3:
+** ...
+** vstrb.8 q0, \[r0, #127\]
+** ...
+*/
+int8_t *off_store8_3 (int8_t * a, int8x16_t v)
+{
+ vstrbq_s8 (a + 127, v);
+ return a;
+}
+
+/*
+**not_off_store8_0:
+** ...
+** vstrb.8 q0, \[r[0-9]+\]
+** ...
+*/
+uint8_t *not_off_store8_0 (uint8_t * a, uint8x16_t v)
+{
+ vst1q_u8 (a - 128, v);
+ return a;
+}
+
+/*
+**off_storefp16_0:
+** ...
+** vstrh.16 q0, \[r0, #250\]
+** ...
+*/
+float16_t *off_storefp16_0 (float16_t *a, float16x8_t v)
+{
+ vst1q_f16 (a + 125, v);
+ return a;
+}
+
+/*
+**off_store16_0:
+** ...
+** vstrh.16 q0, \[r0, #4\]
+** ...
+*/
+int16_t *off_store16_0 (int16_t * a, int16x8_t v)
+{
+ vst1q_s16 (a + 2, v);
+ return a;
+}
+
+/*
+**off_store16_1:
+** ...
+** vstrh.32 q0, \[r0, #-254\]
+** ...
+*/
+int16_t *off_store16_1 (int16_t * a, int32x4_t v)
+{
+ vstrhq_s32 (a - 127, v);
+ return a;
+}
+
+/*
+**not_off_store16_0:
+** ...
+** vstrh.16 q0, \[r[0-9]+\]
+** ...
+*/
+uint8_t *not_off_store16_0 (uint8_t * a, uint16x8_t v)
+{
+ vst1q_u16 ((uint16_t *)(a - 1), v);
+ return a;
+}
+
+/*
+**not_off_store16_1:
+** ...
+** vstrh.32 q0, \[r[0-9]+\]
+** ...
+*/
+int16_t *not_off_store16_1 (int16_t * a, int32x4_t v)
+{
+ vstrhq_s32 ((a + 128), v);
+ return a;
+}
+
+/*
+**off_storefp32_0:
+** ...
+** vstrw.32 q0, \[r0, #-412\]
+** ...
+*/
+float32_t *off_storefp32_0 (float32_t *a, float32x4_t v)
+{
+ vst1q_f32 (a - 103, v);
+ return a;
+}
+
+/*
+**off_store32_0:
+** ...
+** vstrw.32 q0, \[r0, #-4\]
+** ...
+*/
+int32_t *off_store32_0 (int32_t * a, int32x4_t v)
+{
+ vst1q_s32 (a - 1, v);
+ return a;
+}
+
+/*
+**off_store32_1:
+** ...
+** vstrw.32 q0, \[r0, #508\]
+** ...
+*/
+uint32_t *off_store32_1 (uint32_t * a, uint32x4_t v)
+{
+ vstrwq_u32 (a + 127, v);
+ return a;
+}
+
+/*
+**pre_store8_0:
+** ...
+** vstrb.8 q[0-9]+, \[r0, #-16\]!
+** ...
+*/
+uint8_t* pre_store8_0 (uint8_t * a, uint8x16_t v)
+{
+ a -= 16;
+ vst1q_u8 (a, v);
+ return a;
+}
+
+/*
+**pre_store8_1:
+** ...
+** vstrb.16 q[0-9]+, \[r0, #4\]!
+** ...
+*/
+int8_t* pre_store8_1 (int8_t * a, int16x8_t v)
+{
+ a += 4;
+ vstrbq_s16 (a, v);
+ return a;
+}
+
+/*
+**pre_storefp16_0:
+** ...
+** vstrh.16 q0, \[r0, #8\]!
+** ...
+*/
+float16_t *pre_storefp16_0 (float16_t *a, float16x8_t v)
+{
+ a += 4;
+ vst1q_f16 (a, v);
+ return a;
+}
+
+/*
+**pre_store16_0:
+** ...
+** vstrh.16 q[0-9]+, \[r0, #254\]!
+** ...
+*/
+uint16_t* pre_store16_0 (uint16_t * a, uint16x8_t v)
+{
+ a += 127;
+ vstrhq_u16 (a, v);
+ return a;
+}
+
+/*
+**pre_store16_1:
+** ...
+** vstrh.32 q[0-9]+, \[r0, #-52\]!
+** ...
+*/
+int16_t* pre_store16_1 (int16_t * a, int32x4_t v)
+{
+ a -= 26;
+ vstrhq_s32 (a, v);
+ return a;
+}
+
+/*
+**pre_storefp32_0:
+** ...
+** vstrw.32 q0, \[r0, #-4\]!
+** ...
+*/
+float32_t *pre_storefp32_0 (float32_t *a, float32x4_t v)
+{
+ a--;
+ vst1q_f32 (a, v);
+ return a;
+}
+
+/*
+**pre_store32_0:
+** ...
+** vstrw.32 q[0-9]+, \[r0, #4\]!
+** ...
+*/
+int32_t* pre_store32_0 (int32_t * a, int32x4_t v)
+{
+ a += 1;
+ vst1q_s32 (a, v);
+ return a;
+}
+
+
+/*
+**post_store8_0:
+** ...
+** vstrb.8 q[0-9]+, \[r0\], #-26
+** ...
+*/
+int8_t* post_store8_0 (int8_t * a, int8x16_t v)
+{
+ vst1q_s8 (a, v);
+ a -= 26;
+ return a;
+}
+
+/*
+**post_store8_1:
+** ...
+** vstrb.16 q[0-9]+, \[r0\], #1
+** ...
+*/
+uint8_t* post_store8_1 (uint8_t * a, uint16x8_t v)
+{
+ vstrbq_u16 (a, v);
+ a++;
+ return a;
+}
+
+/*
+**post_store8_2:
+** ...
+** vstrb.8 q[0-9]+, \[r0\], #-26
+** ...
+*/
+int8_t* post_store8_2 (int8_t * a, int8x16_t v)
+{
+ vst1q_s8 (a, v);
+ a -= 26;
+ return a;
+}
+
+/*
+**post_store8_3:
+** ...
+** vstrb.16 q[0-9]+, \[r0\], #7
+** ...
+*/
+uint8_t* post_store8_3 (uint8_t * a, uint16x8_t v)
+{
+ vstrbq_u16 (a, v);
+ a += 7;
+ return a;
+}
+
+/*
+**post_storefp16_0:
+** ...
+** vstrh.16 q[0-9]+, \[r0\], #-16
+** ...
+*/
+float16_t *post_storefp16_0 (float16_t *a, float16x8_t v)
+{
+ vst1q_f16 (a, v);
+ a -= 8;
+ return a;
+}
+
+/*
+**post_store16_0:
+** ...
+** vstrh.16 q[0-9]+, \[r0\], #126
+** ...
+*/
+int16_t* post_store16_0 (int16_t * a, int16x8_t v)
+{
+ vstrhq_s16 (a, v);
+ a += 63;
+ return a;
+}
+
+/*
+**post_store16_1:
+** ...
+** vstrh.32 q[0-9]+, \[r0\], #-16
+** ...
+*/
+uint16_t* post_store16_1 (uint16_t * a, uint32x4_t v)
+{
+ vstrhq_u32 (a, v);
+ a -= 8;
+ return a;
+}
+
+/*
+**post_storefp32_0:
+** ...
+** vstrw.32 q[0-9]+, \[r0\], #-16
+** ...
+*/
+float32_t* post_storefp32_0 (float32_t * a, float32x4_t v)
+{
+ vst1q_f32 (a, v);
+ a -= 4;
+ return a;
+}
+
+/*
+**post_store32_0:
+** ...
+** vstrw.32 q[0-9]+, \[r0\], #16
+** ...
+*/
+int32_t* post_store32_0 (int32_t * a, int32x4_t v)
+{
+ vst1q_s32 (a, v);
+ a += 4;
+ return a;
+}