target/arm: Use tcg_gen_qemu_{st, ld}_i128 for do_fp_{st, ld}

While we don't require 16-byte atomicity here, using a single larger
operation simplifies the code. Introduce finalize_memop_asimd for this.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230530191438.411344-6-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Richard Henderson <richard.henderson@linaro.org> 2023-06-06 10:19:35 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2023-06-06 10:19:35 +0100
commit: d450bd0157be43d273116c3e3617883c8a0ac3d1 (patch)
tree: 691df745f26c38f0d5bfd4dd19f4feb225427184 /target/arm/tcg
parent: c74cc082a6d3f8fde7778d26f600967582741967 (diff)
download: qemu-d450bd0157be43d273116c3e3617883c8a0ac3d1.zip
qemu-d450bd0157be43d273116c3e3617883c8a0ac3d1.tar.gz
qemu-d450bd0157be43d273116c3e3617883c8a0ac3d1.tar.bz2
2 files changed, 35 insertions, 24 deletions
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 1fff74c..3674fc1 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -911,26 +911,20 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 {
     /* This writes the bottom N bits of a 128 bit wide vector to memory */
     TCGv_i64 tmplo = tcg_temp_new_i64();
-    MemOp mop;
+    MemOp mop = finalize_memop_asimd(s, size);
 
     tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 
-    if (size < 4) {
-        mop = finalize_memop(s, size);
+    if (size < MO_128) {
         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
     } else {
-        bool be = s->be_data == MO_BE;
-        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
         TCGv_i64 tmphi = tcg_temp_new_i64();
+        TCGv_i128 t16 = tcg_temp_new_i128();
 
         tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
+        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
 
-        mop = s->be_data | MO_UQ;
-        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
-                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
-        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
-        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
-                            get_mem_index(s), mop);
+        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
     }
 }
 
@@ -942,24 +936,17 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
     /* This always zero-extends and writes to a full 128 bit wide vector */
     TCGv_i64 tmplo = tcg_temp_new_i64();
     TCGv_i64 tmphi = NULL;
-    MemOp mop;
+    MemOp mop = finalize_memop_asimd(s, size);
 
-    if (size < 4) {
-        mop = finalize_memop(s, size);
+    if (size < MO_128) {
         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
     } else {
-        bool be = s->be_data == MO_BE;
-        TCGv_i64 tcg_hiaddr;
+        TCGv_i128 t16 = tcg_temp_new_i128();
+
+        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
 
         tmphi = tcg_temp_new_i64();
-        tcg_hiaddr = tcg_temp_new_i64();
-
-        mop = s->be_data | MO_UQ;
-        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
-                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
-        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
-        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
-                            get_mem_index(s), mop);
+        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
     }
 
     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index c1e57a5..3aa486a 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -610,6 +610,30 @@ static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc)
 }
 
 /**
+ * finalize_memop_asimd:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with atomicity of AccessType_ASIMD.
+ */
+static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc)
+{
+    /*
+     * In the pseudocode for Mem[], with AccessType_ASIMD, size == 16,
+     * if IsAligned(8), the first case provides separate atomicity for
+     * the pair of 64-bit accesses.  If !IsAligned(8), the middle cases
+     * do not apply, and we're left with the final case of no atomicity.
+     * Thus MO_ATOM_IFALIGN_PAIR.
+     *
+     * For other sizes, normal LSE2 rules apply.
+     */
+    if ((opc & MO_SIZE) == MO_128) {
+        return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR);
+    }
+    return finalize_memop(s, opc);
+}
+
+/**
  * asimd_imm_const: Expand an encoded SIMD constant value
  *
  * Expand a SIMD constant value. This is essentially the pseudocode
author	Richard Henderson <richard.henderson@linaro.org>	2023-06-06 10:19:35 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2023-06-06 10:19:35 +0100
commit	d450bd0157be43d273116c3e3617883c8a0ac3d1 (patch)
tree	691df745f26c38f0d5bfd4dd19f4feb225427184 /target/arm/tcg
parent	c74cc082a6d3f8fde7778d26f600967582741967 (diff)
download	qemu-d450bd0157be43d273116c3e3617883c8a0ac3d1.zip qemu-d450bd0157be43d273116c3e3617883c8a0ac3d1.tar.gz qemu-d450bd0157be43d273116c3e3617883c8a0ac3d1.tar.bz2