target/riscv: support atomic instruction fetch (Ziccif)

Support 4-byte atomic instruction fetch when instruction is natural aligned. Current implementation is not atomic because it loads instruction twice for first and last 2 bytes. We load 4 bytes at once to keep the atomicity. This instruction preload method only applys when instruction is 4-byte aligned. If instruction is unaligned, it could be across pages so that preload will trigger additional page fault. We encounter this issue when doing pressure test of enabling & disabling Linux kernel ftrace. Ftrace with kernel preemption requires concurrent modification and execution of instruction, so non-atomic instruction fetch will cause the race condition. We may fetch the wrong instruction which is the mixing of 2 instructions. Also, RISC-V Profile wants to provide this feature by HW. RVA20U64 Ziccif protects the atomicity of instruction fetch when it is natural aligned. This commit depends on the atomic read support of translator_ld in the commit 6a9dfe1984b0c593fb0ddb52d4e70832e6201dd6. Signed-off-by: Jim Shu <jim.shu@sifive.com> Reviewed-by: Frank Chang <frank.chang@sifive.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Message-ID: <20250508094838.19394-1-jim.shu@sifive.com> Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
author: Jim Shu <jim.shu@sifive.com> 2025-05-08 17:48:38 +0800
committer: Alistair Francis <alistair.francis@wdc.com> 2025-07-04 21:09:48 +1000
commit: f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba (patch)
tree: c591eb8035aa088f4c9adc4fcb8b43fe7ef761c1
parent: 5ee4f21713d51fdf3d8756744ffe92365df8598b (diff)
download: qemu-f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba.zip
qemu-f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba.tar.gz
qemu-f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba.tar.bz2
1 files changed, 34 insertions, 12 deletions
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index d7a6de0..9ddef2d 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1217,13 +1217,35 @@ const RISCVDecoder decoder_table[] = {
 
 const size_t decoder_table_size = ARRAY_SIZE(decoder_table);
 
-static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
+static void decode_opc(CPURISCVState *env, DisasContext *ctx)
 {
+    uint32_t opcode;
+    bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
+
     ctx->virt_inst_excp = false;
-    ctx->cur_insn_len = insn_len(opcode);
+    if (pc_is_4byte_align) {
+        /*
+         * Load 4 bytes at once to make instruction fetch atomically.
+         *
+         * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be
+         * across pages. We could preload 4 bytes instruction no matter
+         * real one is 2 or 4 bytes. Instruction preload wouldn't trigger
+         * additional page fault.
+         */
+        opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next);
+    } else {
+        /*
+         * For unaligned pc, instruction preload may trigger additional
+         * page fault so we only load 2 bytes here.
+         */
+        opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next);
+    }
+    ctx->ol = ctx->xl;
+
+    ctx->cur_insn_len = insn_len((uint16_t)opcode);
     /* Check for compressed insn */
     if (ctx->cur_insn_len == 2) {
-        ctx->opcode = opcode;
+        ctx->opcode = (uint16_t)opcode;
         /*
          * The Zca extension is added as way to refer to instructions in the C
          * extension that do not include the floating-point loads and stores
@@ -1233,15 +1255,17 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
             return;
         }
     } else {
-        uint32_t opcode32 = opcode;
-        opcode32 = deposit32(opcode32, 16, 16,
-                             translator_lduw(env, &ctx->base,
-                                             ctx->base.pc_next + 2));
-        ctx->opcode = opcode32;
+        if (!pc_is_4byte_align) {
+            /* Load last 2 bytes of instruction here */
+            opcode = deposit32(opcode, 16, 16,
+                               translator_lduw(env, &ctx->base,
+                                               ctx->base.pc_next + 2));
+        }
+        ctx->opcode = opcode;
 
         for (guint i = 0; i < ctx->decoders->len; ++i) {
             riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i);
-            if (func(ctx, opcode32)) {
+            if (func(ctx, opcode)) {
                 return;
             }
         }
@@ -1319,10 +1343,8 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     CPURISCVState *env = cpu_env(cpu);
-    uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
 
-    ctx->ol = ctx->xl;
-    decode_opc(env, ctx, opcode16);
+    decode_opc(env, ctx);
     ctx->base.pc_next += ctx->cur_insn_len;
 
     /*
author	Jim Shu <jim.shu@sifive.com>	2025-05-08 17:48:38 +0800
committer	Alistair Francis <alistair.francis@wdc.com>	2025-07-04 21:09:48 +1000
commit	f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba (patch)
tree	c591eb8035aa088f4c9adc4fcb8b43fe7ef761c1
parent	5ee4f21713d51fdf3d8756744ffe92365df8598b (diff)
download	qemu-f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba.zip qemu-f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba.tar.gz qemu-f9eaa1542be5f7273dec1111a1e17d8ff2a5bcba.tar.bz2