author	Nick Clifton <nickc@redhat.com>	2015-11-24 08:47:59 +0000
committer	Nick Clifton <nickc@redhat.com>	2015-11-24 08:47:59 +0000
commit	2e8cf49e1387eba9c4ce062885b99a6eb76c01f8 (patch)
tree	363800e2edad589cb37f72e10fc842097a8ec9c4 /sim/aarch64/simulator.c
parent	351e610191016136a49ee2a0889f1c4929169fc6 (diff)
Add an AArch64 simulator to GDB.
sim
	* configure.tgt: Add aarch64 entry.
	* configure: Regenerate.
	* sim/aarch64/configure.ac: New configure template.
	* sim/aarch64/aclocal.m4: Generate.
	* sim/aarch64/config.in: Generate.
	* sim/aarch64/configure: Generate.
	* sim/aarch64/cpustate.c: New file - functions for accessing
	AArch64 registers.
	* sim/aarch64/cpustate.h: New header.
	* sim/aarch64/decode.h: New header.
	* sim/aarch64/interp.c: New file - interface between GDB and
	simulator.
	* sim/aarch64/Makefile.in: New makefile template.
	* sim/aarch64/memory.c: New file - functions for simulating
	aarch64 memory accesses.
	* sim/aarch64/memory.h: New header.
	* sim/aarch64/sim-main.h: New header.
	* sim/aarch64/simulator.c: New file - aarch64 simulator functions.
	* sim/aarch64/simulator.h: New header.

include/gdb
	* sim-aarch64.h: New file.

sim/test
	* configure: Regenerate.
	* sim/aarch64: New directory.
Diffstat (limited to 'sim/aarch64/simulator.c')
-rw-r--r--	sim/aarch64/simulator.c	13047
1 file changed, 13047 insertions(+), 0 deletions(-)
diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
new file mode 100644
index 0000000..31c054c
--- /dev/null
+++ b/sim/aarch64/simulator.c
@@ -0,0 +1,13047 @@
+/* simulator.c -- Interface for the AArch64 simulator.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+ Contributed by Red Hat.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <math.h>
+#include <time.h>
+#include <limits.h>
+
+#include "dis-asm.h"
+
+#include "simulator.h"
+#include "cpustate.h"
+#include "memory.h"
+
+#define NO_SP 0
+#define SP_OK 1
+
+bfd_boolean disas = FALSE;
+
+#define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
+#define IS_SET(_X) ( TST (( _X )))
+#define IS_CLEAR(_X) (!TST (( _X )))
+
+#define HALT_UNALLOC \
+ do \
+ { \
+ if (TRACE_INSN_P (cpu)) \
+ { \
+ aarch64_print_insn (CPU_STATE (cpu), aarch64_get_PC (cpu)); \
+ TRACE_INSN (cpu, \
+ "Unallocated instruction detected at sim line %d,"\
+ " exe addr %" PRIx64, \
+ __LINE__, aarch64_get_PC (cpu)); \
+ } \
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
+ sim_stopped, SIM_SIGILL); \
+ } \
+ while (0)
+
+#define HALT_NYI \
+ do \
+ { \
+ if (TRACE_INSN_P (cpu)) \
+ { \
+ aarch64_print_insn (CPU_STATE (cpu), aarch64_get_PC (cpu)); \
+ TRACE_INSN (cpu, \
+ "Unimplemented instruction detected at sim line %d,"\
+ " exe addr %" PRIx64, \
+ __LINE__, aarch64_get_PC (cpu)); \
+ } \
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
+ sim_stopped, SIM_SIGABRT); \
+ } \
+ while (0)
+
+#define NYI_assert(HI, LO, EXPECTED) \
+ do \
+ { \
+ if (uimm (aarch64_get_instr (cpu), (HI), (LO)) != (EXPECTED)) \
+ HALT_NYI; \
+ } \
+ while (0)
+
+#define HALT_UNREACHABLE \
+ do \
+ { \
+ TRACE_EVENTS (cpu, "ISE: unreachable code point"); \
+ sim_engine_abort (NULL, cpu, aarch64_get_PC (cpu), "Internal Error"); \
+ } \
+ while (0)
+
+/* Helper functions used by expandLogicalImmediate. */
+
+/* For i = 1, ..., N: result<i-1> = 1; all other bits are zero. */
+static inline uint64_t
+ones (int N)
+{
+ return (N == 64 ? (uint64_t) -1 : (((uint64_t) 1 << N) - 1));
+}
+
+/* result<0> = val<N>. */
+static inline uint64_t
+pickbit (uint64_t val, int N)
+{
+ return pickbits64 (val, N, N);
+}
+
+static uint64_t
+expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
+{
+ uint64_t mask;
+ uint64_t imm;
+ unsigned simd_size;
+
+ /* The immediate value is S+1 bits set to 1, left rotated by SIMDsize - R
+ (in other words, right rotated by R), then replicated. */
+ if (N != 0)
+ {
+ simd_size = 64;
+ mask = 0xffffffffffffffffull;
+ }
+ else
+ {
+ switch (S)
+ {
+ case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
+ case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
+ case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
+ case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
+ case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
+ default: return 0;
+ }
+ mask = (1ull << simd_size) - 1;
+ /* Top bits are IGNORED. */
+ R &= simd_size - 1;
+ }
+
+ /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
+ if (S == simd_size - 1)
+ return 0;
+
+ /* S+1 consecutive bits set to 1. */
+ /* NOTE: S can't be 63 due to detection above. */
+ imm = (1ull << (S + 1)) - 1;
+
+ /* Rotate to the left by simd_size - R. */
+ if (R != 0)
+ imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
+
+ /* Replicate the value according to SIMD size. */
+ switch (simd_size)
+ {
+ case 2: imm = (imm << 2) | imm; /* Fall through. */
+ case 4: imm = (imm << 4) | imm; /* Fall through. */
+ case 8: imm = (imm << 8) | imm; /* Fall through. */
+ case 16: imm = (imm << 16) | imm; /* Fall through. */
+ case 32: imm = (imm << 32) | imm; /* Fall through. */
+ case 64: break;
+ default: return 0;
+ }
+
+ return imm;
+}
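+
+/* A worked example of the expansion above (illustrative only): with
+   N = 0, S = 0x21 (10xxxx) and R = 1, simd_size becomes 16 and S is
+   masked down to 1, so imm starts as 0b11 (S+1 = 2 set bits).
+   Rotating left by 16 - 1 = 15 (i.e. right by 1) gives 0x8001, and
+   the fall-through replication cases then produce
+   0x8001800180018001. */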
+
+/* Instr[22,10] encodes N, immr and imms. We want a lookup table
+   for each possible combination, i.e. 13 bits worth of entries. */
+#define LI_TABLE_SIZE (1 << 13)
+static uint64_t LITable[LI_TABLE_SIZE];
+
+void
+aarch64_init_LIT_table (void)
+{
+ unsigned index;
+
+ for (index = 0; index < LI_TABLE_SIZE; index++)
+ {
+ uint32_t N = uimm (index, 12, 12);
+ uint32_t immr = uimm (index, 11, 6);
+ uint32_t imms = uimm (index, 5, 0);
+
+ LITable [index] = expand_logical_immediate (imms, immr, N);
+ }
+}
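+
+/* Decode paths are expected to index the table directly with the 13
+   bits of instr[22,10]; a zero entry marks an unallocated encoding
+   (a genuine all-zeros immediate cannot be encoded, so zero is free
+   to act as the failure marker).  An illustrative use:
+
+     uint64_t imm = LITable [uimm (aarch64_get_instr (cpu), 22, 10)];
+     if (imm == 0)
+       HALT_UNALLOC;  */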
+
+static void
+dexNotify (sim_cpu *cpu)
+{
+ /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
+ 2 ==> exit Java, 3 ==> start next bytecode. */
+ uint32_t type = uimm (aarch64_get_instr (cpu), 14, 0);
+
+ TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
+
+ switch (type)
+ {
+ case 0:
+ /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
+ aarch64_get_reg_u64 (cpu, R22, 0)); */
+ break;
+ case 1:
+ /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
+ aarch64_get_reg_u64 (cpu, R22, 0)); */
+ break;
+ case 2:
+ /* aarch64_notifyMethodExit (); */
+ break;
+ case 3:
+ /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
+ aarch64_get_reg_u64 (cpu, R22, 0)); */
+ break;
+ }
+}
+
+/* Secondary decode within top level groups. */
+
+static void
+dexPseudo (sim_cpu *cpu)
+{
+ /* assert instr[28,27] = 00
+
+ We provide 2 pseudo instructions:
+
+ HALT stops execution of the simulator causing an immediate
+ return to the x86 code which entered it.
+
+ CALLOUT initiates recursive entry into x86 code. A register
+ argument holds the address of the x86 routine. Immediate
+ values in the instruction identify the number of general
+ purpose and floating point register arguments to be passed
+ and the type of any value to be returned. */
+
+ uint32_t PSEUDO_HALT = 0xE0000000U;
+ uint32_t PSEUDO_CALLOUT = 0x00018000U;
+ uint32_t PSEUDO_CALLOUTR = 0x00018001U;
+ uint32_t PSEUDO_NOTIFY = 0x00014000U;
+ uint32_t dispatch;
+
+ if (aarch64_get_instr (cpu) == PSEUDO_HALT)
+ {
+ TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGTRAP);
+ }
+
+ dispatch = uimm (aarch64_get_instr (cpu), 31, 15);
+
+ /* We do not handle callouts at the moment. */
+ if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
+ {
+ TRACE_EVENTS (cpu, " Callout");
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGABRT);
+ }
+
+ else if (dispatch == PSEUDO_NOTIFY)
+ dexNotify (cpu);
+
+ else
+ HALT_UNALLOC;
+}
+
+/* Load-store single register (unscaled offset)
+ These instructions employ a base register plus an unscaled signed
+ 9 bit offset.
+
+ N.B. the base register (source) can be Xn or SP. All other
+ registers may not be SP. */
+
+/* 32 bit load 32 bit unscaled signed 9 bit. */
+static void
+ldur32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 64 bit load 64 bit unscaled signed 9 bit. */
+static void
+ldur64 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 32 bit load zero-extended byte unscaled signed 9 bit. */
+static void
+ldurb32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 32 bit load sign-extended byte unscaled signed 9 bit. */
+static void
+ldursb32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 64 bit load sign-extended byte unscaled signed 9 bit. */
+static void
+ldursb64 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 32 bit load zero-extended short unscaled signed 9 bit */
+static void
+ldurh32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 32 bit load sign-extended short unscaled signed 9 bit */
+static void
+ldursh32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 64 bit load sign-extended short unscaled signed 9 bit */
+static void
+ldursh64 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* 64 bit load sign-extended word unscaled signed 9 bit */
+static void
+ldursw (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + offset));
+}
+
+/* N.B. with stores the value in source is written to the address
+ identified by source2 modified by offset. */
+
+/* 32 bit store 32 bit unscaled signed 9 bit. */
+static void
+stur32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_mem_u32 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+ aarch64_get_reg_u32 (cpu, rd, NO_SP));
+}
+
+/* 64 bit store 64 bit unscaled signed 9 bit */
+static void
+stur64 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_mem_u64 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+ aarch64_get_reg_u64 (cpu, rd, NO_SP));
+}
+
+/* 32 bit store byte unscaled signed 9 bit */
+static void
+sturb (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_mem_u8 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+ aarch64_get_reg_u8 (cpu, rd, NO_SP));
+}
+
+/* 32 bit store short unscaled signed 9 bit */
+static void
+sturh (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_mem_u16 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+ aarch64_get_reg_u16 (cpu, rd, NO_SP));
+}
+
+/* Load single register pc-relative label
+ Offset is a signed 19 bit immediate count in words
+ rt may not be SP. */
+
+/* 32 bit pc-relative load */
+static void
+ldr32_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_mem_u32
+ (cpu, aarch64_get_PC (cpu) + offset * 4));
+}
+
+/* 64 bit pc-relative load */
+static void
+ldr_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_mem_u64
+ (cpu, aarch64_get_PC (cpu) + offset * 4));
+}
+
+/* sign extended 32 bit pc-relative load */
+static void
+ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_mem_s32
+ (cpu, aarch64_get_PC (cpu) + offset * 4));
+}
+
+/* float pc-relative load */
+static void
+fldrs_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, rd,
+ aarch64_get_mem_float
+ (cpu, aarch64_get_PC (cpu) + offset * 4));
+}
+
+/* double pc-relative load */
+static void
+fldrd_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, st,
+ aarch64_get_mem_double
+ (cpu, aarch64_get_PC (cpu) + offset * 4));
+}
+
+/* long double pc-relative load. */
+static void
+fldrq_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
+ FRegister a;
+
+ aarch64_get_mem_long_double (cpu, addr, & a);
+ aarch64_set_FP_long_double (cpu, st, a);
+}
+
+/* This can be used to scale an offset by applying
+   the requisite shift.  The second argument is the
+   element size in bits: 16, 32, 64 or 128. */
+
+#define SCALE(_offset, _elementSize) \
+ ((_offset) << ScaleShift ## _elementSize)
+
+/* This can be used to optionally scale a register derived offset
+   by applying the requisite shift as indicated by the Scaling
+   argument.  The second argument is the element size in bits: 16,
+   32, 64 or 128.  The third argument is either Scaled or Unscaled.
+   N.B. when _Scaling is Scaled the element shift is applied, while
+   when it is Unscaled the shift amount is zero. */
+
+#define OPT_SCALE(_offset, _elementType, _Scaling) \
+ ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
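+
+/* For illustration, assuming ScaleShift32 is defined as 2 in
+   decode.h: SCALE (3, 32) expands to (3 << 2) = 12, i.e. the byte
+   offset of the fourth 32 bit element, while
+   OPT_SCALE (3, 32, Unscaled) leaves the raw byte displacement 3. */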
+
+/* This can be used to zero or sign extend a 32 bit register derived
+ value to a 64 bit value.  The first argument must be the value as
+ a uint32_t and the second must be either UXTW or SXTW. The result
+ is returned as an int64_t. */
+
+static inline int64_t
+extend (uint32_t value, Extension extension)
+{
+ union
+ {
+ uint32_t u;
+ int32_t n;
+ } x;
+
+ /* A branchless variant of this ought to be possible. */
+ if (extension == UXTW || extension == NoExtension)
+ return value;
+
+ x.u = value;
+ return x.n;
+}
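+
+/* For example, extend (0xfffffffe, SXTW) reinterprets its argument
+   as the signed 32 bit value -2 and widens it to the int64_t -2
+   (0xfffffffffffffffe), whereas extend (0xfffffffe, UXTW) returns
+   0x00000000fffffffe. */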
+
+/* Scalar Floating Point
+
+ FP load/store single register (4 addressing modes)
+
+ N.B. the base register (source) can be the stack pointer.
+ The secondary source register (source2) can only be an Xn register. */
+
+/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
+static void
+fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_FP_float (cpu, st, aarch64_get_mem_float (cpu, address));
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
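+
+/* The pre-/post-index pattern above recurs throughout this file: for
+   Pre (and NoWriteBack) the offset is applied before the access, for
+   Post it is applied afterwards, and the updated address is written
+   back to the base register unless writeback is suppressed.  E.g.
+   with X1 = 0x1000 and offset = 8, a pre-indexed load reads from
+   0x1008 and leaves X1 = 0x1008, while a post-indexed load reads
+   from 0x1000 and also leaves X1 = 0x1008. */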
+
+/* Load 32 bit scaled unsigned 12 bit. */
+static void
+fldrs_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ aarch64_set_FP_float (cpu, st,
+ aarch64_get_mem_float
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 32)));
+}
+
+/* Load 32 bit scaled or unscaled zero- or sign-extended
+ 32-bit register offset. */
+static void
+fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 32, scaling);
+
+ aarch64_set_FP_float (cpu, st,
+ aarch64_get_mem_float
+ (cpu, address + displacement));
+}
+
+/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
+static void
+fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_FP_double (cpu, st, aarch64_get_mem_double (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* Load 64 bit scaled unsigned 12 bit. */
+static void
+fldrd_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
+
+ aarch64_set_FP_double (cpu, st, aarch64_get_mem_double (cpu, address));
+}
+
+/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
+static void
+fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 64, scaling);
+
+ fldrd_wb (cpu, displacement, NoWriteBack);
+}
+
+/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
+static void
+fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ FRegister a;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_get_mem_long_double (cpu, address, & a);
+ aarch64_set_FP_long_double (cpu, st, a);
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* Load 128 bit scaled unsigned 12 bit. */
+static void
+fldrq_abs (sim_cpu *cpu, uint32_t offset)
+{
+ FRegister a;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
+
+ aarch64_get_mem_long_double (cpu, address, & a);
+ aarch64_set_FP_long_double (cpu, st, a);
+}
+
+/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
+static void
+fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 128, scaling);
+
+ fldrq_wb (cpu, displacement, NoWriteBack);
+}
+
+/* Memory Access
+
+ load-store single register
+ There are four addressing modes available here which all employ a
+ 64 bit source (base) register.
+
+ N.B. the base register (source) can be the stack pointer.
+ The secondary source register (source2) can only be an Xn register.
+
+ Scaled, 12-bit, unsigned immediate offset, without pre- and
+ post-index options.
+ Unscaled, 9-bit, signed immediate offset with pre- or post-index
+ writeback.
+ scaled or unscaled 64-bit register offset.
+ scaled or unscaled 32-bit extended register offset.
+
+ All offsets are assumed to be raw from the decode, i.e. the
+ simulator is expected to adjust scaled offsets based on the
+ accessed data size.  The same applies to the register and
+ extended register offset versions, except that in the latter
+ case the operation may also require a sign extend.
+
+ A separate method is provided for each possible addressing mode. */
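+
+/* As a concrete example of the scaling convention (assuming a 32 bit
+   scale shift of two): the scaled unsigned immediate form computes
+   address = Xn + (imm12 << 2), the unscaled signed form computes
+   address = Xn + simm9 with no shift, and the extended register form
+   first widens Wm and then optionally applies the same element
+   scale. */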
+
+/* 32 bit load 32 bit scaled unsigned 12 bit */
+static void
+ldr32_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 32)));
+}
+
+/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
+static void
+ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit load 32 bit scaled or unscaled
+ zero- or sign-extended 32-bit register offset */
+static void
+ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 32, scaling);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_u32 (cpu, address + displacement));
+}
+
+/* 64 bit load 64 bit scaled unsigned 12 bit */
+static void
+ldr_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 64)));
+}
+
+/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
+static void
+ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 64 bit load 64 bit scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 64, scaling);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_u64 (cpu, address + displacement));
+}
+
+/* 32 bit load zero-extended byte scaled unsigned 12 bit. */
+static void
+ldrb32_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be;
+ there is no scaling required for a byte load. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_u8
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
+}
+
+/* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
+static void
+ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit load zero-extended byte scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+
+ /* There is no scaling required for a byte load. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_u8 (cpu, address + displacement));
+}
+
+/* 64 bit load sign-extended byte unscaled signed 9 bit
+ with pre- or post-writeback. */
+static void
+ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 64 bit load sign-extended byte scaled unsigned 12 bit. */
+static void
+ldrsb_abs (sim_cpu *cpu, uint32_t offset)
+{
+ ldrsb_wb (cpu, offset, NoWriteBack);
+}
+
+/* 64 bit load sign-extended byte scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+ /* There is no scaling required for a byte load. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_s8 (cpu, address + displacement));
+}
+
+/* 32 bit load zero-extended short scaled unsigned 12 bit. */
+static void
+ldrh32_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 16)));
+}
+
+/* 32 bit load zero-extended short unscaled signed 9 bit
+ with pre- or post-writeback. */
+static void
+ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit load zero-extended short scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 16, scaling);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_u16 (cpu, address + displacement));
+}
+
+/* 32 bit load sign-extended short scaled unsigned 12 bit. */
+static void
+ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s16
+ (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 16)));
+}
+
+/* 32 bit load sign-extended short unscaled signed 9 bit
+ with pre- or post-writeback. */
+static void
+ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ (uint32_t) aarch64_get_mem_s16 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit load sign-extended short scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 16, scaling);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ (uint32_t) aarch64_get_mem_s16
+ (cpu, address + displacement));
+}
+
+/* 64 bit load sign-extended short scaled unsigned 12 bit. */
+static void
+ldrsh_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 16)));
+}
+
+/* 64 bit load sign-extended short unscaled signed 9 bit
+ with pre- or post-writeback. */
+static void
+ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 64 bit load sign-extended short scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 16, scaling);
+
+ aarch64_set_reg_u64 (cpu, rt, NO_SP,
+ aarch64_get_mem_s16 (cpu, address + displacement));
+}
+
+/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
+static void
+ldrsw_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 32)));
+}
+
+/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
+ with pre- or post-writeback. */
+static void
+ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 32, scaling);
+
+ aarch64_set_reg_s64 (cpu, rt, NO_SP,
+ aarch64_get_mem_s32 (cpu, address + displacement));
+}
+
+/* N.B. with stores the value in source is written to the
+ address identified by source2 modified by source3/offset. */
+
+/* 32 bit store scaled unsigned 12 bit. */
+static void
+str32_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 32)),
+ aarch64_get_reg_u32 (cpu, rt, NO_SP));
+}
+
+/* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
+static void
+str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit store scaled or unscaled zero- or
+ sign-extended 32-bit register offset. */
+static void
+str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 32, scaling);
+
+ aarch64_set_mem_u32 (cpu, address + displacement,
+ aarch64_get_reg_u32 (cpu, rt, NO_SP));
+}
+
+/* 64 bit store scaled unsigned 12 bit. */
+static void
+str_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_mem_u64 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 64),
+ aarch64_get_reg_u64 (cpu, rt, NO_SP));
+}
+
+/* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
+static void
+str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 64 bit store scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+ uint64_t displacement = OPT_SCALE (extended, 64, scaling);
+
+ aarch64_set_mem_u64 (cpu, address + displacement,
+ aarch64_get_reg_u64 (cpu, rt, NO_SP));
+}
+
+/* 32 bit store byte scaled unsigned 12 bit. */
+static void
+strb_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be.
+ There is no scaling required for a byte store. */
+ aarch64_set_mem_u8 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+ aarch64_get_reg_u8 (cpu, rt, NO_SP));
+}
+
+/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
+static void
+strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit store byte scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+
+ /* There is no scaling required for a byte store. */
+ aarch64_set_mem_u8 (cpu, address + displacement,
+ aarch64_get_reg_u8 (cpu, rt, NO_SP));
+}
+
+/* 32 bit store short scaled unsigned 12 bit. */
+static void
+strh_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* The target register may not be SP but the source may be. */
+ aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 16),
+ aarch64_get_reg_u16 (cpu, rt, NO_SP));
+}
+
+/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
+static void
+strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address;
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit store short scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* rn may reference SP, rm and rt must reference ZR */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
+ uint64_t displacement = OPT_SCALE (extended, 16, scaling);
+
+ aarch64_set_mem_u16 (cpu, address + displacement,
+ aarch64_get_reg_u16 (cpu, rt, NO_SP));
+}
+
+/* Prefetch unsigned 12 bit. */
+static void
+prfm_abs (sim_cpu *cpu, uint32_t offset)
+{
+ /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
+ 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
+ 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
+ 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
+ 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
+ 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
+ ow ==> UNALLOC
+ PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + SCALE (offset, 64). */
+
+ /* TODO : implement prefetch of address. */
+}
+
+/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
+static void
+prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
+ 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
+ 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
+ 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
+ 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
+ 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
+ ow ==> UNALLOC
+ rn may reference SP, rm may only reference ZR
+ PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0);
+ uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+ uint64_t displacement = OPT_SCALE (extended, 64, scaling);
+ uint64_t address = base + displacement. */
+
+ /* TODO : implement prefetch of address */
+}
+
+/* 64 bit pc-relative prefetch. */
+static void
+prfm_pcrel (sim_cpu *cpu, int32_t offset)
+{
+ /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
+ 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
+ 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
+ 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
+ 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
+ 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
+ ow ==> UNALLOC
+ PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_PC (cpu) + offset. */
+
+ /* TODO : implement this */
+}
+
+/* Load-store exclusive. */
+
+static void
+ldxr (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int size = uimm (aarch64_get_instr (cpu), 31, 30);
+ /* int ordered = uimm (aarch64_get_instr (cpu), 15, 15); */
+ /* int exclusive = ! uimm (aarch64_get_instr (cpu), 23, 23); */
+
+ switch (size)
+ {
+ case 0:
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
+ break;
+ case 1:
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
+ break;
+ case 2:
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
+ break;
+ case 3:
+ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
+ break;
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+stxr (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rs = uimm (aarch64_get_instr (cpu), 20, 16);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int size = uimm (aarch64_get_instr (cpu), 31, 30);
+ uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
+
+ switch (size)
+ {
+ case 0: aarch64_set_mem_u8 (cpu, address, data); break;
+ case 1: aarch64_set_mem_u16 (cpu, address, data); break;
+ case 2: aarch64_set_mem_u32 (cpu, address, data); break;
+ case 3: aarch64_set_mem_u64 (cpu, address, data); break;
+ default: HALT_UNALLOC;
+ }
+
+ /* Exclusive access is not simulated; the store always succeeds. */
+ aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
+}
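+
+/* N.B. because the simulation is single threaded the exclusive
+   monitor can never be interfered with, so a retry loop such as
+
+     retry:  ldxr  w0, [x1]
+             add   w0, w0, #1
+             stxr  w2, w0, [x1]
+             cbnz  w2, retry
+
+   always exits on its first iteration: stxr above writes 0 (success)
+   to the status register w2. */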
+
+static void
+dexLoadLiteral (sim_cpu *cpu)
+{
+ /* instr[29,27] == 011
+ instr[25,24] == 00
+ instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
+ 010 ==> LDRX, 011 ==> FLDRD
+ 100 ==> LDRSW, 101 ==> FLDRQ
+ 110 ==> PRFM, 111 ==> UNALLOC
+ instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
+ instr[23, 5] == simm19 */
+
+ /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */
+ uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 1)
+ | uimm (aarch64_get_instr (cpu), 26, 26));
+ int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
+
+ switch (dispatch)
+ {
+ case 0: ldr32_pcrel (cpu, imm); break;
+ case 1: fldrs_pcrel (cpu, imm); break;
+ case 2: ldr_pcrel (cpu, imm); break;
+ case 3: fldrd_pcrel (cpu, imm); break;
+ case 4: ldrsw_pcrel (cpu, imm); break;
+ case 5: fldrq_pcrel (cpu, imm); break;
+ case 6: prfm_pcrel (cpu, imm); break;
+ case 7:
+ default:
+ HALT_UNALLOC;
+ }
+}
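+
+/* For example, a 64 bit LDR (literal) has instr[31,30] = 01 and
+   instr[26] = 0, giving dispatch = (1 << 1) | 0 = 2 and a route to
+   ldr_pcrel; the FP double variant sets instr[26], giving
+   dispatch = 3 and fldrd_pcrel. */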
+
+/* Immediate arithmetic
+ The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
+ value left shifted by 12 bits (done at decode).
+
+ N.B. the register args (dest, source) can normally be Xn or SP.
+ The exception occurs for flag setting instructions which may
+ only use Xn for the output (dest). */
+
+/* 32 bit add immediate. */
+static void
+add32 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
+}
+
+/* 64 bit add immediate. */
+static void
+add64 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
+}
+
+static void
+set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
+{
+ int32_t result = value1 + value2;
+ int64_t sresult = (int64_t) value1 + (int64_t) value2;
+ uint64_t uresult = (uint64_t)(uint32_t) value1
+ + (uint64_t)(uint32_t) value2;
+ uint32_t flags = 0;
+
+ if (result == 0)
+ flags |= Z;
+
+ if (result & (1 << 31))
+ flags |= N;
+
+ /* The carry flag is set iff the unsigned 32 bit sum does not
+ fit in 32 bits, i.e. differs from the zero extended result. */
+ if (uresult != (uint32_t) result)
+ flags |= C;
+
+ if (sresult != result)
+ flags |= V;
+
+ aarch64_set_CPSR (cpu, flags);
+}
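+
+/* A worked example of the above (illustrative): adding 0x7fffffff
+   and 1 gives the 32 bit result 0x80000000, so N is set; the signed
+   64 bit sum (+0x80000000) differs from the sign extended 32 bit
+   result (-0x80000000), so V is set; but the unsigned sum still fits
+   in 32 bits, so C stays clear. */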
+
+static void
+set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
+{
+ int64_t sval1 = value1;
+ int64_t sval2 = value2;
+ uint64_t result = value1 + value2;
+ int64_t sresult = sval1 + sval2;
+ uint32_t flags = 0;
+
+ if (result == 0)
+ flags |= Z;
+
+ if (result & (1ULL << 63))
+ flags |= N;
+
+ if (sval1 < 0)
+ {
+ if (sval2 < 0)
+ {
+ /* Negative plus a negative. Overflow happens if
+ the result is greater than either of the operands. */
+ if (sresult > sval1 || sresult > sval2)
+ flags |= V;
+ }
+ /* else Negative plus a positive. Overflow cannot happen. */
+ }
+ else /* value1 is +ve. */
+ {
+ if (sval2 < 0)
+ {
+ /* Overflow can only occur if we computed "0 - MININT". */
+ if (sval1 == 0 && sval2 == (int64_t) ((uint64_t) 1 << 63))
+ flags |= V;
+ }
+ else
+ {
+ /* Positive plus positive - overflow has happened if the
+ result is smaller than either of the operands. */
+ if (result < value1 || result < value2)
+ flags |= V | C;
+ }
+ }
+
+ aarch64_set_CPSR (cpu, flags);
+}
+
+#define NEG(a) (((a) & signbit) == signbit)
+#define POS(a) (((a) & signbit) == 0)
+
+static void
+set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
+{
+ uint32_t result = value1 - value2;
+ uint32_t flags = 0;
+ uint32_t signbit = 1ULL << 31;
+
+ if (result == 0)
+ flags |= Z;
+
+ if (NEG (result))
+ flags |= N;
+
+ if ( (NEG (value1) && POS (value2))
+ || (NEG (value1) && POS (result))
+ || (POS (value2) && POS (result)))
+ flags |= C;
+
+ if ( (NEG (value1) && POS (value2) && POS (result))
+ || (POS (value1) && NEG (value2) && NEG (result)))
+ flags |= V;
+
+ aarch64_set_CPSR (cpu, flags);
+}
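+
+/* Under the convention above C acts as an inverted borrow, matching
+   the architectural definition of SUBS: 5 - 3 leaves C set (no
+   borrow); 3 - 5 leaves C clear and N set; and 0x80000000 - 1 sets V
+   because a negative value minus a positive one produced a positive
+   result. */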
+
+static void
+set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
+{
+ uint64_t result = value1 - value2;
+ uint32_t flags = 0;
+ uint64_t signbit = 1ULL << 63;
+
+ if (result == 0)
+ flags |= Z;
+
+ if (NEG (result))
+ flags |= N;
+
+ if ( (NEG (value1) && POS (value2))
+ || (NEG (value1) && POS (result))
+ || (POS (value2) && POS (result)))
+ flags |= C;
+
+ if ( (NEG (value1) && POS (value2) && POS (result))
+ || (POS (value1) && NEG (value2) && NEG (result)))
+ flags |= V;
+
+ aarch64_set_CPSR (cpu, flags);
+}
+
+static void
+set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
+{
+ uint32_t flags = 0;
+
+ if (result == 0)
+ flags |= Z;
+ else
+ flags &= ~ Z;
+
+ if (result & (1 << 31))
+ flags |= N;
+ else
+ flags &= ~ N;
+
+ aarch64_set_CPSR (cpu, flags);
+}
+
+static void
+set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
+{
+ uint32_t flags = 0;
+
+ if (result == 0)
+ flags |= Z;
+ else
+ flags &= ~ Z;
+
+ if (result & (1ULL << 63))
+ flags |= N;
+ else
+ flags &= ~ N;
+
+ aarch64_set_CPSR (cpu, flags);
+}
+
+/* 32 bit add immediate set flags. */
+static void
+adds32 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* TODO : do we need to worry about signs here? */
+ int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
+ set_flags_for_add32 (cpu, value1, aimm);
+}
+
+/* 64 bit add immediate set flags. */
+static void
+adds64 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ uint64_t value2 = aimm;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
+ set_flags_for_add64 (cpu, value1, value2);
+}
+
+/* 32 bit sub immediate. */
+static void
+sub32 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
+}
+
+/* 64 bit sub immediate. */
+static void
+sub64 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
+}
+
+/* 32 bit sub immediate set flags. */
+static void
+subs32 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
+ uint32_t value2 = aimm;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
+ set_flags_for_sub32 (cpu, value1, value2);
+}
+
+/* 64 bit sub immediate set flags. */
+static void
+subs64 (sim_cpu *cpu, uint32_t aimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ uint32_t value2 = aimm;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
+ set_flags_for_sub64 (cpu, value1, value2);
+}
+
+/* Data Processing Register. */
+
+/* First two helpers to perform the shift operations. */
+
+static inline uint32_t
+shifted32 (uint32_t value, Shift shift, uint32_t count)
+{
+ switch (shift)
+ {
+ default:
+ case LSL:
+ return (value << count);
+ case LSR:
+ return (value >> count);
+ case ASR:
+ {
+ int32_t svalue = value;
+ return (svalue >> count);
+ }
+ case ROR:
+ {
+ uint32_t top = value >> count;
+ uint32_t bottom = value << (32 - count);
+ return (bottom | top);
+ }
+ }
+}
+
+static inline uint64_t
+shifted64 (uint64_t value, Shift shift, uint32_t count)
+{
+ switch (shift)
+ {
+ default:
+ case LSL:
+ return (value << count);
+ case LSR:
+ return (value >> count);
+ case ASR:
+ {
+ int64_t svalue = value;
+ return (svalue >> count);
+ }
+ case ROR:
+ {
+ uint64_t top = value >> count;
+ uint64_t bottom = value << (64 - count);
+ return (bottom | top);
+ }
+ }
+}
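+
+/* For instance, shifted32 (0x80000001, ROR, 4) rotates to
+   0x18000000, while shifted32 (0x80000001, ASR, 4) replicates the
+   sign bit to give 0xf8000000.  N.B. a ROR count of zero would shift
+   by the full register width - undefined behaviour in C - so a zero
+   count should only ever reach these helpers with LSL. */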
+
+/* Arithmetic shifted register.
+ These allow an optional LSL, ASR or LSR to the second source
+ register with a count up to the register bit count.
+
+ N.B. register args may not be SP. */
+
+/* 32 bit ADD shifted register. */
+static void
+add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ shift, count));
+}
+
+/* 64 bit ADD shifted register. */
+static void
+add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
+ shift, count));
+}
+
+/* 32 bit ADD shifted register setting flags. */
+static void
+adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
+ set_flags_for_add32 (cpu, value1, value2);
+}
+
+/* 64 bit ADD shifted register setting flags. */
+static void
+adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
+ set_flags_for_add64 (cpu, value1, value2);
+}
+
+/* 32 bit SUB shifted register. */
+static void
+sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ shift, count));
+}
+
+/* 64 bit SUB shifted register. */
+static void
+sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
+ shift, count));
+}
+
+/* 32 bit SUB shifted register setting flags. */
+static void
+subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
+ set_flags_for_sub32 (cpu, value1, value2);
+}
+
+/* 64 bit SUB shifted register setting flags. */
+static void
+subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
+ set_flags_for_sub64 (cpu, value1, value2);
+}
+
+/* First, a couple more helpers to fetch the relevant source
+ register element, either sign or zero extended as required by
+ the extension value. */
+
+static uint32_t
+extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
+{
+ switch (extension)
+ {
+ case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
+ case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
+ case UXTW: /* Fall through. */
+ case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
+ case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
+ case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
+ case SXTW: /* Fall through. */
+ case SXTX: /* Fall through. */
+ default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
+ }
+}
+
+static uint64_t
+extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
+{
+ switch (extension)
+ {
+ case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
+ case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
+ case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
+ case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
+ case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
+ case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
+ case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
+ case SXTX:
+ default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
+ }
+}
+
+/* Arithmetic extending register
+ These allow an optional sign or zero extension of some portion
+ of the second source register followed by an optional left
+ shift of between 0 and 4 bits.
+
+ N.B. the output (dest) and first input arg (source) may normally
+ be Xn or SP. However, for flag setting operations dest can only
+ be Xn. Second input registers are always Xn. */
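+
+/* For example (illustration only): ADD X0, SP, W1, UXTB #2
+ computes X0 = SP + ((X1 & 0xFF) << 2), i.e. the low byte of the
+ second source, zero extended, then shifted left by two. */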
+
+/* 32 bit ADD extending register. */
+static void
+add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, SP_OK)
+ + (extreg32 (cpu, rm, extension) << shift));
+}
+
+/* 64 bit ADD extending register.
+ N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
+static void
+add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ + (extreg64 (cpu, rm, extension) << shift));
+}
+
+/* 32 bit ADD extending register setting flags. */
+static void
+adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
+ uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
+ set_flags_for_add32 (cpu, value1, value2);
+}
+
+/* 64 bit ADD extending register setting flags.
+ N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
+static void
+adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
+ set_flags_for_add64 (cpu, value1, value2);
+}
+
+/* 32 bit SUB extending register. */
+static void
+sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, SP_OK)
+ - (extreg32 (cpu, rm, extension) << shift));
+}
+
+/* 64 bit SUB extending register. */
+/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
+static void
+sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK)
+ - (extreg64 (cpu, rm, extension) << shift));
+}
+
+/* 32 bit SUB extending register setting flags. */
+static void
+subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
+ uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
+ set_flags_for_sub32 (cpu, value1, value2);
+}
+
+/* 64 bit SUB extending register setting flags.
+ N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
+static void
+subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
+ set_flags_for_sub64 (cpu, value1, value2);
+}
+
+static void
+dexAddSubtractImmediate (sim_cpu *cpu)
+{
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30] = op : 0 ==> ADD, 1 ==> SUB
+ instr[29] = set : 0 ==> no flags, 1 ==> set flags
+ instr[28,24] = 10001
+ instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
+ instr[21,10] = uimm12
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ /* N.B. the shift is applied at decode before calling the add/sub routine. */
+ uint32_t shift = uimm (aarch64_get_instr (cpu), 23, 22);
+ uint32_t imm = uimm (aarch64_get_instr (cpu), 21, 10);
+ uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29);
+
+ NYI_assert (28, 24, 0x11);
+
+ if (shift > 1)
+ HALT_UNALLOC;
+
+ if (shift)
+ imm <<= 12;
+
+ switch (dispatch)
+ {
+ case 0: add32 (cpu, imm); break;
+ case 1: adds32 (cpu, imm); break;
+ case 2: sub32 (cpu, imm); break;
+ case 3: subs32 (cpu, imm); break;
+ case 4: add64 (cpu, imm); break;
+ case 5: adds64 (cpu, imm); break;
+ case 6: sub64 (cpu, imm); break;
+ case 7: subs64 (cpu, imm); break;
+ default:
+ HALT_UNALLOC;
+ }
+}
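+
+/* For example: ADD W0, W1, #16, LSL #12 arrives here with shift = 1
+ and uimm12 = 16, so the value actually added is 16 << 12, i.e.
+ 0x10000, via the dispatch 0 (32 bit ADD) case above. */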
+
+static void
+dexAddSubtractShiftedRegister (sim_cpu *cpu)
+{
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
+ instr[28,24] = 01011
+ instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
+ instr[21] = 0
+ instr[20,16] = Rm
+ instr[15,10] = count : must be 0xxxxx for 32 bit
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ /* 32 bit operations must have count[5] = 0
+ or else we have an UNALLOC. */
+ uint32_t count = uimm (aarch64_get_instr (cpu), 15, 10);
+ /* Shift encoded as ROR is unallocated. */
+ Shift shiftType = shift (aarch64_get_instr (cpu), 22);
+ /* Dispatch on size:op, i.e. aarch64_get_instr (cpu)[31,29]. */
+ uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29);
+
+ NYI_assert (28, 24, 0x0B);
+ NYI_assert (21, 21, 0);
+
+ if (shiftType == ROR)
+ HALT_UNALLOC;
+
+ if (!size && uimm (count, 5, 5))
+ HALT_UNALLOC;
+
+ switch (dispatch)
+ {
+ case 0: add32_shift (cpu, shiftType, count); break;
+ case 1: adds32_shift (cpu, shiftType, count); break;
+ case 2: sub32_shift (cpu, shiftType, count); break;
+ case 3: subs32_shift (cpu, shiftType, count); break;
+ case 4: add64_shift (cpu, shiftType, count); break;
+ case 5: adds64_shift (cpu, shiftType, count); break;
+ case 6: sub64_shift (cpu, shiftType, count); break;
+ case 7: subs64_shift (cpu, shiftType, count); break;
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexAddSubtractExtendedRegister (sim_cpu *cpu)
+{
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30] = op : 0 ==> ADD, 1 ==> SUB
+ instr[29] = set? : 0 ==> no flags, 1 ==> set flags
+ instr[28,24] = 01011
+ instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
+ instr[21] = 1
+ instr[20,16] = Rm
+ instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
+ 010 ==> LSL|UXTW, 011 ==> UXTX,
+ 100 ==> SXTB, 101 ==> SXTH,
+ 110 ==> SXTW, 111 ==> SXTX,
+ instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ Extension extensionType = extension (aarch64_get_instr (cpu), 13);
+ uint32_t shift = uimm (aarch64_get_instr (cpu), 12, 10);
+ /* Dispatch on size:op:set?, i.e. aarch64_get_instr (cpu)[31,29]. */
+ uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29);
+
+ NYI_assert (28, 24, 0x0B);
+ NYI_assert (21, 21, 1);
+
+ /* Shift may not exceed 4. */
+ if (shift > 4)
+ HALT_UNALLOC;
+
+ switch (dispatch)
+ {
+ case 0: add32_ext (cpu, extensionType, shift); break;
+ case 1: adds32_ext (cpu, extensionType, shift); break;
+ case 2: sub32_ext (cpu, extensionType, shift); break;
+ case 3: subs32_ext (cpu, extensionType, shift); break;
+ case 4: add64_ext (cpu, extensionType, shift); break;
+ case 5: adds64_ext (cpu, extensionType, shift); break;
+ case 6: sub64_ext (cpu, extensionType, shift); break;
+ case 7: subs64_ext (cpu, extensionType, shift); break;
+ default: HALT_UNALLOC;
+ }
+}
+
+/* Conditional data processing
+ Condition register is implicit 3rd source. */
+
+/* 32 bit add with carry. */
+/* N.B. register args may not be SP. */
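+
+/* A typical use of these is multi-precision arithmetic, e.g. a
+ 128-bit add: ADDS X0, X2, X4 followed by ADC X1, X3, X5, where
+ the carry out of the low halves feeds the high halves. */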
+
+static void
+adc32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ + aarch64_get_reg_u32 (cpu, rm, NO_SP)
+ + IS_SET (C));
+}
+
+/* 64 bit add with carry */
+static void
+adc64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ + aarch64_get_reg_u64 (cpu, rm, NO_SP)
+ + IS_SET (C));
+}
+
+/* 32 bit add with carry setting flags. */
+static void
+adcs32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
+ uint32_t carry = IS_SET (C);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
+ set_flags_for_add32 (cpu, value1, value2 + carry);
+}
+
+/* 64 bit add with carry setting flags. */
+static void
+adcs64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
+ uint64_t carry = IS_SET (C);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
+ set_flags_for_add64 (cpu, value1, value2 + carry);
+}
+
+/* 32 bit sub with carry. */
+static void
+sbc32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ - aarch64_get_reg_u32 (cpu, rm, NO_SP)
+ - 1 + IS_SET (C));
+}
+
+/* 64 bit sub with carry */
+static void
+sbc64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ - aarch64_get_reg_u64 (cpu, rm, NO_SP)
+ - 1 + IS_SET (C));
+}
+
+/* 32 bit sub with carry setting flags */
+static void
+sbcs32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
+ uint32_t carry = IS_SET (C);
+ /* SBC computes Rn - Rm - (1 - C): NOT (carry) is the borrow in. */
+ uint32_t result = value1 - value2 - 1 + carry;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+ set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
+}
+
+/* 64 bit sub with carry setting flags */
+static void
+sbcs64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
+ uint64_t carry = IS_SET (C);
+ /* As for sbcs32, NOT (carry) is the borrow in. */
+ uint64_t result = value1 - value2 - 1 + carry;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+ set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
+}
+
+static void
+dexAddSubtractWithCarry (sim_cpu *cpu)
+{
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30] = op : 0 ==> ADC, 1 ==> SBC
+ instr[29] = set? : 0 ==> no flags, 1 ==> set flags
+ instr[28,21] = 1 1010 000
+ instr[20,16] = Rm
+ instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ uint32_t op2 = uimm (aarch64_get_instr (cpu), 15, 10);
+ /* Dispatch on size:op:set?, i.e. aarch64_get_instr (cpu)[31,29]. */
+ uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29);
+
+ NYI_assert (28, 21, 0xD0);
+
+ if (op2 != 0)
+ HALT_UNALLOC;
+
+ switch (dispatch)
+ {
+ case 0: adc32 (cpu); break;
+ case 1: adcs32 (cpu); break;
+ case 2: sbc32 (cpu); break;
+ case 3: sbcs32 (cpu); break;
+ case 4: adc64 (cpu); break;
+ case 5: adcs64 (cpu); break;
+ case 6: sbc64 (cpu); break;
+ case 7: sbcs64 (cpu); break;
+ default: HALT_UNALLOC;
+ }
+}
+
+static uint32_t
+testConditionCode (sim_cpu *cpu, CondCode cc)
+{
+ /* This should be reducible to branchless logic
+ by some careful testing of bits in CC followed
+ by the requisite masking and combining of bits
+ from the flag register.
+
+ For now we do it with a switch. */
+ int res;
+
+ switch (cc)
+ {
+ case EQ: res = IS_SET (Z); break;
+ case NE: res = IS_CLEAR (Z); break;
+ case CS: res = IS_SET (C); break;
+ case CC: res = IS_CLEAR (C); break;
+ case MI: res = IS_SET (N); break;
+ case PL: res = IS_CLEAR (N); break;
+ case VS: res = IS_SET (V); break;
+ case VC: res = IS_CLEAR (V); break;
+ case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
+ case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
+ case GE: res = IS_SET (N) == IS_SET (V); break;
+ case LT: res = IS_SET (N) != IS_SET (V); break;
+ case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
+ case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
+ case AL:
+ case NV:
+ default:
+ res = 1;
+ break;
+ }
+ return res;
+}
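+
+/* One possible reduction (a sketch only, not used here): dispatch on
+ cc >> 1 to evaluate the base condition, then invert the result when
+ the low bit of cc is set, except for AL and NV which always hold:
+
+ res = base_condition (cc >> 1);
+ return ((cc & 1) && cc != NV) ? ! res : res;
+
+ where base_condition is a hypothetical helper testing Z, C, N and V
+ as in the switch above. */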
+
+static void
+CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
+{
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30] = compare with positive (0) or negative value (1)
+ instr[29,21] = 1 1101 0010
+ instr[20,16] = Rm or const
+ instr[15,12] = cond
+ instr[11] = compare reg (0) or const (1)
+ instr[10] = 0
+ instr[9,5] = Rn
+ instr[4] = 0
+ instr[3,0] = value for CPSR bits if the comparison does not take place. */
+ signed int negate;
+ unsigned rm;
+ unsigned rn;
+
+ NYI_assert (29, 21, 0x1d2);
+ NYI_assert (10, 10, 0);
+ NYI_assert (4, 4, 0);
+
+ if (! testConditionCode (cpu, uimm (aarch64_get_instr (cpu), 15, 12)))
+ {
+ aarch64_set_CPSR (cpu, uimm (aarch64_get_instr (cpu), 3, 0));
+ return;
+ }
+
+ negate = uimm (aarch64_get_instr (cpu), 30, 30) ? -1 : 1;
+ rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ if (uimm (aarch64_get_instr (cpu), 31, 31))
+ {
+ if (uimm (aarch64_get_instr (cpu), 11, 11))
+ set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
+ negate * (uint64_t) rm);
+ else
+ set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
+ negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
+ }
+ else
+ {
+ if (uimm (aarch64_get_instr (cpu), 11, 11))
+ set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
+ negate * rm);
+ else
+ set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
+ negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
+ }
+}
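+
+/* For example: CCMP X1, X2, #0, NE behaves like CMP X1, X2 when the
+ NE condition holds, and otherwise simply clears all four flags. */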
+
+static void
+do_vec_MOV_whole_vector (sim_cpu *cpu)
+{
+ /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
+
+ instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,21] = 001110101
+ instr[20,16] = Vs
+ instr[15,10] = 000111
+ instr[9,5] = Vs
+ instr[4,0] = Vd */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (29, 21, 0x075);
+ NYI_assert (15, 10, 0x07);
+
+ if (uimm (aarch64_get_instr (cpu), 20, 16) != vs)
+ HALT_NYI;
+
+ if (uimm (aarch64_get_instr (cpu), 30, 30))
+ aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
+
+ aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
+}
+
+static void
+do_vec_MOV_into_scalar (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = word(0)/long(1)
+ instr[29,21] = 00 1110 000
+ instr[20,18] = element size and index
+ instr[17,10] = 00 0011 11
+ instr[9,5] = V source
+ instr[4,0] = R dest */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (29, 21, 0x070);
+ NYI_assert (17, 10, 0x0F);
+
+ switch (uimm (aarch64_get_instr (cpu), 20, 18))
+ {
+ case 0x2:
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
+ break;
+
+ case 0x6:
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
+ break;
+
+ case 0x1:
+ case 0x3:
+ case 0x5:
+ case 0x7:
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_vec_u32 (cpu, vs,
+ uimm (aarch64_get_instr (cpu), 20, 19)));
+ break;
+
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_INS (sim_cpu *cpu)
+{
+ /* instr[31,21] = 01001110000
+ instr[20,16] = element size and index
+ instr[15,10] = 000111
+ instr[9,5] = W source
+ instr[4,0] = V dest */
+
+ int index;
+ unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 21, 0x270);
+ NYI_assert (15, 10, 0x07);
+
+ if (uimm (aarch64_get_instr (cpu), 16, 16))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 17);
+ aarch64_set_vec_u8 (cpu, vd, index,
+ aarch64_get_reg_u8 (cpu, rs, NO_SP));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 17, 17))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 18);
+ aarch64_set_vec_u16 (cpu, vd, index,
+ aarch64_get_reg_u16 (cpu, rs, NO_SP));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 18, 18))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 19);
+ aarch64_set_vec_u32 (cpu, vd, index,
+ aarch64_get_reg_u32 (cpu, rs, NO_SP));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 19, 19))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 20);
+ aarch64_set_vec_u64 (cpu, vd, index,
+ aarch64_get_reg_u64 (cpu, rs, NO_SP));
+ }
+ else
+ HALT_NYI;
+}
+
+static void
+do_vec_DUP_vector_into_vector (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,21] = 00 1110 000
+ instr[20,16] = element size and index
+ instr[15,10] = 0000 01
+ instr[9,5] = V source
+ instr[4,0] = V dest. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ int i, index;
+
+ NYI_assert (29, 21, 0x070);
+ NYI_assert (15, 10, 0x01);
+
+ if (uimm (aarch64_get_instr (cpu), 16, 16))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 17);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 17, 17))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 18);
+
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 18, 18))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 19);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
+ }
+ else
+ {
+ if (uimm (aarch64_get_instr (cpu), 19, 19) == 0)
+ HALT_UNALLOC;
+
+ if (! full)
+ HALT_UNALLOC;
+
+ index = uimm (aarch64_get_instr (cpu), 20, 20);
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
+ }
+}
+
+static void
+do_vec_TBL (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,21] = 00 1110 000
+ instr[20,16] = Vm
+ instr[15] = 0
+ instr[14,13] = vec length
+ instr[12,10] = 000
+ instr[9,5] = V start
+ instr[4,0] = V dest */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int len = uimm (aarch64_get_instr (cpu), 14, 13) + 1;
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 21, 0x070);
+ NYI_assert (12, 10, 0);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+ unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
+ uint8_t val;
+
+ if (selector < 16)
+ val = aarch64_get_vec_u8 (cpu, vn, selector);
+ else if (selector < 32)
+ val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
+ else if (selector < 48)
+ val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
+ else if (selector < 64)
+ val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
+ else
+ val = 0;
+
+ aarch64_set_vec_u8 (cpu, vd, i, val);
+ }
+}
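+
+/* For example, with len = 2 a selector byte of 17 reads byte 1 of
+ the next vector register (vn + 1), while any selector beyond the
+ table length yields 0. */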
+
+static void
+do_vec_TRN (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size
+ instr[21] = 0
+ instr[20,16] = Vm
+ instr[15] = 0
+ instr[14] = TRN1 (0) / TRN2 (1)
+ instr[13,10] = 1010
+ instr[9,5] = V source
+ instr[4,0] = V dest. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int second = uimm (aarch64_get_instr (cpu), 14, 14);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (13, 10, 0xA);
+
+ /* TRN1 (second == 0) interleaves the even numbered elements of Vn
+ and Vm, TRN2 (second == 1) the odd numbered ones; in each output
+ pair the Vn element comes first. */
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ uint8_t n_val = aarch64_get_vec_u8 (cpu, vn, i * 2 + second);
+ uint8_t m_val = aarch64_get_vec_u8 (cpu, vm, i * 2 + second);
+
+ aarch64_set_vec_u8 (cpu, vd, i * 2, n_val);
+ aarch64_set_vec_u8 (cpu, vd, i * 2 + 1, m_val);
+ }
+ break;
+
+ case 1:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ uint16_t n_val = aarch64_get_vec_u16 (cpu, vn, i * 2 + second);
+ uint16_t m_val = aarch64_get_vec_u16 (cpu, vm, i * 2 + second);
+
+ aarch64_set_vec_u16 (cpu, vd, i * 2, n_val);
+ aarch64_set_vec_u16 (cpu, vd, i * 2 + 1, m_val);
+ }
+ break;
+
+ case 2:
+ for (i = 0; i < (full ? 2 : 1); i++)
+ {
+ uint32_t n_val = aarch64_get_vec_u32 (cpu, vn, i * 2 + second);
+ uint32_t m_val = aarch64_get_vec_u32 (cpu, vm, i * 2 + second);
+
+ aarch64_set_vec_u32 (cpu, vd, i * 2, n_val);
+ aarch64_set_vec_u32 (cpu, vd, i * 2 + 1, m_val);
+ }
+ break;
+
+ case 3:
+ if (! full)
+ HALT_UNALLOC;
+
+ aarch64_set_vec_u64 (cpu, vd, 0,
+ aarch64_get_vec_u64 (cpu, vn, second));
+ aarch64_set_vec_u64 (cpu, vd, 1,
+ aarch64_get_vec_u64 (cpu, vm, second));
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
+ [must be 1 for 64-bit xfer]
+ instr[29,20] = 00 1110 0000
+ instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
+ 0100=> 32-bits. 1000=>64-bits
+ instr[15,10] = 0000 11
+ instr[9,5] = W source
+ instr[4,0] = V dest. */
+
+ unsigned i;
+ unsigned Vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned Rs = uimm (aarch64_get_instr (cpu), 9, 5);
+ int both = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 20, 0x0E0);
+ NYI_assert (15, 10, 0x03);
+
+ switch (uimm (aarch64_get_instr (cpu), 19, 16))
+ {
+ case 1:
+ for (i = 0; i < (both ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
+ break;
+
+ case 2:
+ for (i = 0; i < (both ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
+ break;
+
+ case 4:
+ for (i = 0; i < (both ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
+ break;
+
+ case 8:
+ if (!both)
+ HALT_NYI;
+ aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
+ aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
+ break;
+
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_UZP (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size: byte(00), half(01), word (10), long (11)
+ instr[21] = 0
+ instr[20,16] = Vm
+ instr[15] = 0
+ instr[14] = lower (0) / upper (1)
+ instr[13,10] = 0110
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int upper = uimm (aarch64_get_instr (cpu), 14, 14);
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
+ uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
+ uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
+ uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
+
+ uint64_t val1 = 0;
+ uint64_t val2 = 0;
+
+ uint64_t src[4];
+ unsigned size;
+ unsigned esize;
+ unsigned per64;
+ unsigned elems;
+ uint64_t mask;
+ unsigned i;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 0);
+ NYI_assert (15, 15, 0);
+ NYI_assert (13, 10, 6);
+
+ size = uimm (aarch64_get_instr (cpu), 23, 22);
+ esize = 8 << size; /* Element size in bits. */
+ per64 = 64 / esize; /* Elements per 64-bit chunk. */
+ elems = (full ? 128 : 64) / esize; /* Result element count. */
+ mask = (esize == 64) ? ~(uint64_t) 0 : ((1ULL << esize) - 1);
+
+ /* UZP1 (upper == 0) selects the even numbered elements of the
+ concatenation Vm:Vn, with Vn supplying the lower indices; UZP2
+ selects the odd numbered ones. Work from snapshots so that VD
+ may alias VN or VM. */
+ src[0] = val_n1;
+ src[1] = full ? val_n2 : val_m1;
+ src[2] = val_m1;
+ src[3] = val_m2;
+
+ for (i = 0; i < elems; i++)
+ {
+ unsigned j = 2 * i + upper;
+ uint64_t elem = (src[j / per64] >> ((j % per64) * esize)) & mask;
+
+ if (i < per64)
+ val1 |= elem << ((i % per64) * esize);
+ else
+ val2 |= elem << ((i % per64) * esize);
+ }
+
+ aarch64_set_vec_u64 (cpu, vd, 0, val1);
+ if (full)
+ aarch64_set_vec_u64 (cpu, vd, 1, val2);
+}
+
+static void
+do_vec_ZIP (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size: byte(00), half(01), word (10), long (11)
+ instr[21] = 0
+ instr[20,16] = Vm
+ instr[15] = 0
+ instr[14] = lower (0) / upper (1)
+ instr[13,10] = 1110
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int upper = uimm (aarch64_get_instr (cpu), 14, 14);
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
+ uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
+ uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
+ uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
+
+ uint64_t val1 = 0;
+ uint64_t val2 = 0;
+
+ /* For full width operands ZIP1 (upper == 0) interleaves the lower
+ 64-bit halves of Vn and Vm and ZIP2 the upper halves; for half
+ width operands the part selection happens when the result is
+ written back. */
+ uint64_t input1 = (full && upper) ? val_n2 : val_n1;
+ uint64_t input2 = (full && upper) ? val_m2 : val_m1;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 0);
+ NYI_assert (15, 15, 0);
+ NYI_assert (13, 10, 0xE);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 23))
+ {
+ case 0:
+ val1 =
+ ((input1 << 0) & (0xFF << 0))
+ | ((input2 << 8) & (0xFF << 8))
+ | ((input1 << 8) & (0xFF << 16))
+ | ((input2 << 16) & (0xFF << 24))
+ | ((input1 << 16) & (0xFFULL << 32))
+ | ((input2 << 24) & (0xFFULL << 40))
+ | ((input1 << 24) & (0xFFULL << 48))
+ | ((input2 << 32) & (0xFFULL << 56));
+
+ val2 =
+ ((input1 >> 32) & (0xFF << 0))
+ | ((input2 >> 24) & (0xFF << 8))
+ | ((input1 >> 24) & (0xFF << 16))
+ | ((input2 >> 16) & (0xFF << 24))
+ | ((input1 >> 16) & (0xFFULL << 32))
+ | ((input2 >> 8) & (0xFFULL << 40))
+ | ((input1 >> 8) & (0xFFULL << 48))
+ | ((input2 >> 0) & (0xFFULL << 56));
+ break;
+
+ case 1:
+ val1 =
+ ((input1 << 0) & (0xFFFF << 0))
+ | ((input2 << 16) & (0xFFFF << 16))
+ | ((input1 << 16) & (0xFFFFULL << 32))
+ | ((input2 << 32) & (0xFFFFULL << 48));
+
+ val2 =
+ ((input1 >> 32) & (0xFFFF << 0))
+ | ((input2 >> 16) & (0xFFFF << 16))
+ | ((input1 >> 16) & (0xFFFFULL << 32))
+ | ((input2 >> 0) & (0xFFFFULL << 48));
+ break;
+
+ case 2:
+ val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
+ val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
+ break;
+
+ case 3:
+ val1 = input1;
+ val2 = input2;
+ break;
+ }
+
+ if (full)
+ {
+ aarch64_set_vec_u64 (cpu, vd, 0, val1);
+ aarch64_set_vec_u64 (cpu, vd, 1, val2);
+ }
+ else
+ /* For half width operands VAL1 interleaves the low halves of the
+ two sources and VAL2 their high halves; ZIP2 wants the latter. */
+ aarch64_set_vec_u64 (cpu, vd, 0, upper ? val2 : val1);
+}
+
+/* Floating point immediates are encoded in 8 bits.
+ fpimm[7] = sign bit.
+ fpimm[6:4] = signed exponent.
+ fpimm[3:0] = fraction (assuming leading 1).
+ i.e. F = s * 1.f * 2^(e - b). */
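+
+/* For example, imm8 = 0x70 (s = 0, e = 7, f = 0) encodes 1.0, and
+ imm8 = 0x00 encodes 2.0, for both the 32 and 64 bit expansions
+ below. */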
+
+static float
+fp_immediate_for_encoding_32 (uint32_t imm8)
+{
+ float u;
+ uint32_t s, e, f, i;
+
+ s = (imm8 >> 7) & 0x1;
+ e = (imm8 >> 4) & 0x7;
+ f = imm8 & 0xf;
+
+ /* The fp value is s * n/16 * 2^r where n is 16+f. */
+ u = (16.0 + f) / 16.0;
+
+ /* N.B. exponent is signed. */
+ if (e < 4)
+ {
+ int epos = e;
+
+ for (i = 0; i <= epos; i++)
+ u *= 2.0;
+ }
+ else
+ {
+ int eneg = 7 - e;
+
+ for (i = 0; i < eneg; i++)
+ u /= 2.0;
+ }
+
+ if (s)
+ u = - u;
+
+ return u;
+}
+
+static double
+fp_immediate_for_encoding_64 (uint32_t imm8)
+{
+ double u;
+ uint32_t s, e, f, i;
+
+ s = (imm8 >> 7) & 0x1;
+ e = (imm8 >> 4) & 0x7;
+ f = imm8 & 0xf;
+
+ /* The fp value is s * n/16 * 2^r where n is 16+f. */
+ u = (16.0 + f) / 16.0;
+
+ /* N.B. exponent is signed. */
+ if (e < 4)
+ {
+ int epos = e;
+
+ for (i = 0; i <= epos; i++)
+ u *= 2.0;
+ }
+ else
+ {
+ int eneg = 7 - e;
+
+ for (i = 0; i < eneg; i++)
+ u /= 2.0;
+ }
+
+ if (s)
+ u = - u;
+
+ return u;
+}
+
+static void
+do_vec_MOV_immediate (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,19] = 00111100000
+ instr[18,16] = high 3 bits of uimm8
+ instr[15,12] = size & shift:
+ 0000 => 32-bit
+ 0010 => 32-bit + LSL#8
+ 0100 => 32-bit + LSL#16
+ 0110 => 32-bit + LSL#24
+ 1010 => 16-bit + LSL#8
+ 1000 => 16-bit
+ 1101 => 32-bit + MSL#16
+ 1100 => 32-bit + MSL#8
+ 1110 => 8-bit
+ 1111 => double
+ instr[11,10] = 01
+ instr[9,5] = low 5-bits of uimm8
+ instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned val = uimm (aarch64_get_instr (cpu), 18, 16) << 5
+ | uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned i;
+
+ NYI_assert (29, 19, 0x1E0);
+ NYI_assert (11, 10, 1);
+
+ switch (uimm (aarch64_get_instr (cpu), 15, 12))
+ {
+ case 0x0: /* 32-bit, no shift. */
+ case 0x2: /* 32-bit, shift by 8. */
+ case 0x4: /* 32-bit, shift by 16. */
+ case 0x6: /* 32-bit, shift by 24. */
+ val <<= (8 * uimm (aarch64_get_instr (cpu), 14, 13));
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ break;
+
+ case 0xa: /* 16-bit, shift by 8. */
+ val <<= 8;
+ /* Fall through. */
+ case 0x8: /* 16-bit, no shift. */
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i, val);
+ break;
+
+ case 0xd: /* 32-bit, mask shift by 16. */
+ val <<= 8;
+ val |= 0xFF;
+ /* Fall through. */
+ case 0xc: /* 32-bit, mask shift by 8. */
+ val <<= 8;
+ val |= 0xFF;
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ break;
+
+ case 0xe: /* 8-bit, no shift. */
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i, val);
+ break;
+
+ case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
+ {
+ float u = fp_immediate_for_encoding_32 (val);
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i, u);
+ break;
+ }
+
+ default:
+ HALT_NYI;
+ }
+}
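+
+/* For example, MOVI Vd.4S, #0x12, LSL #8 takes the 0x2 path above
+ and replicates 0x00001200 into each 32-bit lane. */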
+
+static void
+do_vec_MVNI (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,19] = 10111100000
+ instr[18,16] = high 3 bits of uimm8
+ instr[15,12] = selector
+ instr[11,10] = 01
+ instr[9,5] = low 5-bits of uimm8
+ instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned val = uimm (aarch64_get_instr (cpu), 18, 16) << 5
+ | uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned i;
+
+ NYI_assert (29, 19, 0x5E0);
+ NYI_assert (11, 10, 1);
+
+ switch (uimm (aarch64_get_instr (cpu), 15, 12))
+ {
+ case 0x0: /* 32-bit, no shift. */
+ case 0x2: /* 32-bit, shift by 8. */
+ case 0x4: /* 32-bit, shift by 16. */
+ case 0x6: /* 32-bit, shift by 24. */
+ val <<= (8 * uimm (aarch64_get_instr (cpu), 14, 13));
+ val = ~ val;
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ return;
+
+ case 0xa: /* 16-bit, 8 bit shift. */
+ val <<= 8;
+ /* Fall through. */
+ case 0x8: /* 16-bit, no shift. */
+ val = ~ val;
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i, val);
+ return;
+
+ case 0xd: /* 32-bit, mask shift by 16. */
+ val <<= 8;
+ val |= 0xFF;
+ /* Fall through. */
+ case 0xc: /* 32-bit, mask shift by 8. */
+ val <<= 8;
+ val |= 0xFF;
+ val = ~ val;
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ return;
+
+ case 0xE: /* MOVI Dn, #mask64 */
+ {
+ uint64_t mask = 0;
+
+ for (i = 0; i < 8; i++)
+ if (val & (1 << i))
+ mask |= (0xFFULL << (i * 8));
+ aarch64_set_vec_u64 (cpu, vd, 0, mask);
+ aarch64_set_vec_u64 (cpu, vd, 1, 0);
+ return;
+ }
+
+ case 0xf: /* FMOV Vd.2D, #fpimm. */
+ {
+ double u = fp_immediate_for_encoding_64 (val);
+
+ if (! full)
+ HALT_UNALLOC;
+
+ aarch64_set_vec_double (cpu, vd, 0, u);
+ aarch64_set_vec_double (cpu, vd, 1, u);
+ return;
+ }
+
+ default:
+ HALT_NYI;
+ }
+}
+
+#define ABS(A) ((A) < 0 ? - (A) : (A))
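+
+/* N.B. like the ABS instruction itself, this leaves the most
+ negative value unchanged: for a byte element, ABS (-128) is -128
+ again once truncated back to 8 bits (assuming the usual two's
+ complement conversion). */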
+
+static void
+do_vec_ABS (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
+ instr[21,10] = 10 0000 1011 10
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 10, 0x82E);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_s8 (cpu, vd, i,
+ ABS (aarch64_get_vec_s8 (cpu, vn, i)));
+ break;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_s16 (cpu, vd, i,
+ ABS (aarch64_get_vec_s16 (cpu, vn, i)));
+ break;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, vd, i,
+ ABS (aarch64_get_vec_s32 (cpu, vn, i)));
+ break;
+
+ case 3:
+ if (! full)
+ HALT_NYI;
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, vd, i,
+ ABS (aarch64_get_vec_s64 (cpu, vn, i)));
+ break;
+ }
+}
+
+static void
+do_vec_ADDV (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,24] = 00 1110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
+ instr[21,10] = 11 0001 1011 10
+ instr[9,5] = Vm
+ instr[4,0] = Rd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ uint64_t val = 0;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 10, 0xC6E);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ val += aarch64_get_vec_u8 (cpu, vm, i);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ val += aarch64_get_vec_u16 (cpu, vm, i);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ val += aarch64_get_vec_u32 (cpu, vm, i);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
+ return;
+
+ case 3:
+ if (! full)
+ HALT_UNALLOC;
+ val = aarch64_get_vec_u64 (cpu, vm, 0);
+ val += aarch64_get_vec_u64 (cpu, vm, 1);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+static void
+do_vec_ins_2 (sim_cpu *cpu)
+{
+ /* instr[31,21] = 01001110000
+ instr[20,18] = size & element selector
+ instr[17,14] = 0000
+ instr[13] = direction: to vec(0), from vec (1)
+ instr[12,10] = 111
+ instr[9,5] = Vm
+ instr[4,0] = Vd. */
+
+ unsigned elem;
+ unsigned vm = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 21, 0x270);
+ NYI_assert (17, 14, 0);
+ NYI_assert (12, 10, 7);
+
+ if (uimm (aarch64_get_instr (cpu), 13, 13) == 1)
+ {
+ if (uimm (aarch64_get_instr (cpu), 18, 18) == 1)
+ {
+ /* 32-bit moves. */
+ elem = uimm (aarch64_get_instr (cpu), 20, 19);
+ aarch64_set_reg_u64 (cpu, vd, NO_SP,
+ aarch64_get_vec_u32 (cpu, vm, elem));
+ }
+ else
+ {
+ /* 64-bit moves. */
+ if (uimm (aarch64_get_instr (cpu), 19, 19) != 1)
+ HALT_NYI;
+
+ elem = uimm (aarch64_get_instr (cpu), 20, 20);
+ aarch64_set_reg_u64 (cpu, vd, NO_SP,
+ aarch64_get_vec_u64 (cpu, vm, elem));
+ }
+ }
+ else
+ {
+ if (uimm (aarch64_get_instr (cpu), 18, 18) == 1)
+ {
+ /* 32-bit moves. */
+ elem = uimm (aarch64_get_instr (cpu), 20, 19);
+ aarch64_set_vec_u32 (cpu, vd, elem,
+ aarch64_get_reg_u32 (cpu, vm, NO_SP));
+ }
+ else
+ {
+ /* 64-bit moves. */
+ if (uimm (aarch64_get_instr (cpu), 19, 19) != 1)
+ HALT_NYI;
+
+ elem = uimm (aarch64_get_instr (cpu), 20, 20);
+ aarch64_set_vec_u64 (cpu, vd, elem,
+ aarch64_get_reg_u64 (cpu, vm, NO_SP));
+ }
+ }
+}
+
+static void
+do_vec_mull (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = lower(0)/upper(1) selector
+ instr[29] = signed(0)/unsigned(1)
+ instr[28,24] = 0 1110
+ instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 11 0000
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ int unsign = uimm (aarch64_get_instr (cpu), 29, 29);
+ int bias = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (28, 24, 0x0E);
+ NYI_assert (15, 10, 0x30);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ if (bias)
+ bias = 8;
+ if (unsign)
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i + bias)
+ * aarch64_get_vec_u8 (cpu, vm, i + bias));
+ else
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_s16 (cpu, vd, i,
+ aarch64_get_vec_s8 (cpu, vn, i + bias)
+ * aarch64_get_vec_s8 (cpu, vm, i + bias));
+ return;
+
+ case 1:
+ if (bias)
+ bias = 4;
+ if (unsign)
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u16 (cpu, vn, i + bias)
+ * aarch64_get_vec_u16 (cpu, vm, i + bias));
+ else
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_s32 (cpu, vd, i,
+ aarch64_get_vec_s16 (cpu, vn, i + bias)
+ * aarch64_get_vec_s16 (cpu, vm, i + bias));
+ return;
+
+ case 2:
+ if (bias)
+ bias = 2;
+ if (unsign)
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_u64 (cpu, vd, i,
+ (uint64_t) aarch64_get_vec_u32 (cpu, vn,
+ i + bias)
+ * (uint64_t) aarch64_get_vec_u32 (cpu, vm,
+ i + bias));
+ else
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, vd, i,
+ aarch64_get_vec_s32 (cpu, vn, i + bias)
+ * aarch64_get_vec_s32 (cpu, vm, i + bias));
+ return;
+
+ case 3:
+ default:
+ HALT_NYI;
+ }
+}
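+
+/* For example, UMULL2 Vd.8H, Vn.16B, Vm.16B takes the case 0 path
+ with bias == 8, multiplying the upper eight byte pairs into eight
+ 16-bit lanes. */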
+
+static void
+do_vec_fadd (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 001110
+ instr[23] = FADD(0)/FSUB(1)
+ instr[22] = float (0)/double(1)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 110101
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x35);
+
+ if (uimm (aarch64_get_instr (cpu), 23, 23))
+ {
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_NYI;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ aarch64_get_vec_double (cpu, vn, i)
+ - aarch64_get_vec_double (cpu, vm, i));
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ aarch64_get_vec_float (cpu, vn, i)
+ - aarch64_get_vec_float (cpu, vm, i));
+ }
+ }
+ else
+ {
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_NYI;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ aarch64_get_vec_double (cpu, vm, i)
+ + aarch64_get_vec_double (cpu, vn, i));
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ aarch64_get_vec_float (cpu, vm, i)
+ + aarch64_get_vec_float (cpu, vn, i));
+ }
+ }
+}
+
+static void
+do_vec_add (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,24] = 001110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 100001
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x21);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
+ + aarch64_get_vec_u8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
+ + aarch64_get_vec_u16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
+ + aarch64_get_vec_u32 (cpu, vm, i));
+ return;
+
+ case 3:
+ if (! full)
+ HALT_UNALLOC;
+ aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
+ + aarch64_get_vec_u64 (cpu, vm, 0));
+ aarch64_set_vec_u64 (cpu, vd, 1,
+ aarch64_get_vec_u64 (cpu, vn, 1)
+ + aarch64_get_vec_u64 (cpu, vm, 1));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+static void
+do_vec_mul (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,24] = 00 1110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 10 0111
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x27);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+ uint16_t val = aarch64_get_vec_u8 (cpu, vn, i);
+ val *= aarch64_get_vec_u8 (cpu, vm, i);
+
+ aarch64_set_vec_u16 (cpu, vd, i, val);
+ }
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ uint32_t val = aarch64_get_vec_u16 (cpu, vn, i);
+ val *= aarch64_get_vec_u16 (cpu, vm, i);
+
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ }
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ uint64_t val = aarch64_get_vec_u32 (cpu, vn, i);
+ val *= aarch64_get_vec_u32 (cpu, vm, i);
+
+ aarch64_set_vec_u64 (cpu, vd, i, val);
+ }
+ return;
+
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+do_vec_MLA (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,24] = 00 1110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 1001 01
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x25);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+ uint16_t val = aarch64_get_vec_u8 (cpu, vn, i);
+ val *= aarch64_get_vec_u8 (cpu, vm, i);
+ val += aarch64_get_vec_u8 (cpu, vd, i);
+
+ aarch64_set_vec_u16 (cpu, vd, i, val);
+ }
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ uint32_t val = aarch64_get_vec_u16 (cpu, vn, i);
+ val *= aarch64_get_vec_u16 (cpu, vm, i);
+ val += aarch64_get_vec_u16 (cpu, vd, i);
+
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ }
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ uint64_t val = aarch64_get_vec_u32 (cpu, vn, i);
+ val *= aarch64_get_vec_u32 (cpu, vm, i);
+ val += aarch64_get_vec_u32 (cpu, vd, i);
+
+ aarch64_set_vec_u64 (cpu, vd, i, val);
+ }
+ return;
+
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+}
+
+/* Helpers implementing the maxNum/minNum semantics used by FMAXNM,
+ FMINNM and friends: when exactly one operand is a NaN the other,
+ numeric, operand is returned, so only a NaN operand is treated as
+ unusable; zeros, infinities and subnormals compare normally. */
+
+static float
+fmaxnm (float a, float b)
+{
+ if (! isnan (a))
+ {
+ if (! isnan (b))
+ return a > b ? a : b;
+ return a;
+ }
+ else if (! isnan (b))
+ return b;
+ return a;
+}
+
+static float
+fminnm (float a, float b)
+{
+ if (! isnan (a))
+ {
+ if (! isnan (b))
+ return a < b ? a : b;
+ return a;
+ }
+ else if (! isnan (b))
+ return b;
+ return a;
+}
+
+static double
+dmaxnm (double a, double b)
+{
+ if (! isnan (a))
+ {
+ if (! isnan (b))
+ return a > b ? a : b;
+ return a;
+ }
+ else if (! isnan (b))
+ return b;
+ return a;
+}
+
+static double
+dminnm (double a, double b)
+{
+ if (! isnan (a))
+ {
+ if (! isnan (b))
+ return a < b ? a : b;
+ return a;
+ }
+ else if (! isnan (b))
+ return b;
+ return a;
+}
+
+static void
+do_vec_FminmaxNMP (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,24] = 10 1110
+ instr[23] = max(0)/min(1)
+ instr[22] = float (0)/double (1)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 1100 01
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 24, 0x2E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x31);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ double (* fn)(double, double) = uimm (aarch64_get_instr (cpu), 23, 23)
+ ? dminnm : dmaxnm;
+
+ if (! full)
+ HALT_NYI;
+ aarch64_set_vec_double (cpu, vd, 0,
+ fn (aarch64_get_vec_double (cpu, vn, 0),
+ aarch64_get_vec_double (cpu, vn, 1)));
+ aarch64_set_vec_double (cpu, vd, 1,
+ fn (aarch64_get_vec_double (cpu, vm, 0),
+ aarch64_get_vec_double (cpu, vm, 1)));
+ }
+ else
+ {
+ float (* fn)(float, float) = uimm (aarch64_get_instr (cpu), 23, 23)
+ ? fminnm : fmaxnm;
+
+ aarch64_set_vec_float (cpu, vd, 0,
+ fn (aarch64_get_vec_float (cpu, vn, 0),
+ aarch64_get_vec_float (cpu, vn, 1)));
+ if (full)
+ aarch64_set_vec_float (cpu, vd, 1,
+ fn (aarch64_get_vec_float (cpu, vn, 2),
+ aarch64_get_vec_float (cpu, vn, 3)));
+
+ aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
+ fn (aarch64_get_vec_float (cpu, vm, 0),
+ aarch64_get_vec_float (cpu, vm, 1)));
+ if (full)
+ aarch64_set_vec_float (cpu, vd, 3,
+ fn (aarch64_get_vec_float (cpu, vm, 2),
+ aarch64_get_vec_float (cpu, vm, 3)));
+ }
+}
+
+static void
+do_vec_AND (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 001110001
+ instr[20,16] = Vm
+ instr[15,10] = 000111
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 21, 0x071);
+ NYI_assert (15, 10, 0x07);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u32 (cpu, vn, i)
+ & aarch64_get_vec_u32 (cpu, vm, i));
+}
+
+static void
+do_vec_BSL (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 101110011
+ instr[20,16] = Vm
+ instr[15,10] = 000111
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 21, 0x173);
+ NYI_assert (15, 10, 0x07);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ ( aarch64_get_vec_u8 (cpu, vd, i)
+ & aarch64_get_vec_u8 (cpu, vn, i))
+ | ((~ aarch64_get_vec_u8 (cpu, vd, i))
+ & aarch64_get_vec_u8 (cpu, vm, i)));
+}
+
+static void
+do_vec_EOR (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 10 1110 001
+ instr[20,16] = Vm
+ instr[15,10] = 000111
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 21, 0x171);
+ NYI_assert (15, 10, 0x07);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u32 (cpu, vn, i)
+ ^ aarch64_get_vec_u32 (cpu, vm, i));
+}
+
+static void
+do_vec_bit (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,23] = 10 1110 1
+ instr[22] = BIT (0) / BIF (1)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 0001 11
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned test_false = uimm (aarch64_get_instr (cpu), 22, 22);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x5D);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x07);
+
+ /* BIT (instr[22] == 0) inserts the Vn bits into Vd wherever the
+ corresponding Vm bit is set; BIF (instr[22] == 1) inserts them
+ wherever the Vm bit is clear. */
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
+ uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
+ uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
+
+ if (test_false)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ (vd_val & vm_val) | (vn_val & ~vm_val));
+ else
+ aarch64_set_vec_u32 (cpu, vd, i,
+ (vd_val & ~vm_val) | (vn_val & vm_val));
+ }
+}
+
+static void
+do_vec_ORN (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 00 1110 111
+ instr[20,16] = Vm
+ instr[15,10] = 00 0111
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 21, 0x077);
+ NYI_assert (15, 10, 0x07);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i)
+ | ~ aarch64_get_vec_u8 (cpu, vm, i));
+}
+
+static void
+do_vec_ORR (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 00 1110 101
+ instr[20,16] = Vm
+ instr[15,10] = 0001 11
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 21, 0x075);
+ NYI_assert (15, 10, 0x07);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i)
+ | aarch64_get_vec_u8 (cpu, vm, i));
+}
+
+static void
+do_vec_BIC (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 00 1110 011
+ instr[20,16] = Vm
+ instr[15,10] = 00 0111
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 21, 0x073);
+ NYI_assert (15, 10, 0x07);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i)
+ & ~ aarch64_get_vec_u8 (cpu, vm, i));
+}
+
+static void
+do_vec_XTN (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = first part (0)/ second part (1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size: byte(00), half(01), word (10)
+ instr[21,10] = 1000 0100 1010
+ instr[9,5] = Vs
+ instr[4,0] = Vd. */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned bias = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 10, 0x84A);
+
+ /* XTN narrows each source element by truncation; the second part
+ form (XTN2, bias set) writes the narrowed elements into the
+ upper half of Vd instead of the lower half. */
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_u8 (cpu, vd, i + (bias ? 8 : 0),
+ aarch64_get_vec_u16 (cpu, vs, i));
+ return;
+
+ case 1:
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_u16 (cpu, vd, i + (bias ? 4 : 0),
+ aarch64_get_vec_u32 (cpu, vs, i));
+ return;
+
+ case 2:
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_u32 (cpu, vd, i + (bias ? 2 : 0),
+ aarch64_get_vec_u64 (cpu, vs, i));
+ return;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+#define MAX(A,B) ((A) > (B) ? (A) : (B))
+#define MIN(A,B) ((A) < (B) ? (A) : (B))
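+
+/* N.B. these macros evaluate their arguments twice, so callers in
+ this file only pass them simple values, never expressions with
+ side effects. */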
+
+static void
+do_vec_maxv (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29] = signed (0)/unsigned(1)
+ instr[28,24] = 0 1110
+ instr[23,22] = size: byte(00), half(01), word (10)
+ instr[21] = 1
+ instr[20,17] = 1 000
+ instr[16] = max(0)/min(1)
+ instr[15,10] = 1010 10
+ instr[9,5] = V source
+     instr[4,0] = R dest. */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned i;
+
+ NYI_assert (28, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (20, 17, 8);
+ NYI_assert (15, 10, 0x2A);
+
+ switch ((uimm (aarch64_get_instr (cpu), 29, 29) << 1)
+ | uimm (aarch64_get_instr (cpu), 16, 16))
+ {
+ case 0: /* SMAXV. */
+ {
+ int64_t smax;
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ smax = aarch64_get_vec_s8 (cpu, vs, 0);
+ for (i = 1; i < (full ? 16 : 8); i++)
+ smax = MAX (smax, aarch64_get_vec_s8 (cpu, vs, i));
+ break;
+ case 1:
+ smax = aarch64_get_vec_s16 (cpu, vs, 0);
+ for (i = 1; i < (full ? 8 : 4); i++)
+ smax = MAX (smax, aarch64_get_vec_s16 (cpu, vs, i));
+ break;
+ case 2:
+ smax = aarch64_get_vec_s32 (cpu, vs, 0);
+ for (i = 1; i < (full ? 4 : 2); i++)
+ smax = MAX (smax, aarch64_get_vec_s32 (cpu, vs, i));
+ break;
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
+ return;
+ }
+
+ case 1: /* SMINV. */
+ {
+ int64_t smin;
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ smin = aarch64_get_vec_s8 (cpu, vs, 0);
+ for (i = 1; i < (full ? 16 : 8); i++)
+ smin = MIN (smin, aarch64_get_vec_s8 (cpu, vs, i));
+ break;
+ case 1:
+ smin = aarch64_get_vec_s16 (cpu, vs, 0);
+ for (i = 1; i < (full ? 8 : 4); i++)
+ smin = MIN (smin, aarch64_get_vec_s16 (cpu, vs, i));
+ break;
+ case 2:
+ smin = aarch64_get_vec_s32 (cpu, vs, 0);
+ for (i = 1; i < (full ? 4 : 2); i++)
+ smin = MIN (smin, aarch64_get_vec_s32 (cpu, vs, i));
+ break;
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
+ return;
+ }
+
+ case 2: /* UMAXV. */
+ {
+ uint64_t umax;
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ umax = aarch64_get_vec_u8 (cpu, vs, 0);
+ for (i = 1; i < (full ? 16 : 8); i++)
+ umax = MAX (umax, aarch64_get_vec_u8 (cpu, vs, i));
+ break;
+ case 1:
+ umax = aarch64_get_vec_u16 (cpu, vs, 0);
+ for (i = 1; i < (full ? 8 : 4); i++)
+ umax = MAX (umax, aarch64_get_vec_u16 (cpu, vs, i));
+ break;
+ case 2:
+ umax = aarch64_get_vec_u32 (cpu, vs, 0);
+ for (i = 1; i < (full ? 4 : 2); i++)
+ umax = MAX (umax, aarch64_get_vec_u32 (cpu, vs, i));
+ break;
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
+ return;
+ }
+
+ case 3: /* UMINV. */
+ {
+ uint64_t umin;
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ umin = aarch64_get_vec_u8 (cpu, vs, 0);
+ for (i = 1; i < (full ? 16 : 8); i++)
+ umin = MIN (umin, aarch64_get_vec_u8 (cpu, vs, i));
+ break;
+ case 1:
+ umin = aarch64_get_vec_u16 (cpu, vs, 0);
+ for (i = 1; i < (full ? 8 : 4); i++)
+ umin = MIN (umin, aarch64_get_vec_u16 (cpu, vs, i));
+ break;
+ case 2:
+ umin = aarch64_get_vec_u32 (cpu, vs, 0);
+ for (i = 1; i < (full ? 4 : 2); i++)
+ umin = MIN (umin, aarch64_get_vec_u32 (cpu, vs, i));
+ break;
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
+ return;
+ }
+
+ default:
+ HALT_UNALLOC;
+ }
+}
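+
+/* Worked example (illustrative): for SMAXV with byte lanes and
+   Vs.8B = { 1, -5, 7, 3, 0, 0, 0, 0 }, the reduction loop above
+   yields the signed maximum 7; UMINV over the same bytes viewed as
+   unsigned would instead pick 0.  */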
+
+static void
+do_vec_fminmaxV (sim_cpu *cpu)
+{
+ /* instr[31,24] = 0110 1110
+ instr[23] = max(0)/min(1)
+ instr[22,14] = 011 0000 11
+ instr[13,12] = nm(00)/normal(11)
+ instr[11,10] = 10
+ instr[9,5] = V source
+     instr[4,0] = R dest. */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ float res = aarch64_get_vec_float (cpu, vs, 0);
+
+ NYI_assert (31, 24, 0x6E);
+ NYI_assert (22, 14, 0x0C3);
+ NYI_assert (11, 10, 2);
+
+ if (uimm (aarch64_get_instr (cpu), 23, 23))
+ {
+ switch (uimm (aarch64_get_instr (cpu), 13, 12))
+ {
+	case 0: /* FMINNMV. */
+ for (i = 1; i < 4; i++)
+ res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
+ break;
+
+ case 3: /* FMINV. */
+ for (i = 1; i < 4; i++)
+ res = MIN (res, aarch64_get_vec_float (cpu, vs, i));
+ break;
+
+ default:
+ HALT_NYI;
+ }
+ }
+ else
+ {
+ switch (uimm (aarch64_get_instr (cpu), 13, 12))
+ {
+	case 0: /* FMAXNMV. */
+ for (i = 1; i < 4; i++)
+ res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
+ break;
+
+ case 3: /* FMAXV. */
+ for (i = 1; i < 4; i++)
+ res = MAX (res, aarch64_get_vec_float (cpu, vs, i));
+ break;
+
+ default:
+ HALT_NYI;
+ }
+ }
+
+ aarch64_set_FP_float (cpu, rd, res);
+}
+
+static void
+do_vec_Fminmax (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 00 1110
+ instr[23] = max(0)/min(1)
+ instr[22] = float(0)/double(1)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,14] = 11
+ instr[13,12] = nm(00)/normal(11)
+ instr[11,10] = 01
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned min = uimm (aarch64_get_instr (cpu), 23, 23);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 14, 3);
+ NYI_assert (11, 10, 1);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ double (* func)(double, double);
+
+ if (! full)
+ HALT_NYI;
+
+ if (uimm (aarch64_get_instr (cpu), 13, 12) == 0)
+ func = min ? dminnm : dmaxnm;
+ else if (uimm (aarch64_get_instr (cpu), 13, 12) == 3)
+ func = min ? fmin : fmax;
+ else
+ HALT_NYI;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ func (aarch64_get_vec_double (cpu, vn, i),
+ aarch64_get_vec_double (cpu, vm, i)));
+ }
+ else
+ {
+ float (* func)(float, float);
+
+ if (uimm (aarch64_get_instr (cpu), 13, 12) == 0)
+ func = min ? fminnm : fmaxnm;
+ else if (uimm (aarch64_get_instr (cpu), 13, 12) == 3)
+ func = min ? fminf : fmaxf;
+ else
+ HALT_NYI;
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ func (aarch64_get_vec_float (cpu, vn, i),
+ aarch64_get_vec_float (cpu, vm, i)));
+ }
+}
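+
+/* Note: for a quiet NaN input the C99 fmin/fmax family returns the
+   other (non-NaN) operand, which matches the FMINNM/FMAXNM "number"
+   variants; the architectural FMIN/FMAX propagate the NaN, so the
+   mapping used above is only exact for non-NaN inputs.  */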
+
+static void
+do_vec_SCVTF (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = Q
+ instr[29,23] = 00 1110 0
+ instr[22] = float(0)/double(1)
+ instr[21,10] = 10 0001 1101 10
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned size = uimm (aarch64_get_instr (cpu), 22, 22);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x1C);
+ NYI_assert (21, 10, 0x876);
+
+ if (size)
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ {
+	  /* SCVTF converts a signed integer lane.  */
+	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
+ aarch64_set_vec_double (cpu, vd, i, val);
+ }
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
+ aarch64_set_vec_float (cpu, vd, i, val);
+ }
+ }
+}
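+
+/* Example: with 32-bit lanes, a lane holding 0xFFFFFFFF is -1 as an
+   int32_t, so SCVTF converts it to -1.0f; the signed accessors above
+   are what let the sign survive the conversion.  */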
+
+#define VEC_CMP(SOURCE, CMP) \
+ do \
+ { \
+ switch (size) \
+ { \
+ case 0: \
+ for (i = 0; i < (full ? 16 : 8); i++) \
+ aarch64_set_vec_u8 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
+ CMP \
+ aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
+ ? -1 : 0); \
+ return; \
+ case 1: \
+ for (i = 0; i < (full ? 8 : 4); i++) \
+ aarch64_set_vec_u16 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
+ CMP \
+ aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
+ ? -1 : 0); \
+ return; \
+ case 2: \
+ for (i = 0; i < (full ? 4 : 2); i++) \
+ aarch64_set_vec_u32 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
+ CMP \
+ aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
+ ? -1 : 0); \
+ return; \
+ case 3: \
+ if (! full) \
+ HALT_UNALLOC; \
+ for (i = 0; i < 2; i++) \
+ aarch64_set_vec_u64 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
+ CMP \
+ aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
+ ? -1ULL : 0); \
+ return; \
+ default: \
+ HALT_UNALLOC; \
+ } \
+ } \
+ while (0)
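+
+/* For instance, VEC_CMP (s, >) with size == 0 implements the CMGT
+   (register) byte compare: each destination lane becomes all-ones
+   when the signed Vn lane is greater than the corresponding Vm lane
+   and all-zeros otherwise -- the usual SIMD predicate encoding.  */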
+
+#define VEC_CMP0(SOURCE, CMP) \
+ do \
+ { \
+ switch (size) \
+ { \
+ case 0: \
+ for (i = 0; i < (full ? 16 : 8); i++) \
+ aarch64_set_vec_u8 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
+ CMP 0 ? -1 : 0); \
+ return; \
+ case 1: \
+ for (i = 0; i < (full ? 8 : 4); i++) \
+ aarch64_set_vec_u16 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
+ CMP 0 ? -1 : 0); \
+ return; \
+ case 2: \
+ for (i = 0; i < (full ? 4 : 2); i++) \
+ aarch64_set_vec_u32 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
+ CMP 0 ? -1 : 0); \
+ return; \
+ case 3: \
+ if (! full) \
+ HALT_UNALLOC; \
+ for (i = 0; i < 2; i++) \
+ aarch64_set_vec_u64 (cpu, vd, i, \
+ aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
+ CMP 0 ? -1ULL : 0); \
+ return; \
+ default: \
+ HALT_UNALLOC; \
+ } \
+ } \
+ while (0)
+
+#define VEC_FCMP0(CMP) \
+ do \
+ { \
+ if (vm != 0) \
+ HALT_NYI; \
+ if (uimm (aarch64_get_instr (cpu), 22, 22)) \
+ { \
+ if (! full) \
+ HALT_NYI; \
+ for (i = 0; i < 2; i++) \
+ aarch64_set_vec_u64 (cpu, vd, i, \
+ aarch64_get_vec_double (cpu, vn, i) \
+ CMP 0.0 ? -1 : 0); \
+ } \
+ else \
+ { \
+ for (i = 0; i < (full ? 4 : 2); i++) \
+ aarch64_set_vec_u32 (cpu, vd, i, \
+ aarch64_get_vec_float (cpu, vn, i) \
+ CMP 0.0 ? -1 : 0); \
+ } \
+ return; \
+ } \
+ while (0)
+
+#define VEC_FCMP(CMP) \
+ do \
+ { \
+ if (uimm (aarch64_get_instr (cpu), 22, 22)) \
+ { \
+ if (! full) \
+ HALT_NYI; \
+ for (i = 0; i < 2; i++) \
+ aarch64_set_vec_u64 (cpu, vd, i, \
+ aarch64_get_vec_double (cpu, vn, i) \
+ CMP \
+ aarch64_get_vec_double (cpu, vm, i) \
+ ? -1 : 0); \
+ } \
+ else \
+ { \
+ for (i = 0; i < (full ? 4 : 2); i++) \
+ aarch64_set_vec_u32 (cpu, vd, i, \
+ aarch64_get_vec_float (cpu, vn, i) \
+ CMP \
+ aarch64_get_vec_float (cpu, vm, i) \
+ ? -1 : 0); \
+ } \
+ return; \
+ } \
+ while (0)
+
+static void
+do_vec_compare (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29] = part-of-comparison-type
+ instr[28,24] = 0 1110
+ instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
+ type of float compares: single (-0) / double (-1)
+ instr[21] = 1
+ instr[20,16] = Vm or 00000 (compare vs 0)
+ instr[15,10] = part-of-comparison-type
+ instr[9,5] = Vn
+     instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int size = uimm (aarch64_get_instr (cpu), 23, 22);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (28, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+
+ if ((uimm (aarch64_get_instr (cpu), 11, 11)
+ && uimm (aarch64_get_instr (cpu), 14, 14))
+ || ((uimm (aarch64_get_instr (cpu), 11, 11) == 0
+ && uimm (aarch64_get_instr (cpu), 10, 10) == 0)))
+ {
+ /* A compare vs 0. */
+ if (vm != 0)
+ {
+ if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x2A)
+ do_vec_maxv (cpu);
+ else if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x32
+ || uimm (aarch64_get_instr (cpu), 15, 10) == 0x3E)
+ do_vec_fminmaxV (cpu);
+ else if (uimm (aarch64_get_instr (cpu), 29, 23) == 0x1C
+ && uimm (aarch64_get_instr (cpu), 21, 10) == 0x876)
+ do_vec_SCVTF (cpu);
+ else
+ HALT_NYI;
+ return;
+ }
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 14, 14))
+ {
+ /* A floating point compare. */
+ unsigned decode = (uimm (aarch64_get_instr (cpu), 29, 29) << 5)
+ | (uimm (aarch64_get_instr (cpu), 23, 23) << 4)
+ | uimm (aarch64_get_instr (cpu), 13, 10);
+
+ NYI_assert (15, 15, 1);
+
+ switch (decode)
+ {
+ case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
+ case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
+ case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
+ case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
+ case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
+ case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
+ case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
+ case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
+
+ default:
+ HALT_NYI;
+ }
+ }
+ else
+ {
+ unsigned decode = (uimm (aarch64_get_instr (cpu), 29, 29) << 6)
+ | uimm (aarch64_get_instr (cpu), 15, 10);
+
+ switch (decode)
+ {
+ case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
+ case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
+ case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
+ case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
+ case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
+ case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
+ case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
+ case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
+ case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
+ case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
+ default:
+ if (vm == 0)
+ HALT_NYI;
+ do_vec_maxv (cpu);
+ }
+ }
+}
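+
+/* Decode example: CMHI V0.16B, V1.16B, V2.16B has bit 29 set and
+   bits [15,10] = 0x0D, so the integer decode value computed above is
+   0x4D and the unsigned VEC_CMP (u, >) arm is taken.  */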
+
+static void
+do_vec_SSHL (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = first part (0)/ second part (1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size: byte(00), half(01), word (10), long (11)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 0100 01
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x11);
+
+  /* FIXME: The ISA definition of SSHL says each lane of Vm supplies a
+     *signed* byte shift count, with negative counts shifting right;
+     only non-negative counts are handled below.  */
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
+ << aarch64_get_vec_s8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
+ << aarch64_get_vec_s16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
+ << aarch64_get_vec_s32 (cpu, vm, i));
+ return;
+
+ case 3:
+ if (! full)
+ HALT_UNALLOC;
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
+ << aarch64_get_vec_s64 (cpu, vm, i));
+ return;
+
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_USHL (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = first part (0)/ second part (1)
+ instr[29,24] = 10 1110
+ instr[23,22] = size: byte(00), half(01), word (10), long (11)
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,10] = 0100 01
+ instr[9,5] = Vn
+ instr[4,0] = Vd */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x2E);
+ NYI_assert (15, 10, 0x11);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
+ << aarch64_get_vec_u8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
+ << aarch64_get_vec_u16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
+ << aarch64_get_vec_u32 (cpu, vm, i));
+ return;
+
+ case 3:
+ if (! full)
+ HALT_UNALLOC;
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
+ << aarch64_get_vec_u64 (cpu, vm, i));
+ return;
+
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_FMLA (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29,23] = 0011100
+ instr[22] = size: 0=>float, 1=>double
+ instr[21] = 1
+     instr[20,16] = Vm
+     instr[15,10] = 1100 11
+     instr[9,5] = Vn
+     instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 23, 0x1C);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x33);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ aarch64_get_vec_double (cpu, vn, i) *
+ aarch64_get_vec_double (cpu, vm, i) +
+ aarch64_get_vec_double (cpu, vd, i));
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ aarch64_get_vec_float (cpu, vn, i) *
+ aarch64_get_vec_float (cpu, vm, i) +
+ aarch64_get_vec_float (cpu, vd, i));
+ }
+}
+
+static void
+do_vec_max (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29] = SMAX (0) / UMAX (1)
+ instr[28,24] = 0 1110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
+ instr[21] = 1
+     instr[20,16] = Vm
+     instr[15,10] = 0110 01
+     instr[9,5] = Vn
+     instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (28, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x19);
+
+ if (uimm (aarch64_get_instr (cpu), 29, 29))
+ {
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i)
+ > aarch64_get_vec_u8 (cpu, vm, i)
+ ? aarch64_get_vec_u8 (cpu, vn, i)
+ : aarch64_get_vec_u8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ aarch64_get_vec_u16 (cpu, vn, i)
+ > aarch64_get_vec_u16 (cpu, vm, i)
+ ? aarch64_get_vec_u16 (cpu, vn, i)
+ : aarch64_get_vec_u16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u32 (cpu, vn, i)
+ > aarch64_get_vec_u32 (cpu, vm, i)
+ ? aarch64_get_vec_u32 (cpu, vn, i)
+ : aarch64_get_vec_u32 (cpu, vm, i));
+ return;
+
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ }
+ else
+ {
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_s8 (cpu, vd, i,
+ aarch64_get_vec_s8 (cpu, vn, i)
+ > aarch64_get_vec_s8 (cpu, vm, i)
+ ? aarch64_get_vec_s8 (cpu, vn, i)
+ : aarch64_get_vec_s8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_s16 (cpu, vd, i,
+ aarch64_get_vec_s16 (cpu, vn, i)
+ > aarch64_get_vec_s16 (cpu, vm, i)
+ ? aarch64_get_vec_s16 (cpu, vn, i)
+ : aarch64_get_vec_s16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, vd, i,
+ aarch64_get_vec_s32 (cpu, vn, i)
+ > aarch64_get_vec_s32 (cpu, vm, i)
+ ? aarch64_get_vec_s32 (cpu, vn, i)
+ : aarch64_get_vec_s32 (cpu, vm, i));
+ return;
+
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ }
+}
+
+static void
+do_vec_min (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full/half selector
+ instr[29] = SMIN (0) / UMIN (1)
+ instr[28,24] = 0 1110
+ instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
+ instr[21] = 1
+     instr[20,16] = Vm
+     instr[15,10] = 0110 11
+     instr[9,5] = Vn
+     instr[4,0] = Vd. */
+
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (28, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x1B);
+
+ if (uimm (aarch64_get_instr (cpu), 29, 29))
+ {
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i)
+ < aarch64_get_vec_u8 (cpu, vm, i)
+ ? aarch64_get_vec_u8 (cpu, vn, i)
+ : aarch64_get_vec_u8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ aarch64_get_vec_u16 (cpu, vn, i)
+ < aarch64_get_vec_u16 (cpu, vm, i)
+ ? aarch64_get_vec_u16 (cpu, vn, i)
+ : aarch64_get_vec_u16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u32 (cpu, vn, i)
+ < aarch64_get_vec_u32 (cpu, vm, i)
+ ? aarch64_get_vec_u32 (cpu, vn, i)
+ : aarch64_get_vec_u32 (cpu, vm, i));
+ return;
+
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ }
+ else
+ {
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_s8 (cpu, vd, i,
+ aarch64_get_vec_s8 (cpu, vn, i)
+ < aarch64_get_vec_s8 (cpu, vm, i)
+ ? aarch64_get_vec_s8 (cpu, vn, i)
+ : aarch64_get_vec_s8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_s16 (cpu, vd, i,
+ aarch64_get_vec_s16 (cpu, vn, i)
+ < aarch64_get_vec_s16 (cpu, vm, i)
+ ? aarch64_get_vec_s16 (cpu, vn, i)
+ : aarch64_get_vec_s16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, vd, i,
+ aarch64_get_vec_s32 (cpu, vn, i)
+ < aarch64_get_vec_s32 (cpu, vm, i)
+ ? aarch64_get_vec_s32 (cpu, vn, i)
+ : aarch64_get_vec_s32 (cpu, vm, i));
+ return;
+
+ default:
+ case 3:
+ HALT_UNALLOC;
+ }
+ }
+}
+
+static void
+do_vec_sub_long (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = lower (0) / upper (1)
+ instr[29] = signed (0) / unsigned (1)
+ instr[28,24] = 0 1110
+ instr[23,22] = size: bytes (00), half (01), word (10)
+ instr[21] = 1
+     instr[20,16] = Vm
+ instr[15,10] = 0010 00
+ instr[9,5] = Vn
+ instr[4,0] = V dest. */
+
+ unsigned size = uimm (aarch64_get_instr (cpu), 23, 22);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned bias = 0;
+ unsigned i;
+
+ NYI_assert (28, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x08);
+
+ if (size == 3)
+ HALT_UNALLOC;
+
+ switch (uimm (aarch64_get_instr (cpu), 30, 29))
+ {
+ case 2: /* SSUBL2. */
+      bias = 2;
+      /* Fall through.  */
+ case 0: /* SSUBL. */
+ switch (size)
+ {
+ case 0:
+ bias *= 3;
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_s16 (cpu, vd, i,
+ aarch64_get_vec_s8 (cpu, vn, i + bias)
+ - aarch64_get_vec_s8 (cpu, vm, i + bias));
+ break;
+
+ case 1:
+ bias *= 2;
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_s32 (cpu, vd, i,
+ aarch64_get_vec_s16 (cpu, vn, i + bias)
+ - aarch64_get_vec_s16 (cpu, vm, i + bias));
+ break;
+
+ case 2:
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, vd, i,
+ aarch64_get_vec_s32 (cpu, vn, i + bias)
+ - aarch64_get_vec_s32 (cpu, vm, i + bias));
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+ break;
+
+ case 3: /* USUBL2. */
+      bias = 2;
+      /* Fall through.  */
+ case 1: /* USUBL. */
+ switch (size)
+ {
+ case 0:
+ bias *= 3;
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vn, i + bias)
+ - aarch64_get_vec_u8 (cpu, vm, i + bias));
+ break;
+
+ case 1:
+ bias *= 2;
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u16 (cpu, vn, i + bias)
+ - aarch64_get_vec_u16 (cpu, vm, i + bias));
+ break;
+
+ case 2:
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_u64 (cpu, vd, i,
+ aarch64_get_vec_u32 (cpu, vn, i + bias)
+ - aarch64_get_vec_u32 (cpu, vm, i + bias));
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+ break;
+ }
+}
+
+#define DO_ADDP(FN) \
+ do \
+ { \
+ for (i = 0; i < range; i++) \
+ { \
+ aarch64_set_vec_##FN (cpu, vd, i, \
+ aarch64_get_vec_##FN (cpu, vn, i * 2) \
+ + aarch64_get_vec_##FN (cpu, vn, i * 2 + 1)); \
+ aarch64_set_vec_##FN (cpu, vd, i + range, \
+ aarch64_get_vec_##FN (cpu, vm, i * 2) \
+ + aarch64_get_vec_##FN (cpu, vm, i * 2 + 1)); \
+ } \
+ } \
+ while (0)
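+
+/* Lane layout example: for ADDP Vd.4S, Vn.4S, Vm.4S the macro runs
+   with range == 2 and produces
+     Vd = { Vn[0]+Vn[1], Vn[2]+Vn[3], Vm[0]+Vm[1], Vm[2]+Vm[3] },
+   i.e. the pairwise sums of Vn fill the low half of the destination
+   and those of Vm the high half.  */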
+
+static void
+do_vec_ADDP (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 00 1110
+ instr[23,22] = size: bytes (00), half (01), word (10), long (11)
+ instr[21] = 1
+     instr[20,16] = Vm
+ instr[15,10] = 1011 11
+ instr[9,5] = Vn
+ instr[4,0] = V dest. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned size = uimm (aarch64_get_instr (cpu), 23, 22);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i, range;
+
+ NYI_assert (29, 24, 0x0E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x2F);
+
+ switch (size)
+ {
+ case 0:
+ range = full ? 8 : 4;
+ DO_ADDP (u8);
+ return;
+
+ case 1:
+ range = full ? 4 : 2;
+ DO_ADDP (u16);
+ return;
+
+ case 2:
+ range = full ? 2 : 1;
+ DO_ADDP (u32);
+ return;
+
+ case 3:
+ if (! full)
+ HALT_UNALLOC;
+ range = 1;
+ DO_ADDP (u64);
+ return;
+
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_UMOV (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = 32-bit(0)/64-bit(1)
+ instr[29,21] = 00 1110 000
+     instr[20,16] = size & index
+ instr[15,10] = 0011 11
+ instr[9,5] = V source
+ instr[4,0] = R dest. */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned index;
+
+ NYI_assert (29, 21, 0x070);
+ NYI_assert (15, 10, 0x0F);
+
+ if (uimm (aarch64_get_instr (cpu), 16, 16))
+ {
+ /* Byte transfer. */
+ index = uimm (aarch64_get_instr (cpu), 20, 17);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_vec_u8 (cpu, vs, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 17, 17))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 18);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_vec_u16 (cpu, vs, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 18, 18))
+ {
+ index = uimm (aarch64_get_instr (cpu), 20, 19);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_vec_u32 (cpu, vs, index));
+ }
+ else
+ {
+ if (uimm (aarch64_get_instr (cpu), 30, 30) != 1)
+ HALT_UNALLOC;
+
+ index = uimm (aarch64_get_instr (cpu), 20, 20);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_vec_u64 (cpu, vs, index));
+ }
+}
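+
+/* Index decode example: imm5 (bits [20,16]) = 0b01011 has bit 16
+   set, so this is a byte transfer and the lane index is bits
+   [20,17] = 0b0101 = 5, i.e. UMOV Wd, Vs.B[5].  */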
+
+static void
+do_vec_FABS (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,23] = 00 1110 1
+ instr[22] = float(0)/double(1)
+ instr[21,16] = 10 0000
+ instr[15,10] = 1111 10
+ instr[9,5] = Vn
+ instr[4,0] = Vd. */
+
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x1D);
+ NYI_assert (21, 10, 0x83E);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_NYI;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ fabs (aarch64_get_vec_double (cpu, vn, i)));
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ fabsf (aarch64_get_vec_float (cpu, vn, i)));
+ }
+}
+
+static void
+do_vec_FCVTZS (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0) / all (1)
+ instr[29,23] = 00 1110 1
+ instr[22] = single (0) / double (1)
+ instr[21,10] = 10 0001 1011 10
+ instr[9,5] = Rn
+ instr[4,0] = Rd. */
+
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned i;
+
+ NYI_assert (31, 31, 0);
+ NYI_assert (29, 23, 0x1D);
+ NYI_assert (21, 10, 0x86E);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, rd, i,
+ (int64_t) aarch64_get_vec_double (cpu, rn, i));
+ }
+ else
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, rd, i,
+ (int32_t) aarch64_get_vec_float (cpu, rn, i));
+}
+
+static void
+do_vec_op1 (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half/full
+ instr[29,24] = 00 1110
+ instr[23,21] = ???
+ instr[20,16] = Vm
+ instr[15,10] = sub-opcode
+ instr[9,5] = Vn
+ instr[4,0] = Vd */
+ NYI_assert (29, 24, 0x0E);
+
+ if (uimm (aarch64_get_instr (cpu), 21, 21) == 0)
+ {
+ if (uimm (aarch64_get_instr (cpu), 23, 22) == 0)
+ {
+ if (uimm (aarch64_get_instr (cpu), 30, 30) == 1
+ && uimm (aarch64_get_instr (cpu), 17, 14) == 0
+ && uimm (aarch64_get_instr (cpu), 12, 10) == 7)
+ return do_vec_ins_2 (cpu);
+
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
+ case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
+ case 0x07: do_vec_INS (cpu); return;
+ case 0x0A: do_vec_TRN (cpu); return;
+
+ case 0x0F:
+ if (uimm (aarch64_get_instr (cpu), 17, 16) == 0)
+ {
+ do_vec_MOV_into_scalar (cpu);
+ return;
+ }
+ break;
+
+ case 0x00:
+ case 0x08:
+ case 0x10:
+ case 0x18:
+ do_vec_TBL (cpu); return;
+
+ case 0x06:
+ case 0x16:
+ do_vec_UZP (cpu); return;
+
+ case 0x0E:
+ case 0x1E:
+ do_vec_ZIP (cpu); return;
+
+ default:
+ HALT_NYI;
+ }
+ }
+
+ switch (uimm (aarch64_get_instr (cpu), 13, 10))
+ {
+ case 0x6: do_vec_UZP (cpu); return;
+ case 0xE: do_vec_ZIP (cpu); return;
+ case 0xA: do_vec_TRN (cpu); return;
+ case 0xF: do_vec_UMOV (cpu); return;
+ default: HALT_NYI;
+ }
+ }
+
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x07:
+ switch (uimm (aarch64_get_instr (cpu), 23, 21))
+ {
+ case 1: do_vec_AND (cpu); return;
+ case 3: do_vec_BIC (cpu); return;
+ case 5: do_vec_ORR (cpu); return;
+ case 7: do_vec_ORN (cpu); return;
+ default: HALT_NYI;
+ }
+
+ case 0x08: do_vec_sub_long (cpu); return;
+ case 0x0a: do_vec_XTN (cpu); return;
+ case 0x11: do_vec_SSHL (cpu); return;
+ case 0x19: do_vec_max (cpu); return;
+ case 0x1B: do_vec_min (cpu); return;
+ case 0x21: do_vec_add (cpu); return;
+ case 0x25: do_vec_MLA (cpu); return;
+ case 0x27: do_vec_mul (cpu); return;
+ case 0x2F: do_vec_ADDP (cpu); return;
+ case 0x30: do_vec_mull (cpu); return;
+ case 0x33: do_vec_FMLA (cpu); return;
+ case 0x35: do_vec_fadd (cpu); return;
+
+ case 0x2E:
+ switch (uimm (aarch64_get_instr (cpu), 20, 16))
+ {
+ case 0x00: do_vec_ABS (cpu); return;
+ case 0x01: do_vec_FCVTZS (cpu); return;
+ case 0x11: do_vec_ADDV (cpu); return;
+ default: HALT_NYI;
+ }
+
+ case 0x31:
+ case 0x3B:
+ do_vec_Fminmax (cpu); return;
+
+ case 0x0D:
+ case 0x0F:
+ case 0x22:
+ case 0x23:
+ case 0x26:
+ case 0x2A:
+ case 0x32:
+ case 0x36:
+ case 0x39:
+ case 0x3A:
+ do_vec_compare (cpu); return;
+
+ case 0x3E:
+ do_vec_FABS (cpu); return;
+
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_xtl (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
+ instr[28,22] = 0 1111 00
+ instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
+ instr[15,10] = 1010 01
+ instr[9,5] = V source
+ instr[4,0] = V dest. */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i, shift, bias = 0;
+
+ NYI_assert (28, 22, 0x3C);
+ NYI_assert (15, 10, 0x29);
+
+ switch (uimm (aarch64_get_instr (cpu), 30, 29))
+ {
+ case 2: /* SXTL2, SSHLL2. */
+      bias = 2;
+      /* Fall through.  */
+ case 0: /* SXTL, SSHLL. */
+ if (uimm (aarch64_get_instr (cpu), 21, 21))
+ {
+ shift = uimm (aarch64_get_instr (cpu), 20, 16);
+ aarch64_set_vec_s64
+ (cpu, vd, 0, aarch64_get_vec_s32 (cpu, vs, bias) << shift);
+ aarch64_set_vec_s64
+ (cpu, vd, 1, aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift);
+ }
+ else if (uimm (aarch64_get_instr (cpu), 20, 20))
+ {
+ shift = uimm (aarch64_get_instr (cpu), 19, 16);
+ bias *= 2;
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_s32
+ (cpu, vd, i, aarch64_get_vec_s16 (cpu, vs, i + bias) << shift);
+ }
+ else
+ {
+ NYI_assert (19, 19, 1);
+
+ shift = uimm (aarch64_get_instr (cpu), 18, 16);
+ bias *= 3;
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_s16
+ (cpu, vd, i, aarch64_get_vec_s8 (cpu, vs, i + bias) << shift);
+ }
+ return;
+
+ case 3: /* UXTL2, USHLL2. */
+      bias = 2;
+      /* Fall through.  */
+ case 1: /* UXTL, USHLL. */
+ if (uimm (aarch64_get_instr (cpu), 21, 21))
+ {
+ shift = uimm (aarch64_get_instr (cpu), 20, 16);
+ aarch64_set_vec_u64
+ (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vs, bias) << shift);
+ aarch64_set_vec_u64
+ (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift);
+ }
+ else if (uimm (aarch64_get_instr (cpu), 20, 20))
+ {
+ shift = uimm (aarch64_get_instr (cpu), 19, 16);
+ bias *= 2;
+ for (i = 0; i < 4; i++)
+ aarch64_set_vec_u32
+ (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i + bias) << shift);
+ }
+ else
+ {
+ NYI_assert (19, 19, 1);
+
+ shift = uimm (aarch64_get_instr (cpu), 18, 16);
+ bias *= 3;
+ for (i = 0; i < 8; i++)
+ aarch64_set_vec_u16
+ (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, i + bias) << shift);
+ }
+ return;
+
+ default:
+ HALT_NYI;
+ }
+}
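+
+/* Worked example: SXTL Vd.8H, Vs.8B (SSHLL #0) sign-extends each
+   byte, so a lane holding 0x80 (-128) becomes 0xFF80 in the
+   destination; SSHLL #2 would additionally shift the widened value
+   left by two, giving 0xFE00 (-512).  */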
+
+static void
+do_vec_SHL (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29,23] = 001 1110
+ instr [22,16] = size and shift amount
+ instr [15,10] = 01 0101
+ instr [9, 5] = Vs
+ instr [4, 0] = Vd. */
+
+ int shift;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x1E);
+ NYI_assert (15, 10, 0x15);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+      /* SHL encodes the shift amount as immh:immb minus the element
+	 size.  */
+      shift = uimm (aarch64_get_instr (cpu), 21, 16);
+
+ if (full == 0)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ {
+ uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
+ aarch64_set_vec_u64 (cpu, vd, i, val << shift);
+ }
+
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 21, 21))
+ {
+      shift = uimm (aarch64_get_instr (cpu), 20, 16);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
+ aarch64_set_vec_u32 (cpu, vd, i, val << shift);
+ }
+
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 20, 20))
+ {
+      shift = uimm (aarch64_get_instr (cpu), 19, 16);
+
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
+ aarch64_set_vec_u16 (cpu, vd, i, val << shift);
+ }
+
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 19, 19) == 0)
+ HALT_UNALLOC;
+
+  shift = uimm (aarch64_get_instr (cpu), 18, 16);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+ uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
+ aarch64_set_vec_u8 (cpu, vd, i, val << shift);
+ }
+}
+
+static void
+do_vec_SSHR_USHR (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29] = signed(0)/unsigned(1)
+ instr [28,23] = 01 1110
+ instr [22,16] = size and shift amount
+ instr [15,10] = 0000 01
+ instr [9, 5] = Vs
+ instr [4, 0] = Vd. */
+
+ int shift;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int sign = uimm (aarch64_get_instr (cpu), 29, 29);
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (28, 23, 0x1E);
+ NYI_assert (15, 10, 0x01);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+      /* SSHR/USHR encode the shift as (2 * esize) - shift amount.  */
+      shift = 64 - uimm (aarch64_get_instr (cpu), 21, 16);
+
+ if (full == 0)
+ HALT_UNALLOC;
+
+ if (sign)
+ for (i = 0; i < 2; i++)
+ {
+ int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
+ aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
+ }
+ else
+ for (i = 0; i < 2; i++)
+ {
+ uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
+ aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
+ }
+
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 21, 21))
+ {
+      shift = 32 - uimm (aarch64_get_instr (cpu), 20, 16);
+
+ if (sign)
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
+ aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
+ }
+ else
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+ uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
+ aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
+ }
+
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 20, 20))
+ {
+      shift = 16 - uimm (aarch64_get_instr (cpu), 19, 16);
+
+ if (sign)
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
+ aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
+ }
+ else
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
+ aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
+ }
+
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 19, 19) == 0)
+ HALT_UNALLOC;
+
+  shift = 8 - uimm (aarch64_get_instr (cpu), 18, 16);
+
+ if (sign)
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+ int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
+ aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
+ }
+ else
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+ uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
+ aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
+ }
+}
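+
+/* Example of the signed/unsigned split: for a byte lane holding 0x80,
+   USHR #1 gives 0x40 (logical shift) while SSHR #1 gives 0xC0 (-64),
+   the arithmetic shift replicating the sign bit.  The code relies on
+   '>>' of a negative signed value being an arithmetic shift, which is
+   implementation-defined in C but guaranteed by GCC.  */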
+
+static void
+do_vec_op2 (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half/full
+ instr[29,24] = 00 1111
+ instr[23] = ?
+ instr[22,16] = element size & index
+ instr[15,10] = sub-opcode
+     instr[9,5] = Vn
+     instr[4,0] = Vd */
+
+ NYI_assert (29, 24, 0x0F);
+
+ if (uimm (aarch64_get_instr (cpu), 23, 23) != 0)
+ HALT_NYI;
+
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x01: do_vec_SSHR_USHR (cpu); return;
+ case 0x15: do_vec_SHL (cpu); return;
+ case 0x29: do_vec_xtl (cpu); return;
+ default: HALT_NYI;
+ }
+}
+
+static void
+do_vec_neg (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full(1)/half(0)
+ instr[29,24] = 10 1110
+ instr[23,22] = size: byte(00), half (01), word (10), long (11)
+ instr[21,10] = 1000 0010 1110
+ instr[9,5] = Vs
+ instr[4,0] = Vd */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x2E);
+ NYI_assert (21, 10, 0x82E);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
+ return;
+
+ case 3:
+ if (! full)
+ HALT_NYI;
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+static void
+do_vec_sqrt (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = full(1)/half(0)
+ instr[29,23] = 101 1101
+ instr[22] = single(0)/double(1)
+ instr[21,10] = 1000 0111 1110
+ instr[9,5] = Vs
+ instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x5B);
+ NYI_assert (21, 10, 0x87E);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22) == 0)
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ sqrtf (aarch64_get_vec_float (cpu, vs, i)));
+ else
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ sqrt (aarch64_get_vec_double (cpu, vs, i)));
+}
+
+static void
+do_vec_mls_indexed (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,24] = 10 1111
+ instr[23,22] = 16-bit(01)/32-bit(10)
+     instr[21,20],instr[11] = index (if 16-bit)
+     instr[21],instr[11] = index (if 32-bit)
+ instr[20,16] = Vm
+ instr[15,12] = 0100
+ instr[11] = part of index
+ instr[10] = 0
+ instr[9,5] = Vs
+ instr[4,0] = Vd. */
+
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned i;
+
+ NYI_assert (15, 12, 4);
+ NYI_assert (10, 10, 0);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 1:
+ {
+ unsigned elem;
+ uint32_t val;
+
+ if (vm > 15)
+ HALT_NYI;
+
+ elem = (uimm (aarch64_get_instr (cpu), 21, 20) << 1)
+ | uimm (aarch64_get_instr (cpu), 11, 11);
+ val = aarch64_get_vec_u16 (cpu, vm, elem);
+
+	/* MLS by element keeps the 16-bit lane width.  */
+	for (i = 0; i < (full ? 8 : 4); i++)
+	  aarch64_set_vec_u16 (cpu, vd, i,
+			       aarch64_get_vec_u16 (cpu, vd, i) -
+			       (aarch64_get_vec_u16 (cpu, vs, i) * val));
+ return;
+ }
+
+ case 2:
+ {
+ unsigned elem = (uimm (aarch64_get_instr (cpu), 21, 21) << 1)
+ | uimm (aarch64_get_instr (cpu), 11, 11);
+	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
+
+	/* Likewise the 32-bit lane width is preserved.  */
+	for (i = 0; i < (full ? 4 : 2); i++)
+	  aarch64_set_vec_u32 (cpu, vd, i,
+			       aarch64_get_vec_u32 (cpu, vd, i) -
+			       (aarch64_get_vec_u32 (cpu, vs, i) * val));
+ return;
+ }
+
+ case 0:
+ case 3:
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+do_vec_SUB (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29,24] = 10 1110
+     instr [23,22] = size: byte(00), half(01), word (10), long (11)
+ instr [21] = 1
+ instr [20,16] = Vm
+ instr [15,10] = 10 0001
+ instr [9, 5] = Vn
+ instr [4, 0] = Vd. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x2E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x21);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_s8 (cpu, vd, i,
+ aarch64_get_vec_s8 (cpu, vn, i)
+ - aarch64_get_vec_s8 (cpu, vm, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_s16 (cpu, vd, i,
+ aarch64_get_vec_s16 (cpu, vn, i)
+ - aarch64_get_vec_s16 (cpu, vm, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_s32 (cpu, vd, i,
+ aarch64_get_vec_s32 (cpu, vn, i)
+ - aarch64_get_vec_s32 (cpu, vm, i));
+ return;
+
+ case 3:
+ if (full == 0)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_s64 (cpu, vd, i,
+ aarch64_get_vec_s64 (cpu, vn, i)
+ - aarch64_get_vec_s64 (cpu, vm, i));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+static void
+do_vec_MLS (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29,24] = 10 1110
+     instr [23,22] = size: byte(00), half(01), word (10)
+ instr [21] = 1
+ instr [20,16] = Vm
+ instr [15,10] = 10 0101
+ instr [9, 5] = Vn
+ instr [4, 0] = Vd. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 24, 0x2E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x25);
+
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0:
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ (aarch64_get_vec_u8 (cpu, vn, i)
+ * aarch64_get_vec_u8 (cpu, vm, i))
+ - aarch64_get_vec_u8 (cpu, vd, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ (aarch64_get_vec_u16 (cpu, vn, i)
+ * aarch64_get_vec_u16 (cpu, vm, i))
+ - aarch64_get_vec_u16 (cpu, vd, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ (aarch64_get_vec_u32 (cpu, vn, i)
+ * aarch64_get_vec_u32 (cpu, vm, i))
+ - aarch64_get_vec_u32 (cpu, vd, i));
+ return;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+do_vec_FDIV (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29,23] = 10 1110 0
+     instr [22] = float(0)/double(1)
+ instr [21] = 1
+ instr [20,16] = Vm
+ instr [15,10] = 1111 11
+ instr [9, 5] = Vn
+ instr [4, 0] = Vd. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x5C);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x3F);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ aarch64_get_vec_double (cpu, vn, i)
+ / aarch64_get_vec_double (cpu, vm, i));
+ }
+ else
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ aarch64_get_vec_float (cpu, vn, i)
+ / aarch64_get_vec_float (cpu, vm, i));
+}
+
+static void
+do_vec_FMUL (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29,23] = 10 1110 0
+ instr [22] = float(0)/double(1)
+ instr [21] = 1
+ instr [20,16] = Vm
+ instr [15,10] = 1101 11
+ instr [9, 5] = Vn
+ instr [4, 0] = Vd. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ NYI_assert (29, 23, 0x5C);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x37);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ aarch64_get_vec_double (cpu, vn, i)
+ * aarch64_get_vec_double (cpu, vm, i));
+ }
+ else
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ aarch64_get_vec_float (cpu, vn, i)
+ * aarch64_get_vec_float (cpu, vm, i));
+}
+
+static void
+do_vec_FADDP (sim_cpu *cpu)
+{
+ /* instr [31] = 0
+ instr [30] = half(0)/full(1)
+ instr [29,23] = 10 1110 0
+ instr [22] = float(0)/double(1)
+ instr [21] = 1
+ instr [20,16] = Vm
+ instr [15,10] = 1101 01
+ instr [9, 5] = Vn
+ instr [4, 0] = Vd. */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (29, 23, 0x5C);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x35);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ aarch64_set_vec_double (cpu, vd, 0, aarch64_get_vec_double (cpu, vn, 0)
+ + aarch64_get_vec_double (cpu, vn, 1));
+ aarch64_set_vec_double (cpu, vd, 1, aarch64_get_vec_double (cpu, vm, 0)
+ + aarch64_get_vec_double (cpu, vm, 1));
+ }
+ else
+ {
+ aarch64_set_vec_float (cpu, vd, 0, aarch64_get_vec_float (cpu, vn, 0)
+ + aarch64_get_vec_float (cpu, vn, 1));
+ if (full)
+ aarch64_set_vec_float (cpu, vd, 1, aarch64_get_vec_float (cpu, vn, 2)
+ + aarch64_get_vec_float (cpu, vn, 3));
+ aarch64_set_vec_float (cpu, vd, full ? 2 : 1,
+ aarch64_get_vec_float (cpu, vm, 0)
+ + aarch64_get_vec_float (cpu, vm, 1));
+ if (full)
+ aarch64_set_vec_float (cpu, vd, 3,
+ aarch64_get_vec_float (cpu, vm, 2)
+ + aarch64_get_vec_float (cpu, vm, 3));
+ }
+}
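+
+/* Lane layout: FADDP Vd.4S, Vn.4S, Vm.4S computes
+     Vd = { Vn[0]+Vn[1], Vn[2]+Vn[3], Vm[0]+Vm[1], Vm[2]+Vm[3] },
+   matching the arrangement used by the integer ADDP above.  */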
+
+static void
+do_vec_FSQRT (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half(0)/full(1)
+ instr[29,23] = 10 1110 1
+ instr[22] = single(0)/double(1)
+ instr[21,10] = 10 0001 1111 10
+ instr[9,5] = Vsrc
+ instr[4,0] = Vdest. */
+
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int i;
+
+ NYI_assert (29, 23, 0x5D);
+ NYI_assert (21, 10, 0x87E);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ sqrt (aarch64_get_vec_double (cpu, vn, i)));
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ sqrtf (aarch64_get_vec_float (cpu, vn, i)));
+ }
+}
+
+static void
+do_vec_FNEG (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,23] = 10 1110 1
+ instr[22] = single (0)/double (1)
+ instr[21,10] = 10 0000 1111 10
+ instr[9,5] = Vsrc
+ instr[4,0] = Vdest. */
+
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ int i;
+
+ NYI_assert (29, 23, 0x5D);
+ NYI_assert (21, 10, 0x83E);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ if (! full)
+ HALT_UNALLOC;
+
+ for (i = 0; i < 2; i++)
+ aarch64_set_vec_double (cpu, vd, i,
+ - aarch64_get_vec_double (cpu, vn, i));
+ }
+ else
+ {
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_float (cpu, vd, i,
+ - aarch64_get_vec_float (cpu, vn, i));
+ }
+}
+
+static void
+do_vec_NOT (sim_cpu *cpu)
+{
+ /* instr[31] = 0
+ instr[30] = half (0)/full (1)
+ instr[29,21] = 10 1110 001
+ instr[20,16] = 0 0000
+ instr[15,10] = 0101 10
+ instr[9,5] = Vn
+     instr[4,0] = Vd. */
+
+ unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+ int full = uimm (aarch64_get_instr (cpu), 30, 30);
+
+ NYI_assert (29, 10, 0xB8816);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
+}
+
+static void
+do_vec_MOV_element (sim_cpu *cpu)
+{
+ /* instr[31,21] = 0110 1110 000
+ instr[20,16] = size & dest index
+ instr[15] = 0
+ instr[14,11] = source index
+ instr[10] = 1
+ instr[9,5] = Vs
+     instr[4,0] = Vd. */
+
+ unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned src_index;
+ unsigned dst_index;
+
+ NYI_assert (31, 21, 0x370);
+ NYI_assert (15, 15, 0);
+ NYI_assert (10, 10, 1);
+
+ if (uimm (aarch64_get_instr (cpu), 16, 16))
+ {
+ /* Move a byte. */
+ src_index = uimm (aarch64_get_instr (cpu), 14, 11);
+ dst_index = uimm (aarch64_get_instr (cpu), 20, 17);
+ aarch64_set_vec_u8 (cpu, vd, dst_index,
+ aarch64_get_vec_u8 (cpu, vs, src_index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 17, 17))
+ {
+ /* Move 16-bits. */
+ NYI_assert (11, 11, 0);
+ src_index = uimm (aarch64_get_instr (cpu), 14, 12);
+ dst_index = uimm (aarch64_get_instr (cpu), 20, 18);
+ aarch64_set_vec_u16 (cpu, vd, dst_index,
+ aarch64_get_vec_u16 (cpu, vs, src_index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 18, 18))
+ {
+ /* Move 32-bits. */
+ NYI_assert (12, 11, 0);
+ src_index = uimm (aarch64_get_instr (cpu), 14, 13);
+ dst_index = uimm (aarch64_get_instr (cpu), 20, 19);
+ aarch64_set_vec_u32 (cpu, vd, dst_index,
+ aarch64_get_vec_u32 (cpu, vs, src_index));
+ }
+ else
+ {
+ NYI_assert (19, 19, 1);
+ NYI_assert (13, 11, 0);
+ src_index = uimm (aarch64_get_instr (cpu), 14, 14);
+ dst_index = uimm (aarch64_get_instr (cpu), 20, 20);
+ aarch64_set_vec_u64 (cpu, vd, dst_index,
+ aarch64_get_vec_u64 (cpu, vs, src_index));
+ }
+}
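+
+/* Decode example: INS Vd.H[3], Vs.H[1] encodes imm5 (bits [20,16])
+   as 0b01110 -- bit 16 clear and bit 17 set select 16-bit lanes --
+   so the destination index is bits [20,18] = 0b011 and the source
+   index is bits [14,12] = 0b001.  */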
+
+static void
+dexAdvSIMD0 (sim_cpu *cpu)
+{
+ /* instr [28,25] = 0 111. */
+ if ( uimm (aarch64_get_instr (cpu), 15, 10) == 0x07
+ && (uimm (aarch64_get_instr (cpu), 9, 5) ==
+ uimm (aarch64_get_instr (cpu), 20, 16)))
+ {
+ if (uimm (aarch64_get_instr (cpu), 31, 21) == 0x075
+ || uimm (aarch64_get_instr (cpu), 31, 21) == 0x275)
+ {
+ do_vec_MOV_whole_vector (cpu);
+ return;
+ }
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x1E0)
+ {
+ do_vec_MOV_immediate (cpu);
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x5E0)
+ {
+ do_vec_MVNI (cpu);
+ return;
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x1C0
+ || uimm (aarch64_get_instr (cpu), 29, 19) == 0x1C1)
+ {
+ if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x03)
+ {
+ do_vec_DUP_scalar_into_vector (cpu);
+ return;
+ }
+ }
+
+ switch (uimm (aarch64_get_instr (cpu), 29, 24))
+ {
+ case 0x0E: do_vec_op1 (cpu); return;
+ case 0x0F: do_vec_op2 (cpu); return;
+
+    case 0x2F:
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x01: do_vec_SSHR_USHR (cpu); return;
+ case 0x10:
+ case 0x12: do_vec_mls_indexed (cpu); return;
+ case 0x29: do_vec_xtl (cpu); return;
+ default:
+ HALT_NYI;
+ }
+
+ case 0x2E:
+ if (uimm (aarch64_get_instr (cpu), 21, 21) == 1)
+ {
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x07:
+ switch (uimm (aarch64_get_instr (cpu), 23, 22))
+ {
+ case 0: do_vec_EOR (cpu); return;
+ case 1: do_vec_BSL (cpu); return;
+ case 2:
+ case 3: do_vec_bit (cpu); return;
+ }
+ break;
+
+ case 0x08: do_vec_sub_long (cpu); return;
+ case 0x11: do_vec_USHL (cpu); return;
+ case 0x16: do_vec_NOT (cpu); return;
+ case 0x19: do_vec_max (cpu); return;
+ case 0x1B: do_vec_min (cpu); return;
+ case 0x21: do_vec_SUB (cpu); return;
+ case 0x25: do_vec_MLS (cpu); return;
+ case 0x31: do_vec_FminmaxNMP (cpu); return;
+ case 0x35: do_vec_FADDP (cpu); return;
+ case 0x37: do_vec_FMUL (cpu); return;
+ case 0x3F: do_vec_FDIV (cpu); return;
+
+ case 0x3E:
+ switch (uimm (aarch64_get_instr (cpu), 20, 16))
+ {
+ case 0x00: do_vec_FNEG (cpu); return;
+ case 0x01: do_vec_FSQRT (cpu); return;
+ default: HALT_NYI;
+ }
+
+ case 0x0D:
+ case 0x0F:
+ case 0x22:
+ case 0x23:
+ case 0x26:
+ case 0x2A:
+ case 0x32:
+ case 0x36:
+ case 0x39:
+ case 0x3A:
+ do_vec_compare (cpu); return;
+
+ default: break;
+ }
+ }
+
+ if (uimm (aarch64_get_instr (cpu), 31, 21) == 0x370)
+ {
+ do_vec_MOV_element (cpu);
+ return;
+ }
+
+ switch (uimm (aarch64_get_instr (cpu), 21, 10))
+ {
+ case 0x82E: do_vec_neg (cpu); return;
+ case 0x87E: do_vec_sqrt (cpu); return;
+ default:
+ if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x30)
+ {
+ do_vec_mull (cpu);
+ return;
+ }
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ HALT_NYI;
+}
+
+/* 3 sources. */
+
+/* Float multiply add. */
+static void
+fmadds (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
+ + aarch64_get_FP_float (cpu, sn)
+ * aarch64_get_FP_float (cpu, sm));
+}
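+
+/* Note: this helper and the other three-source helpers below use a
+   separate C multiply and add, so the product is rounded before the
+   addition, whereas the architectural FMADD/FMSUB family is fused
+   (a single rounding).  A sketch of a closer mapping, using the C99
+   fma routines already available via <math.h>:
+
+     aarch64_set_FP_float (cpu, sd,
+			   fmaf (aarch64_get_FP_float (cpu, sn),
+				 aarch64_get_FP_float (cpu, sm),
+				 aarch64_get_FP_float (cpu, sa)));  */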
+
+/* Double multiply add. */
+static void
+fmaddd (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
+ + aarch64_get_FP_double (cpu, sn)
+ * aarch64_get_FP_double (cpu, sm));
+}
+
+/* Float multiply subtract. */
+static void
+fmsubs (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
+ - aarch64_get_FP_float (cpu, sn)
+ * aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double multiply subtract. */
+static void
+fmsubd (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
+ - aarch64_get_FP_double (cpu, sn)
+ * aarch64_get_FP_double (cpu, sm));
+}
+
+/* Float negative multiply add. */
+static void
+fnmadds (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
+ + (- aarch64_get_FP_float (cpu, sn))
+ * aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double negative multiply add. */
+static void
+fnmaddd (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
+ + (- aarch64_get_FP_double (cpu, sn))
+ * aarch64_get_FP_double (cpu, sm));
+}
+
+/* Float negative multiply subtract. */
+static void
+fnmsubs (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
+ + aarch64_get_FP_float (cpu, sn)
+ * aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double negative multiply subtract. */
+static void
+fnmsubd (sim_cpu *cpu)
+{
+ unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
+ + aarch64_get_FP_double (cpu, sn)
+ * aarch64_get_FP_double (cpu, sm));
+}
+
+static void
+dexSimpleFPDataProc3Source (sim_cpu *cpu)
+{
+ /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
+ instr[30] = 0
+ instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
+ instr[28,25] = 1111
+ instr[24] = 1
+ instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
+ instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
+ instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
+
+ uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1)
+ | uimm (aarch64_get_instr (cpu), 29, 29);
+ /* dispatch on combined type:o1:o2. */
+ uint32_t dispatch = (uimm (aarch64_get_instr (cpu), 23, 21) << 1)
+ | uimm (aarch64_get_instr (cpu), 15, 15);
+
+ if (M_S != 0)
+ HALT_UNALLOC;
+
+ switch (dispatch)
+ {
+ case 0: fmadds (cpu); return;
+ case 1: fmsubs (cpu); return;
+ case 2: fnmadds (cpu); return;
+ case 3: fnmsubs (cpu); return;
+ case 4: fmaddd (cpu); return;
+ case 5: fmsubd (cpu); return;
+ case 6: fnmaddd (cpu); return;
+ case 7: fnmsubd (cpu); return;
+ default:
+ /* type > 1 is currently unallocated. */
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexSimpleFPFixedConvert (sim_cpu *cpu)
+{
+ HALT_NYI;
+}
+
+static void
+dexSimpleFPCondCompare (sim_cpu *cpu)
+{
+ HALT_NYI;
+}
+
+/* 2 sources. */
+
+/* Float add. */
+static void
+fadds (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
+ + aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double add. */
+static void
+faddd (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
+ + aarch64_get_FP_double (cpu, sm));
+}
+
+/* Float divide. */
+static void
+fdivs (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
+ / aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double divide. */
+static void
+fdivd (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
+ / aarch64_get_FP_double (cpu, sm));
+}
+
+/* Float multiply. */
+static void
+fmuls (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
+ * aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double multiply. */
+static void
+fmuld (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
+ * aarch64_get_FP_double (cpu, sm));
+}
+
+/* Float negate and multiply. */
+static void
+fnmuls (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
+ * aarch64_get_FP_float (cpu, sm)));
+}
+
+/* Double negate and multiply. */
+static void
+fnmuld (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
+ * aarch64_get_FP_double (cpu, sm)));
+}
+
+/* Float subtract. */
+static void
+fsubs (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
+ - aarch64_get_FP_float (cpu, sm));
+}
+
+/* Double subtract. */
+static void
+fsubd (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
+ - aarch64_get_FP_double (cpu, sm));
+}
+
+static void
+do_FMINNM (sim_cpu *cpu)
+{
+ /* instr[31,23] = 0 0011 1100
+ instr[22] = float(0)/double(1)
+ instr[21] = 1
+ instr[20,16] = Sm
+ instr[15,10] = 01 1110
+ instr[9,5] = Sn
+     instr[4,0] = Sd */
+
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 23, 0x03C);
+ NYI_assert (15, 10, 0x1E);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ aarch64_set_FP_double (cpu, sd,
+ dminnm (aarch64_get_FP_double (cpu, sn),
+ aarch64_get_FP_double (cpu, sm)));
+ else
+ aarch64_set_FP_float (cpu, sd,
+ fminnm (aarch64_get_FP_float (cpu, sn),
+ aarch64_get_FP_float (cpu, sm)));
+}
+
+static void
+do_FMAXNM (sim_cpu *cpu)
+{
+ /* instr[31,23] = 0 0011 1100
+ instr[22] = float(0)/double(1)
+ instr[21] = 1
+ instr[20,16] = Sm
+ instr[15,10] = 01 1010
+ instr[9,5] = Sn
+     instr[4,0] = Sd */
+
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 23, 0x03C);
+ NYI_assert (15, 10, 0x1A);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ aarch64_set_FP_double (cpu, sd,
+ dmaxnm (aarch64_get_FP_double (cpu, sn),
+ aarch64_get_FP_double (cpu, sm)));
+ else
+ aarch64_set_FP_float (cpu, sd,
+ fmaxnm (aarch64_get_FP_float (cpu, sn),
+ aarch64_get_FP_float (cpu, sm)));
+}
+
+static void
+dexSimpleFPDataProc2Source (sim_cpu *cpu)
+{
+ /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
+ instr[30] = 0
+ instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
+ instr[28,25] = 1111
+ instr[24] = 0
+     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
+ instr[21] = 1
+ instr[20,16] = Vm
+ instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
+ 0010 ==> FADD, 0011 ==> FSUB,
+ 0100 ==> FMAX, 0101 ==> FMIN
+ 0110 ==> FMAXNM, 0111 ==> FMINNM
+ 1000 ==> FNMUL, ow ==> UNALLOC
+ instr[11,10] = 10
+ instr[9,5] = Vn
+ instr[4,0] = Vd */
+
+ uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1)
+ | uimm (aarch64_get_instr (cpu), 29, 29);
+ uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22);
+ /* Dispatch on opcode. */
+ uint32_t dispatch = uimm (aarch64_get_instr (cpu), 15, 12);
+
+ if (type > 1)
+ HALT_UNALLOC;
+
+ if (M_S != 0)
+ HALT_UNALLOC;
+
+ if (type)
+ switch (dispatch)
+ {
+ case 0: fmuld (cpu); return;
+ case 1: fdivd (cpu); return;
+ case 2: faddd (cpu); return;
+ case 3: fsubd (cpu); return;
+ case 6: do_FMAXNM (cpu); return;
+ case 7: do_FMINNM (cpu); return;
+ case 8: fnmuld (cpu); return;
+
+ /* Have not yet implemented fmax and fmin. */
+ case 4:
+ case 5:
+ HALT_NYI;
+
+ default:
+ HALT_UNALLOC;
+ }
+ else /* type == 0 => floats. */
+ switch (dispatch)
+ {
+ case 0: fmuls (cpu); return;
+ case 1: fdivs (cpu); return;
+ case 2: fadds (cpu); return;
+ case 3: fsubs (cpu); return;
+ case 6: do_FMAXNM (cpu); return;
+ case 7: do_FMINNM (cpu); return;
+ case 8: fnmuls (cpu); return;
+
+ case 4:
+ case 5:
+ HALT_NYI;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexSimpleFPCondSelect (sim_cpu *cpu)
+{
+ /* FCSEL
+ instr[31,23] = 0 0011 1100
+ instr[22] = 0=>single 1=>double
+ instr[21] = 1
+ instr[20,16] = Sm
+ instr[15,12] = cond
+ instr[11,10] = 11
+ instr[9,5] = Sn
+     instr[4,0] = Sd */
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t set = testConditionCode (cpu, uimm (aarch64_get_instr (cpu), 15, 12));
+
+ NYI_assert (31, 23, 0x03C);
+ NYI_assert (11, 10, 0x3);
+
+  if (uimm (aarch64_get_instr (cpu), 22, 22))
+    aarch64_set_FP_double (cpu, sd,
+			   aarch64_get_FP_double (cpu, set ? sn : sm));
+  else
+    aarch64_set_FP_float (cpu, sd,
+			  aarch64_get_FP_float (cpu, set ? sn : sm));
+}
+
+/* Store 32 bit unscaled signed 9 bit. */
+static void
+fsturs (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  aarch64_set_mem_float (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+			 aarch64_get_FP_float (cpu, st));
+}
+
+/* Store 64 bit unscaled signed 9 bit. */
+static void
+fsturd (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  aarch64_set_mem_double (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+			  aarch64_get_FP_double (cpu, st));
+}
+
+/* Store 128 bit unscaled signed 9 bit. */
+static void
+fsturq (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+ FRegister a;
+
+  aarch64_get_FP_long_double (cpu, st, & a);
+  aarch64_set_mem_long_double (cpu,
+			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
+			       + offset, a);
+}
+
+/* TODO FP move register. */
+
+/* 32 bit fp to fp move register. */
+static void
+ffmovs (sim_cpu *cpu)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
+}
+
+/* 64 bit fp to fp move register. */
+static void
+ffmovd (sim_cpu *cpu)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
+}
+
+/* 32 bit GReg to Vec move register. */
+static void
+fgmovs (sim_cpu *cpu)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
+}
+
+/* 64 bit g to fp move register. */
+static void
+fgmovd (sim_cpu *cpu)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
+}
+
+/* 32 bit fp to g move register. */
+static void
+gfmovs (sim_cpu *cpu)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
+}
+
+/* 64 bit fp to g move register. */
+static void
+gfmovd (sim_cpu *cpu)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
+}
+
+/* FP move immediate
+
+ These install an immediate 8 bit value in the target register
+ where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
+ bit exponent. */
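+/* For example, imm8 == 0x70 expands to 1.0 and imm8 == 0x00 to 2.0;
+   zero itself is not representable in this format.  */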
+
+static void
+fmovs (sim_cpu *cpu)
+{
+ unsigned int sd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t imm = uimm (aarch64_get_instr (cpu), 20, 13);
+ float f = fp_immediate_for_encoding_32 (imm);
+
+ aarch64_set_FP_float (cpu, sd, f);
+}
+
+static void
+fmovd (sim_cpu *cpu)
+{
+ unsigned int sd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t imm = uimm (aarch64_get_instr (cpu), 20, 13);
+ double d = fp_immediate_for_encoding_64 (imm);
+
+ aarch64_set_FP_double (cpu, sd, d);
+}
+
+static void
+dexSimpleFPImmediate (sim_cpu *cpu)
+{
+ /* instr[31,23] == 00111100
+ instr[22] == type : single(0)/double(1)
+ instr[21] == 1
+ instr[20,13] == imm8
+ instr[12,10] == 100
+     instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
+ instr[4,0] == Rd */
+ uint32_t imm5 = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ NYI_assert (31, 23, 0x3C);
+
+ if (imm5 != 0)
+ HALT_UNALLOC;
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ fmovd (cpu);
+ else
+ fmovs (cpu);
+}
+
+/* TODO specific decode and execute for group Load Store. */
+
+/* TODO FP load/store single register (unscaled offset). */
+
+/* TODO load 8 bit unscaled signed 9 bit. */
+/* TODO load 16 bit unscaled signed 9 bit. */
+
+/* Load 32 bit unscaled signed 9 bit. */
+static void
+fldurs (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, st, aarch64_get_mem_float
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
+}
+
+/* Load 64 bit unscaled signed 9 bit. */
+static void
+fldurd (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, st, aarch64_get_mem_double
+ (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
+}
+
+/* Load 128 bit unscaled signed 9 bit. */
+static void
+fldurq (sim_cpu *cpu, int32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0);
+ FRegister a;
+ uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
+
+ aarch64_get_mem_long_double (cpu, addr, & a);
+ aarch64_set_FP_long_double (cpu, st, a);
+}
+
+/* TODO store 8 bit unscaled signed 9 bit. */
+/* TODO store 16 bit unscaled signed 9 bit. */
+
+
+/* 1 source. */
+
+/* Float absolute value. */
+static void
+fabss (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+ float value = aarch64_get_FP_float (cpu, sn);
+
+ aarch64_set_FP_float (cpu, sd, fabsf (value));
+}
+
+/* Double absolute value. */
+static void
+fabsd (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+ double value = aarch64_get_FP_double (cpu, sn);
+
+ aarch64_set_FP_double (cpu, sd, fabs (value));
+}
+
+/* Float negative value. */
+static void
+fnegs (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
+}
+
+/* Double negative value. */
+static void
+fnegd (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
+}
+
+/* Float square root. */
+static void
+fsqrts (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
+}
+
+/* Double square root. */
+static void
+fsqrtd (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd,
+ sqrt (aarch64_get_FP_double (cpu, sn)));
+}
+
+/* Convert double to float. */
+static void
+fcvtds (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
+}
+
+/* Convert float to double. */
+static void
+fcvtsd (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
+}
+
+static void
+do_FRINT (sim_cpu *cpu)
+{
+ /* instr[31,23] = 0001 1110 0
+ instr[22] = single(0)/double(1)
+ instr[21,18] = 1001
+ instr[17,15] = rounding mode
+ instr[14,10] = 10000
+ instr[9,5] = source
+ instr[4,0] = dest */
+
+ float val;
+ unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned int rmode = uimm (aarch64_get_instr (cpu), 17, 15);
+
+ NYI_assert (31, 23, 0x03C);
+ NYI_assert (21, 18, 0x9);
+ NYI_assert (14, 10, 0x10);
+
+ if (rmode == 6 || rmode == 7)
+ /* FIXME: Add support for rmode == 6 exactness check. */
+ rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
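+  /* N.B. the rounding-mode field architecturally lives in FPCR[23,22]
+     (0 ==> N, 1 ==> P, 2 ==> M, 3 ==> Z); this simulator reads the
+     equivalent bits from its FPSR value.  */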
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ double val = aarch64_get_FP_double (cpu, rs);
+
+      switch (rmode)
+	{
+	case 0: /* mode N: nearest or even. */
+	  {
+	    double rval = round (val);
+
+	    /* "round" rounds ties away from zero; nudge halfway
+	       cases so that ties go to the even value instead. */
+	    if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
+	      rval += (rval > val) ? -1.0 : 1.0;
+
+	    aarch64_set_FP_double (cpu, rd, rval);
+	    return;
+	  }
+
+	case 1: /* mode P: towards +inf. */
+	  aarch64_set_FP_double (cpu, rd, ceil (val));
+	  return;
+
+	case 2: /* mode M: towards -inf. */
+	  aarch64_set_FP_double (cpu, rd, floor (val));
+	  return;
+
+ case 3: /* mode Z: towards 0. */
+ aarch64_set_FP_double (cpu, rd, trunc (val));
+ return;
+
+ case 4: /* mode A: away from 0. */
+ aarch64_set_FP_double (cpu, rd, round (val));
+ return;
+
+ case 6: /* mode X: use FPCR with exactness check. */
+ case 7: /* mode I: use FPCR mode. */
+ HALT_NYI;
+
+ default:
+ HALT_UNALLOC;
+ }
+ }
+
+ val = aarch64_get_FP_float (cpu, rs);
+
+ switch (rmode)
+ {
+    case 0: /* mode N: nearest or even. */
+      {
+	float rval = roundf (val);
+
+	/* "roundf" rounds ties away from zero; nudge halfway cases
+	   so that ties go to the even value instead. */
+	if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
+	  rval += (rval > val) ? -1.0f : 1.0f;
+
+	aarch64_set_FP_float (cpu, rd, rval);
+	return;
+      }
+
+    case 1: /* mode P: towards +inf. */
+      aarch64_set_FP_float (cpu, rd, ceilf (val));
+      return;
+
+    case 2: /* mode M: towards -inf. */
+      aarch64_set_FP_float (cpu, rd, floorf (val));
+      return;
+
+ case 3: /* mode Z: towards 0. */
+ aarch64_set_FP_float (cpu, rd, truncf (val));
+ return;
+
+ case 4: /* mode A: away from 0. */
+ aarch64_set_FP_float (cpu, rd, roundf (val));
+ return;
+
+ case 6: /* mode X: use FPCR with exactness check. */
+ case 7: /* mode I: use FPCR mode. */
+ HALT_NYI;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexSimpleFPDataProc1Source (sim_cpu *cpu)
+{
+ /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
+ instr[30] = 0
+ instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
+ instr[28,25] = 1111
+ instr[24] = 0
+ instr[23,22] ==> type : 00 ==> source is single,
+ 01 ==> source is double
+ 10 ==> UNALLOC
+ 11 ==> UNALLOC or source is half
+ instr[21] = 1
+     instr[20,15] ==> opcode : with type 00 or 01
+                000000 ==> FMOV, 000001 ==> FABS,
+                000010 ==> FNEG, 000011 ==> FSQRT,
+                000100 ==> FCVT (to single), 000101 ==> FCVT (to double),
+                000110 ==> UNALLOC, 000111 ==> FCVT (to half)
+                001000 ==> FRINTN, 001001 ==> FRINTP,
+                001010 ==> FRINTM, 001011 ==> FRINTZ,
+                001100 ==> FRINTA, 001101 ==> UNALLOC
+                001110 ==> FRINTX, 001111 ==> FRINTI
+                with type 11
+                000100 ==> FCVT (half-to-single)
+                000101 ==> FCVT (half-to-double)
+     instr[14,10] = 10000. */
+
+ uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1)
+ | uimm (aarch64_get_instr (cpu), 29, 29);
+ uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22);
+ uint32_t opcode = uimm (aarch64_get_instr (cpu), 20, 15);
+
+ if (M_S != 0)
+ HALT_UNALLOC;
+
+ if (type == 3)
+ {
+ if (opcode == 4 || opcode == 5)
+ HALT_NYI;
+ else
+ HALT_UNALLOC;
+ }
+
+ if (type == 2)
+ HALT_UNALLOC;
+
+ switch (opcode)
+ {
+ case 0:
+ if (type)
+ ffmovd (cpu);
+ else
+ ffmovs (cpu);
+ return;
+
+    case 1:
+      if (type)
+	fabsd (cpu);
+      else
+	fabss (cpu);
+      return;
+
+ case 2:
+ if (type)
+ fnegd (cpu);
+ else
+ fnegs (cpu);
+ return;
+
+ case 3:
+ if (type)
+ fsqrtd (cpu);
+ else
+ fsqrts (cpu);
+ return;
+
+ case 4:
+ if (type)
+ fcvtds (cpu);
+ else
+ HALT_UNALLOC;
+ return;
+
+    case 5:
+      if (type)
+	HALT_UNALLOC;
+      fcvtsd (cpu);
+      return;
+
+ case 8: /* FRINTN etc. */
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 14:
+ case 15:
+ do_FRINT (cpu);
+ return;
+
+ case 7: /* FCVT double/single to half precision. */
+ case 13:
+ HALT_NYI;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+/* 32 bit signed int to float. */
+static void
+scvtf32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float
+ (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
+}
+
+/* 64 bit signed int to float. */
+static void
+scvtf (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_float
+ (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
+}
+
+/* 32 bit signed int to double. */
+static void
+scvtd32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double
+ (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
+}
+
+/* 64 bit signed int to double. */
+static void
+scvtd (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_FP_double
+ (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
+}
+
+static const float FLOAT_INT_MAX = (float) INT_MAX;
+static const float FLOAT_INT_MIN = (float) INT_MIN;
+static const double DOUBLE_INT_MAX = (double) INT_MAX;
+static const double DOUBLE_INT_MIN = (double) INT_MIN;
+static const float FLOAT_LONG_MAX = (float) LONG_MAX;
+static const float FLOAT_LONG_MIN = (float) LONG_MIN;
+static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
+static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
+
+/* Check for FP exception conditions:
+   NaN raises IO and yields 0
+   Infinity raises IO and saturates to the appropriate limit
+   Out of Range raises IO and IX and saturates value
+   Denormal raises ID and IX and sets to zero. */
+#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
+ do \
+ { \
+      switch (fpclassify (F))					\
+	{							\
+	case FP_NAN:						\
+	  aarch64_set_FPSR (cpu, IO);				\
+	  VALUE = 0;						\
+	  break;						\
+								\
+	case FP_INFINITE:					\
+	  aarch64_set_FPSR (cpu, IO);				\
+	  if (signbit (F))					\
+	    VALUE = ITYPE##_MIN;				\
+	  else							\
+	    VALUE = ITYPE##_MAX;				\
+	  break;						\
+ \
+ case FP_NORMAL: \
+ if (F >= FTYPE##_##ITYPE##_MAX) \
+ { \
+ aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
+ VALUE = ITYPE##_MAX; \
+ } \
+ else if (F <= FTYPE##_##ITYPE##_MIN) \
+ { \
+ aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
+ VALUE = ITYPE##_MIN; \
+ } \
+ break; \
+ \
+ case FP_SUBNORMAL: \
+ aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
+ VALUE = 0; \
+ break; \
+ \
+ default: \
+ case FP_ZERO: \
+ VALUE = 0; \
+ break; \
+ } \
+ } \
+ while (0)
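+
+/* For example, RAISE_EXCEPTIONS (f, value, FLOAT, INT) checks f
+   against the FLOAT_INT_MAX/MIN bounds above and saturates value to
+   the INT_MAX/INT_MIN limits from <limits.h>.  */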
+
+/* 32 bit convert float to signed int truncate towards zero. */
+static void
+fcvtszs32 (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  /* C's float-to-integer conversion truncates towards zero
+     (C99 6.3.1.4), as FCVTZS requires.  */
+ float f = aarch64_get_FP_float (cpu, sn);
+ int32_t value = (int32_t) f;
+
+ RAISE_EXCEPTIONS (f, value, FLOAT, INT);
+
+ /* Avoid sign extension to 64 bit. */
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
+}
+
+/* 64 bit convert float to signed int truncate towards zero. */
+static void
+fcvtszs (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ float f = aarch64_get_FP_float (cpu, sn);
+ int64_t value = (int64_t) f;
+
+ RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
+
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
+}
+
+/* 32 bit convert double to signed int truncate towards zero. */
+static void
+fcvtszd32 (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  /* C's double-to-integer conversion truncates towards zero
+     (C99 6.3.1.4), as FCVTZS requires.  */
+ double d = aarch64_get_FP_double (cpu, sn);
+ int32_t value = (int32_t) d;
+
+ RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
+
+ /* Avoid sign extension to 64 bit. */
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
+}
+
+/* 64 bit convert double to signed int truncate towards zero. */
+static void
+fcvtszd (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  /* C's double-to-integer conversion truncates towards zero
+     (C99 6.3.1.4), as FCVTZS requires.  */
+ double d = aarch64_get_FP_double (cpu, sn);
+ int64_t value;
+
+ value = (int64_t) d;
+
+ RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
+
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
+}
+
+static void
+do_fcvtzu (sim_cpu *cpu)
+{
+ /* instr[31] = size: 32-bit (0), 64-bit (1)
+ instr[30,23] = 00111100
+ instr[22] = type: single (0)/ double (1)
+     instr[21] = 0 ==> fixed-point (precision in [15,10]), 1 ==> integer
+ instr[20,16] = 11001
+ instr[15,10] = precision
+ instr[9,5] = Rs
+ instr[4,0] = Rd. */
+
+ unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (30, 23, 0x3C);
+ NYI_assert (20, 16, 0x19);
+
+ if (uimm (aarch64_get_instr (cpu), 21, 21) != 1)
+ /* Convert to fixed point. */
+ HALT_NYI;
+
+ if (uimm (aarch64_get_instr (cpu), 31, 31))
+ {
+ /* Convert to unsigned 64-bit integer. */
+      if (uimm (aarch64_get_instr (cpu), 22, 22))
+	{
+	  double d = aarch64_get_FP_double (cpu, rs);
+	  uint64_t value = (uint64_t) d;
+
+	  /* Do not raise an exception if we have reached ULONG_MAX. */
+	  if (value != (1ULL << 63))
+	    RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
+
+	  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
+	}
+      else
+	{
+	  float f = aarch64_get_FP_float (cpu, rs);
+	  uint64_t value = (uint64_t) f;
+
+	  /* Do not raise an exception if we have reached ULONG_MAX. */
+	  if (value != (1ULL << 63))
+	    RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
+
+	  aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
+	}
+ }
+ else
+ {
+ uint32_t value;
+
+ /* Convert to unsigned 32-bit integer. */
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ double d = aarch64_get_FP_double (cpu, rs);
+
+ value = (uint32_t) d;
+ /* Do not raise an exception if we have reached UINT_MAX. */
+ if (value != (1UL << 31))
+ RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
+ }
+ else
+ {
+ float f = aarch64_get_FP_float (cpu, rs);
+
+ value = (uint32_t) f;
+ /* Do not raise an exception if we have reached UINT_MAX. */
+ if (value != (1UL << 31))
+ RAISE_EXCEPTIONS (f, value, FLOAT, INT);
+ }
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
+ }
+}
+
+static void
+do_UCVTF (sim_cpu *cpu)
+{
+ /* instr[31] = size: 32-bit (0), 64-bit (1)
+ instr[30,23] = 001 1110 0
+ instr[22] = type: single (0)/ double (1)
+     instr[21] = 0 ==> fixed-point (precision in [15,10]), 1 ==> integer
+ instr[20,16] = 0 0011
+ instr[15,10] = precision
+ instr[9,5] = Rs
+ instr[4,0] = Rd. */
+
+ unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (30, 23, 0x3C);
+ NYI_assert (20, 16, 0x03);
+
+ if (uimm (aarch64_get_instr (cpu), 21, 21) != 1)
+ HALT_NYI;
+
+ /* FIXME: Add exception raising. */
+ if (uimm (aarch64_get_instr (cpu), 31, 31))
+ {
+ uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ aarch64_set_FP_double (cpu, rd, (double) value);
+ else
+ aarch64_set_FP_float (cpu, rd, (float) value);
+ }
+ else
+ {
+ uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ aarch64_set_FP_double (cpu, rd, (double) value);
+ else
+ aarch64_set_FP_float (cpu, rd, (float) value);
+ }
+}
+
+static void
+float_vector_move (sim_cpu *cpu)
+{
+  /* instr[31,17] == 100 1111 0101 0111
+     instr[16] ==> direction 0=> to GR, 1=> from GR
+     instr[15,10] => must be 00 0000 (ow UNALLOC)
+     instr[9,5] ==> source
+     instr[4,0] ==> dest.
+     This is FMOV Xd, Vn.D[1] / FMOV Vd.D[1], Xn.  */
+
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 17, 0x4F57);
+
+ if (uimm (aarch64_get_instr (cpu), 15, 10) != 0)
+ HALT_UNALLOC;
+
+ if (uimm (aarch64_get_instr (cpu), 16, 16))
+ aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
+ else
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
+}
+
+static void
+dexSimpleFPIntegerConvert (sim_cpu *cpu)
+{
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+     instr[30] = 0
+ instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
+ instr[28,25] = 1111
+ instr[24] = 0
+ instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
+ instr[21] = 1
+ instr[20,19] = rmode
+ instr[18,16] = opcode
+ instr[15,10] = 10 0000 */
+
+ uint32_t rmode_opcode;
+ uint32_t size_type;
+ uint32_t type;
+ uint32_t size;
+ uint32_t S;
+
+ if (uimm (aarch64_get_instr (cpu), 31, 17) == 0x4F57)
+ {
+ float_vector_move (cpu);
+ return;
+ }
+
+ size = uimm (aarch64_get_instr (cpu), 31, 31);
+ S = uimm (aarch64_get_instr (cpu), 29, 29);
+ if (S != 0)
+ HALT_UNALLOC;
+
+ type = uimm (aarch64_get_instr (cpu), 23, 22);
+ if (type > 1)
+ HALT_UNALLOC;
+
+ rmode_opcode = uimm (aarch64_get_instr (cpu), 20, 16);
+ size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
+
+ switch (rmode_opcode)
+ {
+ case 2: /* SCVTF. */
+ switch (size_type)
+ {
+ case 0: scvtf32 (cpu); return;
+ case 1: scvtd32 (cpu); return;
+ case 2: scvtf (cpu); return;
+ case 3: scvtd (cpu); return;
+ default:
+ HALT_UNREACHABLE;
+ }
+
+ case 6: /* FMOV GR, Vec. */
+ switch (size_type)
+ {
+ case 0: gfmovs (cpu); return;
+ case 3: gfmovd (cpu); return;
+ default: HALT_UNALLOC;
+ }
+
+ case 7: /* FMOV vec, GR. */
+ switch (size_type)
+ {
+ case 0: fgmovs (cpu); return;
+ case 3: fgmovd (cpu); return;
+ default: HALT_UNALLOC;
+ }
+
+ case 24: /* FCVTZS. */
+ switch (size_type)
+ {
+ case 0: fcvtszs32 (cpu); return;
+ case 1: fcvtszd32 (cpu); return;
+ case 2: fcvtszs (cpu); return;
+ case 3: fcvtszd (cpu); return;
+ default: HALT_UNREACHABLE;
+ }
+
+ case 25: do_fcvtzu (cpu); return;
+ case 3: do_UCVTF (cpu); return;
+
+ case 0: /* FCVTNS. */
+ case 1: /* FCVTNU. */
+ case 4: /* FCVTAS. */
+ case 5: /* FCVTAU. */
+    case 8: /* FCVTPS. */
+ case 9: /* FCVTPU. */
+ case 16: /* FCVTMS. */
+ case 17: /* FCVTMU. */
+ default:
+ HALT_NYI;
+ }
+}
+
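+/* Set the CPSR flags from an FP compare.  The encoding matches the
+   architected FCMP results: equal ==> Z|C, less than ==> N,
+   greater than ==> C, unordered ==> C|V.  */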
+static void
+set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
+{
+ uint32_t flags;
+
+ if (isnan (fvalue1) || isnan (fvalue2))
+ flags = C|V;
+ else
+ {
+ float result = fvalue1 - fvalue2;
+
+ if (result == 0.0)
+ flags = Z|C;
+ else if (result < 0)
+ flags = N;
+ else /* (result > 0). */
+ flags = C;
+ }
+
+ aarch64_set_CPSR (cpu, flags);
+}
+
+static void
+fcmps (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ float fvalue1 = aarch64_get_FP_float (cpu, sn);
+ float fvalue2 = aarch64_get_FP_float (cpu, sm);
+
+ set_flags_for_float_compare (cpu, fvalue1, fvalue2);
+}
+
+/* Float compare to zero -- Invalid Operation exception
+ only on signaling NaNs. */
+static void
+fcmpzs (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ float fvalue1 = aarch64_get_FP_float (cpu, sn);
+
+ set_flags_for_float_compare (cpu, fvalue1, 0.0f);
+}
+
+/* Float compare -- Invalid Operation exception on all NaNs. */
+static void
+fcmpes (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ float fvalue1 = aarch64_get_FP_float (cpu, sn);
+ float fvalue2 = aarch64_get_FP_float (cpu, sm);
+
+ set_flags_for_float_compare (cpu, fvalue1, fvalue2);
+}
+
+/* Float compare to zero -- Invalid Operation exception on all NaNs. */
+static void
+fcmpzes (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ float fvalue1 = aarch64_get_FP_float (cpu, sn);
+
+ set_flags_for_float_compare (cpu, fvalue1, 0.0f);
+}
+
+static void
+set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
+{
+ uint32_t flags;
+
+ if (isnan (dval1) || isnan (dval2))
+ flags = C|V;
+ else
+ {
+ double result = dval1 - dval2;
+
+ if (result == 0.0)
+ flags = Z|C;
+ else if (result < 0)
+ flags = N;
+ else /* (result > 0). */
+ flags = C;
+ }
+
+ aarch64_set_CPSR (cpu, flags);
+}
+
+/* Double compare -- Invalid Operation exception only on signaling NaNs. */
+static void
+fcmpd (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ double dvalue1 = aarch64_get_FP_double (cpu, sn);
+ double dvalue2 = aarch64_get_FP_double (cpu, sm);
+
+ set_flags_for_double_compare (cpu, dvalue1, dvalue2);
+}
+
+/* Double compare to zero -- Invalid Operation exception
+ only on signaling NaNs. */
+static void
+fcmpzd (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ double dvalue1 = aarch64_get_FP_double (cpu, sn);
+
+ set_flags_for_double_compare (cpu, dvalue1, 0.0);
+}
+
+/* Double compare -- Invalid Operation exception on all NaNs. */
+static void
+fcmped (sim_cpu *cpu)
+{
+ unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ double dvalue1 = aarch64_get_FP_double (cpu, sn);
+ double dvalue2 = aarch64_get_FP_double (cpu, sm);
+
+ set_flags_for_double_compare (cpu, dvalue1, dvalue2);
+}
+
+/* Double compare to zero -- Invalid Operation exception on all NaNs. */
+static void
+fcmpzed (sim_cpu *cpu)
+{
+ unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5);
+ double dvalue1 = aarch64_get_FP_double (cpu, sn);
+
+ set_flags_for_double_compare (cpu, dvalue1, 0.0);
+}
+
+static void
+dexSimpleFPCompare (sim_cpu *cpu)
+{
+  /* assert instr[28,25] == 1111
+     instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
+     instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
+     instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
+     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
+     instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
+     instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
+                              01000 ==> FCMPZ, 11000 ==> FCMPEZ,
+                              ow ==> UNALLOC  */
+ uint32_t dispatch;
+ uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1)
+ | uimm (aarch64_get_instr (cpu), 29, 29);
+ uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22);
+ uint32_t op = uimm (aarch64_get_instr (cpu), 15, 14);
+ uint32_t op2_2_0 = uimm (aarch64_get_instr (cpu), 2, 0);
+
+ if (op2_2_0 != 0)
+ HALT_UNALLOC;
+
+ if (M_S != 0)
+ HALT_UNALLOC;
+
+ if (type > 1)
+ HALT_UNALLOC;
+
+ if (op != 0)
+ HALT_UNALLOC;
+
+ /* dispatch on type and top 2 bits of opcode. */
+ dispatch = (type << 2) | uimm (aarch64_get_instr (cpu), 4, 3);
+
+ switch (dispatch)
+ {
+ case 0: fcmps (cpu); return;
+ case 1: fcmpzs (cpu); return;
+ case 2: fcmpes (cpu); return;
+ case 3: fcmpzes (cpu); return;
+ case 4: fcmpd (cpu); return;
+ case 5: fcmpzd (cpu); return;
+ case 6: fcmped (cpu); return;
+ case 7: fcmpzed (cpu); return;
+ default: HALT_UNREACHABLE;
+ }
+}
+
+static void
+do_scalar_FADDP (sim_cpu *cpu)
+{
+ /* instr [31,23] = 011111100
+ instr [22] = single(0)/double(1)
+ instr [21,10] = 1100 0011 0110
+ instr [9,5] = Fn
+ instr [4,0] = Fd. */
+
+ unsigned Fn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned Fd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 23, 0x0FC);
+ NYI_assert (21, 10, 0xC36);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ double val1 = aarch64_get_vec_double (cpu, Fn, 0);
+ double val2 = aarch64_get_vec_double (cpu, Fn, 1);
+
+ aarch64_set_FP_double (cpu, Fd, val1 + val2);
+ }
+ else
+ {
+ float val1 = aarch64_get_vec_float (cpu, Fn, 0);
+ float val2 = aarch64_get_vec_float (cpu, Fn, 1);
+
+ aarch64_set_FP_float (cpu, Fd, val1 + val2);
+ }
+}
+
+/* Floating point absolute difference. */
+
+static void
+do_scalar_FABD (sim_cpu *cpu)
+{
+ /* instr [31,23] = 0111 1110 1
+ instr [22] = float(0)/double(1)
+ instr [21] = 1
+ instr [20,16] = Rm
+ instr [15,10] = 1101 01
+ instr [9, 5] = Rn
+ instr [4, 0] = Rd. */
+
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 23, 0x0FD);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 10, 0x35);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ aarch64_set_FP_double (cpu, rd,
+ fabs (aarch64_get_FP_double (cpu, rn)
+ - aarch64_get_FP_double (cpu, rm)));
+ else
+ aarch64_set_FP_float (cpu, rd,
+ fabsf (aarch64_get_FP_float (cpu, rn)
+ - aarch64_get_FP_float (cpu, rm)));
+}
+
+static void
+do_scalar_CMGT (sim_cpu *cpu)
+{
+  /* instr [31,21] = 0111 1110 111
+ instr [20,16] = Rm
+ instr [15,10] = 00 1101
+ instr [9, 5] = Rn
+ instr [4, 0] = Rd. */
+
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  NYI_assert (31, 21, 0x3F7);
+ NYI_assert (15, 10, 0x0D);
+
+  /* This encoding (U == 1) is the unsigned compare, i.e. CMHI.  */
+  aarch64_set_vec_u64 (cpu, rd, 0,
+		       aarch64_get_vec_u64 (cpu, rn, 0) >
+		       aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
+}
+
+static void
+do_scalar_USHR (sim_cpu *cpu)
+{
+ /* instr [31,23] = 0111 1111 0
+ instr [22,16] = shift amount
+ instr [15,10] = 0000 01
+ instr [9, 5] = Rn
+ instr [4, 0] = Rd. */
+
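+  /* In the 64-bit scalar form, immh:immb (bits [22,16]) encodes the
+     shift amount as 128 - immh:immb.  */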
+ unsigned amount = 128 - uimm (aarch64_get_instr (cpu), 22, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 23, 0x0FE);
+ NYI_assert (15, 10, 0x01);
+
+ aarch64_set_vec_u64 (cpu, rd, 0,
+ aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
+}
+
+static void
+do_scalar_SHL (sim_cpu *cpu)
+{
+ /* instr [31,23] = 0111 1101 0
+ instr [22,16] = shift amount
+ instr [15,10] = 0101 01
+ instr [9, 5] = Rn
+ instr [4, 0] = Rd. */
+
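+  /* In the 64-bit scalar form the shift amount is immh:immb - 64;
+     immh<3> (bit 22) must be set, which is checked below.  */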
+ unsigned amount = uimm (aarch64_get_instr (cpu), 22, 16) - 64;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 23, 0x0BE);
+ NYI_assert (15, 10, 0x15);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22) == 0)
+ HALT_UNALLOC;
+
+ aarch64_set_vec_u64 (cpu, rd, 0,
+ aarch64_get_vec_u64 (cpu, rn, 0) << amount);
+}
+
+/* FCMEQ FCMGT FCMGE. */
+static void
+do_scalar_FCM (sim_cpu *cpu)
+{
+ /* instr [31,30] = 01
+ instr [29] = U
+ instr [28,24] = 1 1110
+ instr [23] = E
+ instr [22] = size
+ instr [21] = 1
+ instr [20,16] = Rm
+ instr [15,12] = 1110
+ instr [11] = AC
+ instr [10] = 1
+ instr [9, 5] = Rn
+ instr [4, 0] = Rd. */
+
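+  /* The E:U:ac triple selects the comparison: 000 ==> FCMEQ,
+     010 ==> FCMGE, 011 ==> FACGE, 110 ==> FCMGT, 111 ==> FACGT.  */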
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned EUac = (uimm (aarch64_get_instr (cpu), 23, 23) << 2)
+ | (uimm (aarch64_get_instr (cpu), 29, 29) << 1)
+ | uimm (aarch64_get_instr (cpu), 11, 11);
+ unsigned result;
+ float val1;
+ float val2;
+
+ NYI_assert (31, 30, 1);
+ NYI_assert (28, 24, 0x1E);
+ NYI_assert (21, 21, 1);
+ NYI_assert (15, 12, 0xE);
+ NYI_assert (10, 10, 1);
+
+ if (uimm (aarch64_get_instr (cpu), 22, 22))
+ {
+ double val1 = aarch64_get_FP_double (cpu, rn);
+ double val2 = aarch64_get_FP_double (cpu, rm);
+
+ switch (EUac)
+ {
+ case 0: /* 000 */
+ result = val1 == val2;
+ break;
+
+ case 3: /* 011 */
+ val1 = fabs (val1);
+ val2 = fabs (val2);
+ /* Fall through. */
+ case 2: /* 010 */
+ result = val1 >= val2;
+ break;
+
+ case 7: /* 111 */
+ val1 = fabs (val1);
+ val2 = fabs (val2);
+ /* Fall through. */
+ case 6: /* 110 */
+ result = val1 > val2;
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+
+      aarch64_set_vec_u64 (cpu, rd, 0, result ? -1ULL : 0);
+ return;
+ }
+
+ val1 = aarch64_get_FP_float (cpu, rn);
+ val2 = aarch64_get_FP_float (cpu, rm);
+
+ switch (EUac)
+ {
+ case 0: /* 000 */
+ result = val1 == val2;
+ break;
+
+ case 3: /* 011 */
+ val1 = fabsf (val1);
+ val2 = fabsf (val2);
+ /* Fall through. */
+ case 2: /* 010 */
+ result = val1 >= val2;
+ break;
+
+ case 7: /* 111 */
+ val1 = fabsf (val1);
+ val2 = fabsf (val2);
+ /* Fall through. */
+ case 6: /* 110 */
+ result = val1 > val2;
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+
+ aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
+}
+
+/* An alias of DUP. */
+static void
+do_scalar_MOV (sim_cpu *cpu)
+{
+ /* instr [31,21] = 0101 1110 000
+ instr [20,16] = imm5
+ instr [15,10] = 0000 01
+ instr [9, 5] = Rn
+ instr [4, 0] = Rd. */
+
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned index;
+
+ NYI_assert (31, 21, 0x2F0);
+ NYI_assert (15, 10, 0x01);
+
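+  /* The lowest set bit of imm5 selects the element size; the bits
+     above it give the source element index.  */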
+ if (uimm (aarch64_get_instr (cpu), 16, 16))
+ {
+ /* 8-bit. */
+ index = uimm (aarch64_get_instr (cpu), 20, 17);
+ aarch64_set_vec_u8
+ (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 17, 17))
+ {
+ /* 16-bit. */
+ index = uimm (aarch64_get_instr (cpu), 20, 18);
+ aarch64_set_vec_u16
+ (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 18, 18))
+ {
+ /* 32-bit. */
+ index = uimm (aarch64_get_instr (cpu), 20, 19);
+ aarch64_set_vec_u32
+ (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
+ }
+ else if (uimm (aarch64_get_instr (cpu), 19, 19))
+ {
+ /* 64-bit. */
+ index = uimm (aarch64_get_instr (cpu), 20, 20);
+ aarch64_set_vec_u64
+ (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
+ }
+ else
+ HALT_UNALLOC;
+}
+
+static void
+do_double_add (sim_cpu *cpu)
+{
+ /* instr [28,25] = 1111. */
+ unsigned Fd;
+ unsigned Fm;
+ unsigned Fn;
+ double val1;
+ double val2;
+
+ switch (uimm (aarch64_get_instr (cpu), 31, 23))
+ {
+ case 0xBC:
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x01: do_scalar_MOV (cpu); return;
+ case 0x39: do_scalar_FCM (cpu); return;
+ case 0x3B: do_scalar_FCM (cpu); return;
+ }
+ break;
+
+ case 0xBE: do_scalar_SHL (cpu); return;
+
+ case 0xFC:
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x36: do_scalar_FADDP (cpu); return;
+ case 0x39: do_scalar_FCM (cpu); return;
+ case 0x3B: do_scalar_FCM (cpu); return;
+ }
+ break;
+
+ case 0xFD:
+ switch (uimm (aarch64_get_instr (cpu), 15, 10))
+ {
+ case 0x0D: do_scalar_CMGT (cpu); return;
+ case 0x35: do_scalar_FABD (cpu); return;
+ case 0x39: do_scalar_FCM (cpu); return;
+ case 0x3B: do_scalar_FCM (cpu); return;
+ default:
+ HALT_NYI;
+ }
+
+ case 0xFE: do_scalar_USHR (cpu); return;
+ default:
+ break;
+ }
+
+ /* instr [31,21] = 0101 1110 111
+ instr [20,16] = Fn
+ instr [15,10] = 1000 01
+ instr [9,5] = Fm
+ instr [4,0] = Fd. */
+ if (uimm (aarch64_get_instr (cpu), 31, 21) != 0x2F7
+ || uimm (aarch64_get_instr (cpu), 15, 10) != 0x21)
+ HALT_NYI;
+
+ Fd = uimm (aarch64_get_instr (cpu), 4, 0);
+ Fm = uimm (aarch64_get_instr (cpu), 9, 5);
+ Fn = uimm (aarch64_get_instr (cpu), 20, 16);
+
+ val1 = aarch64_get_FP_double (cpu, Fm);
+ val2 = aarch64_get_FP_double (cpu, Fn);
+
+ aarch64_set_FP_double (cpu, Fd, val1 + val2);
+}
+
+static void
+dexAdvSIMD1 (sim_cpu *cpu)
+{
+ /* instr [28,25] = 1 111. */
+
+ /* we are currently only interested in the basic
+ scalar fp routines which all have bit 30 = 0. */
+ if (uimm (aarch64_get_instr (cpu), 30, 30))
+ do_double_add (cpu);
+
+ /* instr[24] is set for FP data processing 3-source and clear for
+ all other basic scalar fp instruction groups. */
+ else if (uimm (aarch64_get_instr (cpu), 24, 24))
+ dexSimpleFPDataProc3Source (cpu);
+
+ /* instr[21] is clear for floating <-> fixed conversions and set for
+ all other basic scalar fp instruction groups. */
+ else if (!uimm (aarch64_get_instr (cpu), 21, 21))
+ dexSimpleFPFixedConvert (cpu);
+
+ /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
+ 11 ==> cond select, 00 ==> other. */
+ else
+ switch (uimm (aarch64_get_instr (cpu), 11, 10))
+ {
+ case 1: dexSimpleFPCondCompare (cpu); return;
+ case 2: dexSimpleFPDataProc2Source (cpu); return;
+ case 3: dexSimpleFPCondSelect (cpu); return;
+
+ default:
+ /* Now an ordered cascade of tests.
+ FP immediate has aarch64_get_instr (cpu)[12] == 1.
+ FP compare has aarch64_get_instr (cpu)[13] == 1.
+ FP Data Proc 1 Source has aarch64_get_instr (cpu)[14] == 1.
+ FP floating <--> integer conversions has aarch64_get_instr (cpu)[15] == 0. */
+ if (uimm (aarch64_get_instr (cpu), 12, 12))
+ dexSimpleFPImmediate (cpu);
+
+ else if (uimm (aarch64_get_instr (cpu), 13, 13))
+ dexSimpleFPCompare (cpu);
+
+ else if (uimm (aarch64_get_instr (cpu), 14, 14))
+ dexSimpleFPDataProc1Source (cpu);
+
+ else if (!uimm (aarch64_get_instr (cpu), 15, 15))
+ dexSimpleFPIntegerConvert (cpu);
+
+ else
+ /* If we get here then instr[15] == 1 which means UNALLOC. */
+ HALT_UNALLOC;
+ }
+}
+
+/* PC relative addressing. */
+
+static void
+pcadr (sim_cpu *cpu)
+{
+ /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
+ instr[30,29] = immlo
+ instr[23,5] = immhi. */
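+
+  /* The signed 21-bit immediate is immhi:immlo.  For ADR it is a
+     byte offset from the PC; for ADRP it is scaled below to a 4KiB
+     page offset from the PC with its low 12 bits cleared.  */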
+ uint64_t address;
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t isPage = uimm (aarch64_get_instr (cpu), 31, 31);
+  union { int64_t s64; uint64_t u64; } imm;
+ uint64_t offset;
+
+ imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
+ offset = imm.u64;
+ offset = (offset << 2) | uimm (aarch64_get_instr (cpu), 30, 29);
+
+ address = aarch64_get_PC (cpu);
+
+ if (isPage)
+ {
+ offset <<= 12;
+ address &= ~0xfff;
+ }
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
+}
+
+/* Specific decode and execute for group Data Processing Immediate. */
+
+static void
+dexPCRelAddressing (sim_cpu *cpu)
+{
+ /* assert instr[28,24] = 10000. */
+ pcadr (cpu);
+}
+
+/* Immediate logical.
+   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
+   16, 32 or 64 bit sequence pulled out at decode and possibly
+   inverting it.
+
+   N.B. the output register (dest) can normally be Xn or SP;
+   the exception occurs for flag setting instructions which may
+   only use Xn for the output (dest).  The input register can
+   never be SP. */
+
+/* 32 bit and immediate. */
+static void
+and32 (sim_cpu *cpu, uint32_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
+}
+
+/* 64 bit and immediate. */
+static void
+and64 (sim_cpu *cpu, uint64_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
+}
+
+/* 32 bit and immediate set flags. */
+static void
+ands32 (sim_cpu *cpu, uint32_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = bimm;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
+ set_flags_for_binop32 (cpu, value1 & value2);
+}
+
+/* 64 bit and immediate set flags. */
+static void
+ands64 (sim_cpu *cpu, uint64_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = bimm;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
+ set_flags_for_binop64 (cpu, value1 & value2);
+}
+
+/* 32 bit exclusive or immediate. */
+static void
+eor32 (sim_cpu *cpu, uint32_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
+}
+
+/* 64 bit exclusive or immediate. */
+static void
+eor64 (sim_cpu *cpu, uint64_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
+}
+
+/* 32 bit or immediate. */
+static void
+orr32 (sim_cpu *cpu, uint32_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
+}
+
+/* 64 bit or immediate. */
+static void
+orr64 (sim_cpu *cpu, uint64_t bimm)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, SP_OK,
+ aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
+}
+
+/* Logical shifted register.
+ These allow an optional LSL, ASR, LSR or ROR to the second source
+ register with a count up to the register bit count.
+   N.B. register args may not be SP. */
+
+/* 32 bit AND shifted register. */
+static void
+and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 64 bit AND shifted register. */
+static void
+and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 32 bit AND shifted register setting flags. */
+static void
+ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
+ set_flags_for_binop32 (cpu, value1 & value2);
+}
+
+/* 64 bit AND shifted register setting flags. */
+static void
+ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
+ set_flags_for_binop64 (cpu, value1 & value2);
+}
+
+/* 32 bit BIC shifted register. */
+static void
+bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 64 bit BIC shifted register. */
+static void
+bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 32 bit BIC shifted register setting flags. */
+static void
+bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
+ set_flags_for_binop32 (cpu, value1 & value2);
+}
+
+/* 64 bit BIC shifted register setting flags. */
+static void
+bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
+ shift, count);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
+ set_flags_for_binop64 (cpu, value1 & value2);
+}
+
+/* 32 bit EON shifted register. */
+static void
+eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 64 bit EON shifted register. */
+static void
+eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 32 bit EOR shifted register. */
+static void
+eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 64 bit EOR shifted register. */
+static void
+eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 32 bit ORR shifted register. */
+static void
+orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 64 bit ORR shifted register. */
+static void
+orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 32 bit ORN shifted register. */
+static void
+orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
+}
+
+/* 64 bit ORN shifted register. */
+static void
+orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
+}
+
+static void
+dexLogicalImmediate (sim_cpu *cpu)
+{
+  /* assert instr[28,23] = 100100
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
+ instr[22] = N : used to construct immediate mask
+ instr[21,16] = immr
+ instr[15,10] = imms
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22);
+  /* uint32_t immr = uimm (aarch64_get_instr (cpu), 21, 16);  */
+  /* uint32_t imms = uimm (aarch64_get_instr (cpu), 15, 10);  */
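+  /* The 13-bit N:immr:imms field is used as an index into a table of
+     bitmask immediates computed at decode time; encodings that do not
+     describe a valid bitmask are left as zero, hence the UNALLOC
+     check below.  */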
+ uint32_t index = uimm (aarch64_get_instr (cpu), 22, 10);
+ uint64_t bimm64 = LITable [index];
+ uint32_t dispatch = uimm (aarch64_get_instr (cpu), 30, 29);
+
+ if (~size & N)
+ HALT_UNALLOC;
+
+ if (!bimm64)
+ HALT_UNALLOC;
+
+ if (size == 0)
+ {
+ uint32_t bimm = (uint32_t) bimm64;
+
+ switch (dispatch)
+ {
+ case 0: and32 (cpu, bimm); return;
+ case 1: orr32 (cpu, bimm); return;
+ case 2: eor32 (cpu, bimm); return;
+ case 3: ands32 (cpu, bimm); return;
+ }
+ }
+ else
+ {
+ switch (dispatch)
+ {
+ case 0: and64 (cpu, bimm64); return;
+ case 1: orr64 (cpu, bimm64); return;
+ case 2: eor64 (cpu, bimm64); return;
+ case 3: ands64 (cpu, bimm64); return;
+ }
+ }
+ HALT_UNALLOC;
+}
+
+/* Immediate move.
+   The uimm argument is a 16 bit value to be inserted into the
+   target register; the pos argument locates the 16 bit word in the
+   dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
+   3} for 64 bit.
+   N.B. the register arg may not be SP, so it must be
+   accessed using the setGZRegisterXXX accessors. */
+
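+/* For example, movk32 with val == 0x1234 and pos == 1 overwrites
+   bits [31,16] of the destination with 0x1234 and leaves bits [15,0]
+   intact.  */
+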
+/* 32 bit move 16 bit immediate zero remaining shorts. */
+static void
+movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
+}
+
+/* 64 bit move 16 bit immediate zero remaining shorts. */
+static void
+movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
+}
+
+/* 32 bit move 16 bit immediate negated. */
+static void
+movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
+}
+
+/* 64 bit move 16 bit immediate negated. */
+static void
+movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
+ ^ 0xffffffffffffffffULL));
+}
+
+/* 32 bit move 16 bit immediate keep remaining shorts. */
+static void
+movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
+ uint32_t value = val << (pos * 16);
+ uint32_t mask = ~(0xffffU << (pos * 16));
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
+}
+
+/* 64 bit move 16 bit immediate keep remaining shorts. */
+static void
+movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
+{
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
+ uint64_t value = (uint64_t) val << (pos * 16);
+ uint64_t mask = ~(0xffffULL << (pos * 16));
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
+}
+
+static void
+dexMoveWideImmediate (sim_cpu *cpu)
+{
+ /* assert instr[28:23] = 100101
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
+ instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
+ instr[20,5] = uimm16
+ instr[4,0] = Rd */
+
+ /* N.B. the (multiple of 16) shift is applied by the called routine,
+ we just pass the multiplier. */
+
+ uint32_t imm;
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ uint32_t op = uimm (aarch64_get_instr (cpu), 30, 29);
+ uint32_t shift = uimm (aarch64_get_instr (cpu), 22, 21);
+
+ /* 32 bit can only shift 0 or 1 lot of 16.
+ anything else is an unallocated instruction. */
+ if (size == 0 && (shift > 1))
+ HALT_UNALLOC;
+
+ if (op == 1)
+ HALT_UNALLOC;
+
+ imm = uimm (aarch64_get_instr (cpu), 20, 5);
+
+ if (size == 0)
+ {
+ if (op == 0)
+ movn32 (cpu, imm, shift);
+ else if (op == 2)
+ movz32 (cpu, imm, shift);
+ else
+ movk32 (cpu, imm, shift);
+ }
+ else
+ {
+ if (op == 0)
+ movn64 (cpu, imm, shift);
+ else if (op == 2)
+ movz64 (cpu, imm, shift);
+ else
+ movk64 (cpu, imm, shift);
+ }
+}
+
+/* Bitfield operations.
+ These take a pair of bit positions r and s which are in {0..31}
+ or {0..63} depending on the instruction word size.
+ N.B register args may not be SP. */
+
+/* OK, we start with ubfm which just needs to pick some bits out of
+   the source, zero the rest and write the result to the dest.  It
+   just needs two logical shifts.  */
+
+/* 32 bit bitfield move, left and right of affected zeroed
+ if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
+static void
+ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
+{
+ unsigned rd;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+
+ /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
+ if (r <= s)
+ {
+ /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
+ We want only bits s:xxx:r at the bottom of the word
+ so we LSL bit s up to bit 31 i.e. by 31 - s
+ and then we LSR to bring bit 31 down to bit s - r
+ i.e. by 31 + r - s. */
+ value <<= 31 - s;
+ value >>= 31 + r - s;
+ }
+ else
+ {
+ /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
+	 We want only bits s:xxx:0 starting at bit 31-(r-1)
+ so we LSL bit s up to bit 31 i.e. by 31 - s
+ and then we LSL to bring bit 31 down to 31-(r-1)+s
+ i.e. by r - (s + 1). */
+ value <<= 31 - s;
+ value >>= r - (s + 1);
+ }
+
+ rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
+}
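+
+/* For example, with r = 8 and s = 15 ubfm32 extracts Wn<15:8> into
+   Wd<7:0>, the UBFX Wd, Wn, #8, #8 alias: the value is shifted left
+   by 31 - 15 = 16 and then right by 31 + 8 - 15 = 24.  */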
+
+/* 64 bit bitfield move, left and right of affected zeroed
+ if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
+static void
+ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
+{
+ unsigned rd;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+
+ if (r <= s)
+ {
+ /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
+ We want only bits s:xxx:r at the bottom of the word.
+ So we LSL bit s up to bit 63 i.e. by 63 - s
+ and then we LSR to bring bit 63 down to bit s - r
+ i.e. by 63 + r - s. */
+ value <<= 63 - s;
+ value >>= 63 + r - s;
+ }
+ else
+ {
+ /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
+	 We want only bits s:xxx:0 starting at bit 63-(r-1).
+ So we LSL bit s up to bit 63 i.e. by 63 - s
+ and then we LSL to bring bit 63 down to 63-(r-1)+s
+ i.e. by r - (s + 1). */
+ value <<= 63 - s;
+ value >>= r - (s + 1);
+ }
+
+ rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
+}
+
+/* The signed versions need to insert sign bits
+ on the left of the inserted bit field. so we do
+ much the same as the unsigned version except we
+ use an arithmetic shift right -- this just means
+ we need to operate on signed values. */
+
+/* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
+/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
+static void
+sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
+{
+ unsigned rd;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+  /* As per ubfm32 but use an ASR instead of an LSR. */
+ int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
+
+ if (r <= s)
+ {
+ value <<= 31 - s;
+ value >>= 31 + r - s;
+ }
+ else
+ {
+ value <<= 31 - s;
+ value >>= r - (s + 1);
+ }
+
+ rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
+}
+
+/* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
+/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
+static void
+sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
+{
+ unsigned rd;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+  /* As per ubfm but use an ASR instead of an LSR. */
+ int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
+
+ if (r <= s)
+ {
+ value <<= 63 - s;
+ value >>= 63 + r - s;
+ }
+ else
+ {
+ value <<= 63 - s;
+ value >>= r - (s + 1);
+ }
+
+ rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
+}
+
+/* Finally, these versions leave non-affected bits as is, so we
+   need to generate the bits as per ubfm and also generate a mask
+   to pick the bits from the original and computed values.  */
+
+/* 32 bit bitfield move, non-affected bits left as is.
+ If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
+static void
+bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t mask = -1;
+ unsigned rd;
+ uint32_t value2;
+
+ /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
+ if (r <= s)
+ {
+ /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
+ We want only bits s:xxx:r at the bottom of the word
+ so we LSL bit s up to bit 31 i.e. by 31 - s
+ and then we LSR to bring bit 31 down to bit s - r
+ i.e. by 31 + r - s. */
+ value <<= 31 - s;
+ value >>= 31 + r - s;
+ /* the mask must include the same bits. */
+ mask <<= 31 - s;
+ mask >>= 31 + r - s;
+ }
+ else
+ {
+ /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
+	 We want only bits s:xxx:0 starting at bit 31-(r-1)
+ so we LSL bit s up to bit 31 i.e. by 31 - s
+ and then we LSL to bring bit 31 down to 31-(r-1)+s
+ i.e. by r - (s + 1). */
+ value <<= 31 - s;
+ value >>= r - (s + 1);
+ /* The mask must include the same bits. */
+ mask <<= 31 - s;
+ mask >>= r - (s + 1);
+ }
+
+ rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
+
+ value2 &= ~mask;
+ value2 |= value;
+
+  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
+}
+
+/* 64 bit bitfield move, non-affected bits left as is.
+ If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
+static void
+bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
+{
+ unsigned rd;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t mask = 0xffffffffffffffffULL;
+
+ if (r <= s)
+ {
+ /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
+ We want only bits s:xxx:r at the bottom of the word
+ so we LSL bit s up to bit 63 i.e. by 63 - s
+ and then we LSR to bring bit 63 down to bit s - r
+ i.e. by 63 + r - s. */
+ value <<= 63 - s;
+ value >>= 63 + r - s;
+ /* The mask must include the same bits. */
+ mask <<= 63 - s;
+ mask >>= 63 + r - s;
+ }
+ else
+ {
+ /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
+	 We want only bits s:xxx:0 starting at bit 63-(r-1)
+ so we LSL bit s up to bit 63 i.e. by 63 - s
+ and then we LSL to bring bit 63 down to 63-(r-1)+s
+ i.e. by r - (s + 1). */
+ value <<= 63 - s;
+ value >>= r - (s + 1);
+ /* The mask must include the same bits. */
+ mask <<= 63 - s;
+ mask >>= r - (s + 1);
+ }
+
+ rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
+}
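+
+/* N.B. in the architecture BFM with r > s is the preferred form of
+   the BFI (bitfield insert) alias, and BFM with r <= s that of the
+   BFXIL (bitfield extract and insert low) alias.  */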
+
+static void
+dexBitfieldImmediate (sim_cpu *cpu)
+{
+ /* assert instr[28:23] = 100110
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
+ instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
+ instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
+ instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
+ uint32_t dispatch;
+ uint32_t imms;
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22);
+  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
+     or else we have an UNALLOC.  */
+ uint32_t immr = uimm (aarch64_get_instr (cpu), 21, 16);
+
+ if (~size & N)
+ HALT_UNALLOC;
+
+ if (!size && uimm (immr, 5, 5))
+ HALT_UNALLOC;
+
+ imms = uimm (aarch64_get_instr (cpu), 15, 10);
+ if (!size && uimm (imms, 5, 5))
+ HALT_UNALLOC;
+
+ /* Switch on combined size and op. */
+ dispatch = uimm (aarch64_get_instr (cpu), 31, 29);
+ switch (dispatch)
+ {
+ case 0: sbfm32 (cpu, immr, imms); return;
+ case 1: bfm32 (cpu, immr, imms); return;
+ case 2: ubfm32 (cpu, immr, imms); return;
+ case 4: sbfm (cpu, immr, imms); return;
+ case 5: bfm (cpu, immr, imms); return;
+ case 6: ubfm (cpu, immr, imms); return;
+ default: HALT_UNALLOC;
+ }
+}
+
+static void
+do_EXTR_32 (sim_cpu *cpu)
+{
+ /* instr[31:21] = 00010011100
+ instr[20,16] = Rm
+ instr[15,10] = imms : 0xxxxx for 32 bit
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned imms = uimm (aarch64_get_instr (cpu), 15, 10) & 31;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t val1;
+ uint64_t val2;
+
+ val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
+ val1 >>= imms;
+ val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ val2 <<= (32 - imms);
+
+  /* Mask to 32 bits so that the top half of Xd is zeroed and, when
+     imms is zero, Wn (shifted fully out) does not leak back in.  */
+  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
+}
+
+static void
+do_EXTR_64 (sim_cpu *cpu)
+{
+ /* instr[31:21] = 10010011100
+ instr[20,16] = Rm
+ instr[15,10] = imms
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned imms = uimm (aarch64_get_instr (cpu), 15, 10) & 63;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t val;
+
+  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
+  if (imms != 0)  /* Avoid an undefined 64 bit shift by 64; the result is then just Xm.  */
+    {
+      val >>= imms;
+      val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
+    }
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
+}
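+
+/* N.B. EXTR with Rn == Rm is the preferred form of the ROR
+   (immediate) alias.  */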
+
+static void
+dexExtractImmediate (sim_cpu *cpu)
+{
+ /* assert instr[28:23] = 100111
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
+ instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
+ instr[21] = op0 : must be 0 or UNALLOC
+ instr[20,16] = Rm
+ instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
+ /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
+ uint32_t dispatch;
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22);
+ /* 32 bit operations must have imms[5] = 0
+ or else we have an UNALLOC. */
+ uint32_t imms = uimm (aarch64_get_instr (cpu), 15, 10);
+
+ if (size ^ N)
+ HALT_UNALLOC;
+
+ if (!size && uimm (imms, 5, 5))
+ HALT_UNALLOC;
+
+ /* Switch on combined size and op. */
+ dispatch = uimm (aarch64_get_instr (cpu), 31, 29);
+
+ if (dispatch == 0)
+ do_EXTR_32 (cpu);
+
+ else if (dispatch == 4)
+ do_EXTR_64 (cpu);
+
+ else if (dispatch == 1)
+ HALT_NYI;
+ else
+ HALT_UNALLOC;
+}
+
+static void
+dexDPImm (sim_cpu *cpu)
+{
+ /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
+     assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
+ bits [25,23] of a DPImm are the secondary dispatch vector. */
+ uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
+
+ switch (group2)
+ {
+ case DPIMM_PCADR_000:
+ case DPIMM_PCADR_001:
+ dexPCRelAddressing (cpu);
+ return;
+
+ case DPIMM_ADDSUB_010:
+ case DPIMM_ADDSUB_011:
+ dexAddSubtractImmediate (cpu);
+ return;
+
+ case DPIMM_LOG_100:
+ dexLogicalImmediate (cpu);
+ return;
+
+ case DPIMM_MOV_101:
+ dexMoveWideImmediate (cpu);
+ return;
+
+ case DPIMM_BITF_110:
+ dexBitfieldImmediate (cpu);
+ return;
+
+ case DPIMM_EXTR_111:
+ dexExtractImmediate (cpu);
+ return;
+
+ default:
+ /* Should never reach here. */
+ HALT_NYI;
+ }
+}
+
+static void
+dexLoadUnscaledImmediate (sim_cpu *cpu)
+{
+ /* instr[29,24] == 111_00
+ instr[21] == 0
+ instr[11,10] == 00
+ instr[31,30] = size
+ instr[26] = V
+ instr[23,22] = opc
+ instr[20,12] = simm9
+ instr[9,5] = rn may be SP. */
+ /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */
+ uint32_t V = uimm (aarch64_get_instr (cpu), 26, 26);
+ uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2)
+ | uimm (aarch64_get_instr (cpu), 23, 22));
+ int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
+
+ if (!V)
+ {
+ /* GReg operations. */
+ switch (dispatch)
+ {
+ case 0: sturb (cpu, imm); return;
+ case 1: ldurb32 (cpu, imm); return;
+ case 2: ldursb64 (cpu, imm); return;
+ case 3: ldursb32 (cpu, imm); return;
+ case 4: sturh (cpu, imm); return;
+ case 5: ldurh32 (cpu, imm); return;
+ case 6: ldursh64 (cpu, imm); return;
+ case 7: ldursh32 (cpu, imm); return;
+ case 8: stur32 (cpu, imm); return;
+ case 9: ldur32 (cpu, imm); return;
+ case 10: ldursw (cpu, imm); return;
+ case 12: stur64 (cpu, imm); return;
+ case 13: ldur64 (cpu, imm); return;
+
+ case 14:
+ /* PRFUM NYI. */
+ HALT_NYI;
+
+ default:
+ case 11:
+ case 15:
+ HALT_UNALLOC;
+ }
+ }
+
+ /* FReg operations. */
+ switch (dispatch)
+ {
+ case 2: fsturq (cpu, imm); return;
+ case 3: fldurq (cpu, imm); return;
+ case 8: fsturs (cpu, imm); return;
+ case 9: fldurs (cpu, imm); return;
+ case 12: fsturd (cpu, imm); return;
+ case 13: fldurd (cpu, imm); return;
+
+ case 0: /* STUR 8 bit FP. */
+ case 1: /* LDUR 8 bit FP. */
+ case 4: /* STUR 16 bit FP. */
+    case 5: /* LDUR 16 bit FP. */
+ HALT_NYI;
+
+ default:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 14:
+ case 15:
+ HALT_UNALLOC;
+ }
+}
+
+/* N.B. A preliminary note regarding all the ldrs<x>32
+   instructions
+
+   The signed value loaded by these instructions is cast to unsigned
+   before being assigned, via aarch64_set_reg_u64, to the 64 bit
+   element of the GReg union.  This performs a 32 bit sign extension
+   (as required) but avoids 64 bit sign extension, thus ensuring that
+   the top half of the register word is zero.  This is what the spec
+   demands when a 32 bit load occurs.  */
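+
+/* For example, loading the byte 0x80 via ldrsb32 must leave the full
+   X register holding 0x00000000ffffff80, not 0xffffffffffffff80.  */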
+
+/* 32 bit load sign-extended byte scaled unsigned 12 bit. */
+static void
+ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  /* The target register may not be SP but the source may be.
+     There is no scaling required for a byte load.  */
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
+  aarch64_set_reg_u64 (cpu, rt, NO_SP,
+                       (uint32_t) aarch64_get_mem_s8 (cpu, address));
+}
+
+/* 32 bit load sign-extended byte scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned int rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* rn may reference SP, rm and rt must reference ZR. */
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+
+ /* There is no scaling required for a byte load. */
+  aarch64_set_reg_u64
+    (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
+                                                    + displacement));
+}
+
+/* 32 bit load sign-extended byte unscaled signed 9 bit with
+ pre- or post-writeback. */
+static void
+ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ uint64_t address;
+ unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ if (rn == rt && wb != NoWriteBack)
+ HALT_UNALLOC;
+
+ address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb == Pre)
+ address += offset;
+
+  aarch64_set_reg_u64 (cpu, rt, NO_SP,
+                       (uint32_t) aarch64_get_mem_s8 (cpu, address));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
+}
+
+/* 8 bit store scaled. */
+static void
+fstrb_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ aarch64_set_mem_u8 (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
+ aarch64_get_vec_u8 (cpu, st, 0));
+}
+
+/* 8 bit store scaled or unscaled zero-
+   or sign-extended 32-bit register offset.  */
+static void
+fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+  uint64_t displacement = extended;  /* Byte accesses are never scaled.  */
+
+ aarch64_set_mem_u8
+ (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
+}
+
+/* 16 bit store scaled. */
+static void
+fstrh_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ aarch64_set_mem_u16
+ (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
+ aarch64_get_vec_u16 (cpu, st, 0));
+}
+
+/* 16 bit store scaled or unscaled zero-
+   or sign-extended 32-bit register offset.  */
+static void
+fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
+
+ aarch64_set_mem_u16
+ (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
+}
+
+/* 32 bit store scaled unsigned 12 bit. */
+static void
+fstrs_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ aarch64_set_mem_float
+ (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
+ aarch64_get_FP_float (cpu, st));
+}
+
+/* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
+static void
+fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_float (cpu, address, aarch64_get_FP_float (cpu, st));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 32 bit store scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+ uint64_t displacement = OPT_SCALE (extended, 32, scaling);
+
+ aarch64_set_mem_float
+ (cpu, address + displacement, aarch64_get_FP_float (cpu, st));
+}
+
+/* 64 bit store scaled unsigned 12 bit. */
+static void
+fstrd_abs (sim_cpu *cpu, uint32_t offset)
+{
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+ aarch64_set_mem_double
+ (cpu,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
+ aarch64_get_FP_double (cpu, st));
+}
+
+/* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
+static void
+fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_double (cpu, address, aarch64_get_FP_double (cpu, st));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 64 bit store scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+ uint64_t displacement = OPT_SCALE (extended, 64, scaling);
+
+ aarch64_set_mem_double
+ (cpu, address + displacement, aarch64_get_FP_double (cpu, st));
+}
+
+/* 128 bit store scaled unsigned 12 bit. */
+static void
+fstrq_abs (sim_cpu *cpu, uint32_t offset)
+{
+ FRegister a;
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ uint64_t addr;
+
+ aarch64_get_FP_long_double (cpu, st, & a);
+
+ addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
+ aarch64_set_mem_long_double (cpu, addr, a);
+}
+
+/* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
+static void
+fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ FRegister a;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_get_FP_long_double (cpu, st, & a);
+ aarch64_set_mem_long_double (cpu, address, a);
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
+}
+
+/* 128 bit store scaled or unscaled zero-
+ or sign-extended 32-bit register offset. */
+static void
+fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned st = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
+ int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
+ extension);
+ uint64_t displacement = OPT_SCALE (extended, 128, scaling);
+
+ FRegister a;
+
+ aarch64_get_FP_long_double (cpu, st, & a);
+ aarch64_set_mem_long_double (cpu, address + displacement, a);
+}
+
+static void
+dexLoadImmediatePrePost (sim_cpu *cpu)
+{
+ /* instr[29,24] == 111_00
+ instr[21] == 0
+     instr[10] == 1
+ instr[31,30] = size
+ instr[26] = V
+ instr[23,22] = opc
+ instr[20,12] = simm9
+ instr[11] = wb : 0 ==> Post, 1 ==> Pre
+ instr[9,5] = rn may be SP. */
+ /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */
+ uint32_t V = uimm (aarch64_get_instr (cpu), 26, 26);
+ uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2)
+ | uimm (aarch64_get_instr (cpu), 23, 22));
+ int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
+ WriteBack wb = writeback (aarch64_get_instr (cpu), 11);
+
+ if (!V)
+ {
+ /* GReg operations. */
+ switch (dispatch)
+ {
+ case 0: strb_wb (cpu, imm, wb); return;
+ case 1: ldrb32_wb (cpu, imm, wb); return;
+ case 2: ldrsb_wb (cpu, imm, wb); return;
+ case 3: ldrsb32_wb (cpu, imm, wb); return;
+ case 4: strh_wb (cpu, imm, wb); return;
+ case 5: ldrh32_wb (cpu, imm, wb); return;
+ case 6: ldrsh64_wb (cpu, imm, wb); return;
+ case 7: ldrsh32_wb (cpu, imm, wb); return;
+ case 8: str32_wb (cpu, imm, wb); return;
+ case 9: ldr32_wb (cpu, imm, wb); return;
+ case 10: ldrsw_wb (cpu, imm, wb); return;
+ case 12: str_wb (cpu, imm, wb); return;
+ case 13: ldr_wb (cpu, imm, wb); return;
+
+ default:
+ case 11:
+ case 14:
+ case 15:
+ HALT_UNALLOC;
+ }
+ }
+
+ /* FReg operations. */
+ switch (dispatch)
+ {
+ case 2: fstrq_wb (cpu, imm, wb); return;
+ case 3: fldrq_wb (cpu, imm, wb); return;
+ case 8: fstrs_wb (cpu, imm, wb); return;
+ case 9: fldrs_wb (cpu, imm, wb); return;
+ case 12: fstrd_wb (cpu, imm, wb); return;
+ case 13: fldrd_wb (cpu, imm, wb); return;
+
+ case 0: /* STUR 8 bit FP. */
+ case 1: /* LDUR 8 bit FP. */
+ case 4: /* STUR 16 bit FP. */
+    case 5: /* LDUR 16 bit FP. */
+ HALT_NYI;
+
+ default:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 14:
+ case 15:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexLoadRegisterOffset (sim_cpu *cpu)
+{
+ /* instr[31,30] = size
+ instr[29,27] = 111
+ instr[26] = V
+ instr[25,24] = 00
+ instr[23,22] = opc
+ instr[21] = 1
+ instr[20,16] = rm
+ instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
+ 110 ==> SXTW, 111 ==> SXTX,
+ ow ==> RESERVED
+ instr[12] = scaled
+ instr[11,10] = 10
+ instr[9,5] = rn
+ instr[4,0] = rt. */
+
+ uint32_t V = uimm (aarch64_get_instr (cpu), 26,26);
+ uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2)
+ | uimm (aarch64_get_instr (cpu), 23, 22));
+ Scaling scale = scaling (aarch64_get_instr (cpu), 12);
+ Extension extensionType = extension (aarch64_get_instr (cpu), 13);
+
+ /* Check for illegal extension types. */
+ if (uimm (extensionType, 1, 1) == 0)
+ HALT_UNALLOC;
+
+ if (extensionType == UXTX || extensionType == SXTX)
+ extensionType = NoExtension;
+
+ if (!V)
+ {
+ /* GReg operations. */
+ switch (dispatch)
+ {
+ case 0: strb_scale_ext (cpu, scale, extensionType); return;
+ case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
+ case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
+ case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
+ case 4: strh_scale_ext (cpu, scale, extensionType); return;
+ case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
+ case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
+ case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
+ case 8: str32_scale_ext (cpu, scale, extensionType); return;
+ case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
+ case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
+ case 12: str_scale_ext (cpu, scale, extensionType); return;
+ case 13: ldr_scale_ext (cpu, scale, extensionType); return;
+ case 14: prfm_scale_ext (cpu, scale, extensionType); return;
+
+ default:
+ case 11:
+ case 15:
+ HALT_UNALLOC;
+ }
+ }
+
+ /* FReg operations. */
+ switch (dispatch)
+ {
+ case 1: /* LDUR 8 bit FP. */
+ HALT_NYI;
+ case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
+    case 5: /* LDUR 16 bit FP. */
+ HALT_NYI;
+ case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
+ case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
+
+ case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
+ case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
+ case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
+ case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
+ case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
+
+ default:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 14:
+ case 15:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexLoadUnsignedImmediate (sim_cpu *cpu)
+{
+ /* assert instr[29,24] == 111_01
+ instr[31,30] = size
+ instr[26] = V
+ instr[23,22] = opc
+ instr[21,10] = uimm12 : unsigned immediate offset
+ instr[9,5] = rn may be SP. */
+ /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */
+ uint32_t V = uimm (aarch64_get_instr (cpu), 26,26);
+ uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2)
+ | uimm (aarch64_get_instr (cpu), 23, 22));
+ uint32_t imm = uimm (aarch64_get_instr (cpu), 21, 10);
+
+ if (!V)
+ {
+ /* GReg operations. */
+ switch (dispatch)
+ {
+ case 0: strb_abs (cpu, imm); return;
+ case 1: ldrb32_abs (cpu, imm); return;
+ case 2: ldrsb_abs (cpu, imm); return;
+ case 3: ldrsb32_abs (cpu, imm); return;
+ case 4: strh_abs (cpu, imm); return;
+ case 5: ldrh32_abs (cpu, imm); return;
+ case 6: ldrsh_abs (cpu, imm); return;
+ case 7: ldrsh32_abs (cpu, imm); return;
+ case 8: str32_abs (cpu, imm); return;
+ case 9: ldr32_abs (cpu, imm); return;
+ case 10: ldrsw_abs (cpu, imm); return;
+ case 12: str_abs (cpu, imm); return;
+ case 13: ldr_abs (cpu, imm); return;
+ case 14: prfm_abs (cpu, imm); return;
+
+ default:
+ case 11:
+ case 15:
+ HALT_UNALLOC;
+ }
+ }
+
+ /* FReg operations. */
+ switch (dispatch)
+ {
+ case 3: fldrq_abs (cpu, imm); return;
+ case 9: fldrs_abs (cpu, imm); return;
+ case 13: fldrd_abs (cpu, imm); return;
+
+ case 0: fstrb_abs (cpu, imm); return;
+ case 2: fstrq_abs (cpu, imm); return;
+ case 4: fstrh_abs (cpu, imm); return;
+ case 8: fstrs_abs (cpu, imm); return;
+ case 12: fstrd_abs (cpu, imm); return;
+
+ case 1: /* LDR 8 bit FP. */
+    case 5: /* LDR 16 bit FP. */
+ HALT_NYI;
+
+ default:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 14:
+ case 15:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+dexLoadExclusive (sim_cpu *cpu)
+{
+ /* assert instr[29:24] = 001000;
+ instr[31,30] = size
+ instr[23] = 0 if exclusive
+ instr[22] = L : 1 if load, 0 if store
+ instr[21] = 1 if pair
+ instr[20,16] = Rs
+ instr[15] = o0 : 1 if ordered
+ instr[14,10] = Rt2
+ instr[9,5] = Rn
+     instr[4,0] = Rt. */
+
+ switch (uimm (aarch64_get_instr (cpu), 22, 21))
+ {
+ case 2: ldxr (cpu); return;
+ case 0: stxr (cpu); return;
+ default: HALT_NYI;
+ }
+}
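+
+/* N.B. only the non-pair exclusive accesses are simulated above; the
+   pair forms (instr[21] set) halt as not-yet-implemented.  */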
+
+static void
+dexLoadOther (sim_cpu *cpu)
+{
+ uint32_t dispatch;
+
+ /* instr[29,25] = 111_0
+ instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
+     instr[21] and instr[11,10] combine to form the secondary dispatch.  */
+ if (uimm (aarch64_get_instr (cpu), 24, 24))
+ {
+ dexLoadUnsignedImmediate (cpu);
+ return;
+ }
+
+ dispatch = ( (uimm (aarch64_get_instr (cpu), 21, 21) << 2)
+ | uimm (aarch64_get_instr (cpu), 11, 10));
+ switch (dispatch)
+ {
+ case 0: dexLoadUnscaledImmediate (cpu); return;
+ case 1: dexLoadImmediatePrePost (cpu); return;
+ case 3: dexLoadImmediatePrePost (cpu); return;
+ case 6: dexLoadRegisterOffset (cpu); return;
+
+ default:
+ case 2:
+ case 4:
+ case 5:
+ case 7:
+ HALT_NYI;
+ }
+}
+
+static void
+store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ if ((rn == rd || rm == rd) && wb != NoWriteBack)
+    HALT_UNALLOC; /* Unpredictable if writeback base overlaps a source.  */
+
+ offset <<= 2;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_u32 (cpu, address,
+ aarch64_get_reg_u32 (cpu, rm, NO_SP));
+ aarch64_set_mem_u32 (cpu, address + 4,
+ aarch64_get_reg_u32 (cpu, rn, NO_SP));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ if ((rn == rd || rm == rd) && wb != NoWriteBack)
+    HALT_UNALLOC; /* Unpredictable if writeback base overlaps a source.  */
+
+ offset <<= 3;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_u64 (cpu, address,
+ aarch64_get_reg_u64 (cpu, rm, SP_OK));
+ aarch64_set_mem_u64 (cpu, address + 8,
+ aarch64_get_reg_u64 (cpu, rn, SP_OK));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+  /* Treat this as unalloc to make sure we don't do it. */
+ if (rn == rm)
+ HALT_UNALLOC;
+
+ offset <<= 2;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ /* Treat this as unalloc to make sure we don't do it. */
+ if (rn == rm)
+ HALT_UNALLOC;
+
+ offset <<= 2;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
+ aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ /* Treat this as unalloc to make sure we don't do it. */
+ if (rn == rm)
+ HALT_UNALLOC;
+
+ offset <<= 3;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
+ aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+dex_load_store_pair_gr (sim_cpu *cpu)
+{
+ /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
+ instr[29,25] = instruction encoding: 101_0
+ instr[26] = V : 1 if fp 0 if gp
+ instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
+ instr[22] = load/store (1=> load)
+ instr[21,15] = signed, scaled, offset
+ instr[14,10] = Rn
+ instr[ 9, 5] = Rd
+ instr[ 4, 0] = Rm. */
+
+ uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 3)
+ | uimm (aarch64_get_instr (cpu), 24, 22));
+ int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
+
+ switch (dispatch)
+ {
+ case 2: store_pair_u32 (cpu, offset, Post); return;
+ case 3: load_pair_u32 (cpu, offset, Post); return;
+ case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
+ case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
+ case 6: store_pair_u32 (cpu, offset, Pre); return;
+ case 7: load_pair_u32 (cpu, offset, Pre); return;
+
+ case 11: load_pair_s32 (cpu, offset, Post); return;
+ case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
+ case 15: load_pair_s32 (cpu, offset, Pre); return;
+
+ case 18: store_pair_u64 (cpu, offset, Post); return;
+ case 19: load_pair_u64 (cpu, offset, Post); return;
+ case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
+ case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
+ case 22: store_pair_u64 (cpu, offset, Pre); return;
+ case 23: load_pair_u64 (cpu, offset, Pre); return;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
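+
+/* For example, stp x0, x1, [sp, #-16]! is dispatch 22 above (64 bit
+   store pair with pre-writeback): the instruction encodes the signed,
+   scaled offset -2, which store_pair_u64 shifts left by 3 to recover
+   the byte offset -16.  */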
+
+static void
+store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ offset <<= 2;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_float (cpu, address, aarch64_get_FP_float (cpu, rm));
+ aarch64_set_mem_float (cpu, address + 4, aarch64_get_FP_float (cpu, rn));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ offset <<= 3;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_mem_double (cpu, address, aarch64_get_FP_double (cpu, rm));
+ aarch64_set_mem_double (cpu, address + 8, aarch64_get_FP_double (cpu, rn));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ FRegister a;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ offset <<= 4;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_get_FP_long_double (cpu, rm, & a);
+ aarch64_set_mem_long_double (cpu, address, a);
+ aarch64_get_FP_long_double (cpu, rn, & a);
+ aarch64_set_mem_long_double (cpu, address + 16, a);
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ if (rm == rn)
+ HALT_UNALLOC;
+
+ offset <<= 2;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_FP_float (cpu, rm, aarch64_get_mem_float (cpu, address));
+ aarch64_set_FP_float (cpu, rn, aarch64_get_mem_float (cpu, address + 4));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ if (rm == rn)
+ HALT_UNALLOC;
+
+ offset <<= 3;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_set_FP_double (cpu, rm, aarch64_get_mem_double (cpu, address));
+ aarch64_set_FP_double (cpu, rn, aarch64_get_mem_double (cpu, address + 8));
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
+{
+ FRegister a;
+ unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
+
+ if (rm == rn)
+ HALT_UNALLOC;
+
+ offset <<= 4;
+
+ if (wb != Post)
+ address += offset;
+
+ aarch64_get_mem_long_double (cpu, address, & a);
+ aarch64_set_FP_long_double (cpu, rm, a);
+ aarch64_get_mem_long_double (cpu, address + 16, & a);
+ aarch64_set_FP_long_double (cpu, rn, a);
+
+ if (wb == Post)
+ address += offset;
+
+ if (wb != NoWriteBack)
+ aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
+}
+
+static void
+dex_load_store_pair_fp (sim_cpu *cpu)
+{
+ /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
+ instr[29,25] = instruction encoding
+ instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
+ instr[22] = load/store (1=> load)
+ instr[21,15] = signed, scaled, offset
+ instr[14,10] = Rn
+ instr[ 9, 5] = Rd
+ instr[ 4, 0] = Rm */
+
+ uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 3)
+ | uimm (aarch64_get_instr (cpu), 24, 22));
+ int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
+
+ switch (dispatch)
+ {
+ case 2: store_pair_float (cpu, offset, Post); return;
+ case 3: load_pair_float (cpu, offset, Post); return;
+ case 4: store_pair_float (cpu, offset, NoWriteBack); return;
+ case 5: load_pair_float (cpu, offset, NoWriteBack); return;
+ case 6: store_pair_float (cpu, offset, Pre); return;
+ case 7: load_pair_float (cpu, offset, Pre); return;
+
+ case 10: store_pair_double (cpu, offset, Post); return;
+ case 11: load_pair_double (cpu, offset, Post); return;
+ case 12: store_pair_double (cpu, offset, NoWriteBack); return;
+ case 13: load_pair_double (cpu, offset, NoWriteBack); return;
+ case 14: store_pair_double (cpu, offset, Pre); return;
+ case 15: load_pair_double (cpu, offset, Pre); return;
+
+ case 18: store_pair_long_double (cpu, offset, Post); return;
+ case 19: load_pair_long_double (cpu, offset, Post); return;
+ case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
+ case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
+ case 22: store_pair_long_double (cpu, offset, Pre); return;
+ case 23: load_pair_long_double (cpu, offset, Pre); return;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static inline unsigned
+vec_reg (unsigned v, unsigned o)
+{
+  return (v + o) & 0x1F;  /* There are 32 vector registers, V0-V31.  */
+}
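+
+/* N.B. the set of consecutive vector registers wraps around at V31,
+   so e.g. an LD4 naming V30 uses V30, V31, V0 and V1.  */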
+
+/* Load multiple N-element structures to N consecutive registers. */
+static void
+vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
+{
+ int all = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned size = uimm (aarch64_get_instr (cpu), 11, 10);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ switch (size)
+ {
+ case 0: /* 8-bit operations. */
+ if (all)
+ for (i = 0; i < (16 * N); i++)
+ aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
+ aarch64_get_mem_u8 (cpu, address + i));
+ else
+ for (i = 0; i < (8 * N); i++)
+ aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
+ aarch64_get_mem_u8 (cpu, address + i));
+ return;
+
+ case 1: /* 16-bit operations. */
+ if (all)
+ for (i = 0; i < (8 * N); i++)
+ aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
+ aarch64_get_mem_u16 (cpu, address + i * 2));
+ else
+ for (i = 0; i < (4 * N); i++)
+ aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
+ aarch64_get_mem_u16 (cpu, address + i * 2));
+ return;
+
+ case 2: /* 32-bit operations. */
+ if (all)
+ for (i = 0; i < (4 * N); i++)
+ aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
+ aarch64_get_mem_u32 (cpu, address + i * 4));
+ else
+ for (i = 0; i < (2 * N); i++)
+ aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
+ aarch64_get_mem_u32 (cpu, address + i * 4));
+ return;
+
+ case 3: /* 64-bit operations. */
+ if (all)
+ for (i = 0; i < (2 * N); i++)
+ aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
+ aarch64_get_mem_u64 (cpu, address + i * 8));
+ else
+ for (i = 0; i < N; i++)
+ aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
+ aarch64_get_mem_u64 (cpu, address + i * 8));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+/* LD4: load multiple 4-element to four consecutive registers. */
+static void
+LD4 (sim_cpu *cpu, uint64_t address)
+{
+ vec_load (cpu, address, 4);
+}
+
+/* LD3: load multiple 3-element structures to three consecutive registers. */
+static void
+LD3 (sim_cpu *cpu, uint64_t address)
+{
+ vec_load (cpu, address, 3);
+}
+
+/* LD2: load multiple 2-element structures to two consecutive registers. */
+static void
+LD2 (sim_cpu *cpu, uint64_t address)
+{
+ vec_load (cpu, address, 2);
+}
+
+/* Load multiple 1-element structures into one register. */
+static void
+LD1_1 (sim_cpu *cpu, uint64_t address)
+{
+ int all = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned size = uimm (aarch64_get_instr (cpu), 11, 10);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ switch (size)
+ {
+ case 0:
+ /* LD1 {Vd.16b}, addr, #16 */
+ /* LD1 {Vd.8b}, addr, #8 */
+ for (i = 0; i < (all ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_mem_u8 (cpu, address + i));
+ return;
+
+ case 1:
+ /* LD1 {Vd.8h}, addr, #16 */
+ /* LD1 {Vd.4h}, addr, #8 */
+ for (i = 0; i < (all ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ aarch64_get_mem_u16 (cpu, address + i * 2));
+ return;
+
+ case 2:
+ /* LD1 {Vd.4s}, addr, #16 */
+ /* LD1 {Vd.2s}, addr, #8 */
+ for (i = 0; i < (all ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_mem_u32 (cpu, address + i * 4));
+ return;
+
+ case 3:
+ /* LD1 {Vd.2d}, addr, #16 */
+ /* LD1 {Vd.1d}, addr, #8 */
+ for (i = 0; i < (all ? 2 : 1); i++)
+ aarch64_set_vec_u64 (cpu, vd, i,
+ aarch64_get_mem_u64 (cpu, address + i * 8));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+/* Load multiple 1-element structures into two registers. */
+static void
+LD1_2 (sim_cpu *cpu, uint64_t address)
+{
+  /* FIXME: This algorithm is *exactly* the same as the LD2 version.
+     Architecturally they differ: LD2 de-interleaves the 2-element
+     structures across the two registers, whereas LD1 (multiple
+     structures) fills each register with consecutive elements, so
+     the shared vec_load implementation cannot be correct for both.  */
+}
+
+/* Load multiple 1-element structures into three registers. */
+static void
+LD1_3 (sim_cpu *cpu, uint64_t address)
+{
+ /* FIXME: This algorithm is *exactly* the same as the LD3 version.
+ So why have two different instructions ? There must be something
+ wrong somewhere. */
+ vec_load (cpu, address, 3);
+}
+
+/* Load multiple 1-element structures into four registers. */
+static void
+LD1_4 (sim_cpu *cpu, uint64_t address)
+{
+ /* FIXME: This algorithm is *exactly* the same as the LD4 version.
+ So why have two different instructions ? There must be something
+ wrong somewhere. */
+ vec_load (cpu, address, 4);
+}
+
+/* Store multiple N-element structures to N consecutive registers. */
+static void
+vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
+{
+ int all = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned size = uimm (aarch64_get_instr (cpu), 11, 10);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ switch (size)
+ {
+ case 0: /* 8-bit operations. */
+ if (all)
+ for (i = 0; i < (16 * N); i++)
+ aarch64_set_mem_u8
+ (cpu, address + i,
+ aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
+ else
+ for (i = 0; i < (8 * N); i++)
+ aarch64_set_mem_u8
+ (cpu, address + i,
+ aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
+ return;
+
+ case 1: /* 16-bit operations. */
+ if (all)
+ for (i = 0; i < (8 * N); i++)
+ aarch64_set_mem_u16
+ (cpu, address + i * 2,
+ aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
+ else
+ for (i = 0; i < (4 * N); i++)
+ aarch64_set_mem_u16
+ (cpu, address + i * 2,
+ aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
+ return;
+
+ case 2: /* 32-bit operations. */
+ if (all)
+ for (i = 0; i < (4 * N); i++)
+ aarch64_set_mem_u32
+ (cpu, address + i * 4,
+ aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
+ else
+ for (i = 0; i < (2 * N); i++)
+ aarch64_set_mem_u32
+ (cpu, address + i * 4,
+ aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
+ return;
+
+ case 3: /* 64-bit operations. */
+ if (all)
+ for (i = 0; i < (2 * N); i++)
+ aarch64_set_mem_u64
+ (cpu, address + i * 8,
+ aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
+ else
+ for (i = 0; i < N; i++)
+ aarch64_set_mem_u64
+ (cpu, address + i * 8,
+ aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+/* Store multiple 4-element structure to four consecutive registers. */
+static void
+ST4 (sim_cpu *cpu, uint64_t address)
+{
+ vec_store (cpu, address, 4);
+}
+
+/* Store multiple 3-element structures to three consecutive registers. */
+static void
+ST3 (sim_cpu *cpu, uint64_t address)
+{
+ vec_store (cpu, address, 3);
+}
+
+/* Store multiple 2-element structures to two consecutive registers. */
+static void
+ST2 (sim_cpu *cpu, uint64_t address)
+{
+ vec_store (cpu, address, 2);
+}
+
+/* Store multiple 1-element structures into one register. */
+static void
+ST1_1 (sim_cpu *cpu, uint64_t address)
+{
+ int all = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned size = uimm (aarch64_get_instr (cpu), 11, 10);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned i;
+
+ switch (size)
+ {
+ case 0:
+ for (i = 0; i < (all ? 16 : 8); i++)
+ aarch64_set_mem_u8 (cpu, address + i,
+ aarch64_get_vec_u8 (cpu, vd, i));
+ return;
+
+ case 1:
+ for (i = 0; i < (all ? 8 : 4); i++)
+ aarch64_set_mem_u16 (cpu, address + i * 2,
+ aarch64_get_vec_u16 (cpu, vd, i));
+ return;
+
+ case 2:
+ for (i = 0; i < (all ? 4 : 2); i++)
+ aarch64_set_mem_u32 (cpu, address + i * 4,
+ aarch64_get_vec_u32 (cpu, vd, i));
+ return;
+
+ case 3:
+ for (i = 0; i < (all ? 2 : 1); i++)
+ aarch64_set_mem_u64 (cpu, address + i * 8,
+ aarch64_get_vec_u64 (cpu, vd, i));
+ return;
+
+ default:
+ HALT_UNREACHABLE;
+ }
+}
+
+/* Store multiple 1-element structures into two registers. */
+static void
+ST1_2 (sim_cpu *cpu, uint64_t address)
+{
+ /* FIXME: This algorithm is *exactly* the same as the ST2 version.
+ So why have two different instructions ? There must be
+ something wrong somewhere. */
+ vec_store (cpu, address, 2);
+}
+
+/* Store multiple 1-element structures into three registers. */
+static void
+ST1_3 (sim_cpu *cpu, uint64_t address)
+{
+ /* FIXME: This algorithm is *exactly* the same as the ST3 version.
+ So why have two different instructions ? There must be
+ something wrong somewhere. */
+ vec_store (cpu, address, 3);
+}
+
+/* Store multiple 1-element structures into four registers. */
+static void
+ST1_4 (sim_cpu *cpu, uint64_t address)
+{
+ /* FIXME: This algorithm is *exactly* the same as the ST4 version.
+ So why have two different instructions ? There must be
+ something wrong somewhere. */
+ vec_store (cpu, address, 4);
+}
+
+static void
+do_vec_LDnR (sim_cpu *cpu, uint64_t address)
+{
+ /* instr[31] = 0
+ instr[30] = element selector 0=>half, 1=>all elements
+ instr[29,24] = 00 1101
+ instr[23] = 0=>simple, 1=>post
+ instr[22] = 1
+ instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
+ instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
+ 11111 (immediate post inc)
+ instr[15,14] = 11
+ instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
+ instr[12] = 0
+ instr[11,10] = element size 00=> byte(b), 01=> half(h),
+ 10=> word(s), 11=> double(d)
+ instr[9,5] = address
+ instr[4,0] = Vd */
+
+ unsigned full = uimm (aarch64_get_instr (cpu), 30, 30);
+ unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0);
+ unsigned size = uimm (aarch64_get_instr (cpu), 11, 10);
+ int i;
+
+ NYI_assert (29, 24, 0x0D);
+ NYI_assert (22, 22, 1);
+ NYI_assert (15, 14, 3);
+ NYI_assert (12, 12, 0);
+
+ switch ((uimm (aarch64_get_instr (cpu), 13, 13) << 1)
+ | uimm (aarch64_get_instr (cpu), 21, 21))
+ {
+ case 0: /* LD1R. */
+ switch (size)
+ {
+ case 0:
+ {
+ uint8_t val = aarch64_get_mem_u8 (cpu, address);
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i, val);
+ break;
+ }
+
+ case 1:
+ {
+ uint16_t val = aarch64_get_mem_u16 (cpu, address);
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i, val);
+ break;
+ }
+
+ case 2:
+ {
+ uint32_t val = aarch64_get_mem_u32 (cpu, address);
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i, val);
+ break;
+ }
+
+ case 3:
+ {
+ uint64_t val = aarch64_get_mem_u64 (cpu, address);
+ for (i = 0; i < (full ? 2 : 1); i++)
+ aarch64_set_vec_u64 (cpu, vd, i, val);
+ break;
+ }
+
+ default:
+ HALT_UNALLOC;
+ }
+ break;
+
+ case 1: /* LD2R. */
+ switch (size)
+ {
+ case 0:
+ {
+ uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
+ uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+                  aarch64_set_vec_u8 (cpu, vd, i, val1);
+                  aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
+ }
+ break;
+ }
+
+ case 1:
+ {
+ uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
+ uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
+
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+                  aarch64_set_vec_u16 (cpu, vd, i, val1);
+                  aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
+ }
+ break;
+ }
+
+ case 2:
+ {
+ uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
+ uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+                  aarch64_set_vec_u32 (cpu, vd, i, val1);
+                  aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
+ }
+ break;
+ }
+
+ case 3:
+ {
+ uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
+ uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
+
+ for (i = 0; i < (full ? 2 : 1); i++)
+ {
+                  aarch64_set_vec_u64 (cpu, vd, i, val1);
+                  aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
+ }
+ break;
+ }
+
+ default:
+ HALT_UNALLOC;
+ }
+ break;
+
+ case 2: /* LD3R. */
+ switch (size)
+ {
+ case 0:
+ {
+ uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
+ uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
+ uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+                  aarch64_set_vec_u8 (cpu, vd, i, val1);
+                  aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
+                  aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
+ }
+ }
+ break;
+
+ case 1:
+ {
+              uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
+              uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
+              uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
+
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+                  aarch64_set_vec_u16 (cpu, vd, i, val1);
+                  aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
+                  aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
+ }
+ }
+ break;
+
+ case 2:
+ {
+ uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
+ uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
+ uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+                  aarch64_set_vec_u32 (cpu, vd, i, val1);
+                  aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
+                  aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
+ }
+ }
+ break;
+
+ case 3:
+ {
+ uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
+ uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
+ uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
+
+ for (i = 0; i < (full ? 2 : 1); i++)
+ {
+                  aarch64_set_vec_u64 (cpu, vd, i, val1);
+                  aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
+                  aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
+ }
+ }
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+ break;
+
+ case 3: /* LD4R. */
+ switch (size)
+ {
+ case 0:
+ {
+ uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
+ uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
+ uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
+ uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
+
+ for (i = 0; i < (full ? 16 : 8); i++)
+ {
+                  aarch64_set_vec_u8 (cpu, vd, i, val1);
+                  aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
+                  aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
+                  aarch64_set_vec_u8 (cpu, vec_reg (vd, 3), i, val4);
+ }
+ }
+ break;
+
+ case 1:
+ {
+              uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
+              uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
+              uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
+              uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
+
+ for (i = 0; i < (full ? 8 : 4); i++)
+ {
+ aarch64_set_vec_u16 (cpu, vd, 0, val1);
+ aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
+ aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
+ aarch64_set_vec_u16 (cpu, vd + 3, 0, val4);
+ }
+ }
+ break;
+
+ case 2:
+ {
+ uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
+ uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
+ uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
+ uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
+
+ for (i = 0; i < (full ? 4 : 2); i++)
+ {
+		aarch64_set_vec_u32 (cpu, vd, i, val1);
+		aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
+		aarch64_set_vec_u32 (cpu, vd + 2, i, val3);
+		aarch64_set_vec_u32 (cpu, vd + 3, i, val4);
+ }
+ }
+ break;
+
+ case 3:
+ {
+ uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
+ uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
+ uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
+ uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
+
+ for (i = 0; i < (full ? 2 : 1); i++)
+ {
+		aarch64_set_vec_u64 (cpu, vd, i, val1);
+		aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
+		aarch64_set_vec_u64 (cpu, vd + 2, i, val3);
+		aarch64_set_vec_u64 (cpu, vd + 3, i, val4);
+ }
+ }
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+ break;
+
+ default:
+ HALT_UNALLOC;
+ }
+}
+
+static void
+do_vec_load_store (sim_cpu *cpu)
+{
+ /* {LD|ST}<N> {Vd..Vd+N}, vaddr
+
+ instr[31] = 0
+ instr[30] = element selector 0=>half, 1=>all elements
+ instr[29,25] = 00110
+ instr[24] = ?
+ instr[23] = 0=>simple, 1=>post
+ instr[22] = 0=>store, 1=>load
+ instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
+ instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
+ 11111 (immediate post inc)
+ instr[15,12] = elements and destinations. eg for load:
+ 0000=>LD4 => load multiple 4-element to
+ four consecutive registers
+ 0100=>LD3 => load multiple 3-element to
+ three consecutive registers
+ 1000=>LD2 => load multiple 2-element to
+ two consecutive registers
+ 0010=>LD1 => load multiple 1-element to
+ four consecutive registers
+ 0110=>LD1 => load multiple 1-element to
+ three consecutive registers
+ 1010=>LD1 => load multiple 1-element to
+ two consecutive registers
+ 0111=>LD1 => load multiple 1-element to
+ one register
+                      1100=>LD1R,LD2R
+                      1110=>LD3R,LD4R
+ instr[11,10] = element size 00=> byte(b), 01=> half(h),
+ 10=> word(s), 11=> double(d)
+ instr[9,5] = Vn, can be SP
+ instr[4,0] = Vd */
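+
+  /* For example, with the field layout above, 0x4c400020 decodes as
+     LD4 {v0.16b - v3.16b}, [x1] : instr[30] = 1 (all elements),
+     instr[22] = 1 (load), instr[15,12] = 0000 (LD4),
+     instr[11,10] = 00 (byte), Vn = 1, Vd = 0.  */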
+
+ int post;
+ int load;
+ unsigned vn;
+ uint64_t address;
+ int type;
+
+ if (uimm (aarch64_get_instr (cpu), 31, 31) != 0
+ || uimm (aarch64_get_instr (cpu), 29, 25) != 0x06)
+ HALT_NYI;
+
+ type = uimm (aarch64_get_instr (cpu), 15, 12);
+  if (type != 0xE && type != 0xC && uimm (aarch64_get_instr (cpu), 21, 21) != 0)
+ HALT_NYI;
+
+ post = uimm (aarch64_get_instr (cpu), 23, 23);
+ load = uimm (aarch64_get_instr (cpu), 22, 22);
+ vn = uimm (aarch64_get_instr (cpu), 9, 5);
+ address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
+
+ if (post)
+ {
+ unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+
+ if (vm == R31)
+ {
+ unsigned sizeof_operation;
+
+ switch (type)
+ {
+ case 0: sizeof_operation = 32; break;
+ case 4: sizeof_operation = 24; break;
+ case 8: sizeof_operation = 16; break;
+
+ case 0xC:
+ sizeof_operation = uimm (aarch64_get_instr (cpu), 21, 21) ? 2 : 1;
+ sizeof_operation <<= uimm (aarch64_get_instr (cpu), 11, 10);
+ break;
+
+ case 0xE:
+	      sizeof_operation = uimm (aarch64_get_instr (cpu), 21, 21) ? 4 : 3;
+ sizeof_operation <<= uimm (aarch64_get_instr (cpu), 11, 10);
+ break;
+
+	      case 2: sizeof_operation = 32; break;
+	      case 6: sizeof_operation = 24; break;
+	      case 10: sizeof_operation = 16; break;
+	      case 7: sizeof_operation = 8; break;
+
+ default:
+ HALT_UNALLOC;
+ }
+
+	  /* The element selector bit doubles the transfer size for the
+	     multiple structure forms; the replicating LDnR forms (types
+	     0xC and 0xE) always transfer nregs << size bytes.  */
+	  if (uimm (aarch64_get_instr (cpu), 30, 30)
+	      && type != 0xC && type != 0xE)
+	    sizeof_operation *= 2;
+
+ aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
+ }
+ else
+ aarch64_set_reg_u64 (cpu, vn, SP_OK,
+ address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
+ }
+ else
+ {
+ NYI_assert (20, 16, 0);
+ }
+
+ if (load)
+ {
+ switch (type)
+ {
+ case 0: LD4 (cpu, address); return;
+ case 4: LD3 (cpu, address); return;
+ case 8: LD2 (cpu, address); return;
+ case 2: LD1_4 (cpu, address); return;
+ case 6: LD1_3 (cpu, address); return;
+ case 10: LD1_2 (cpu, address); return;
+ case 7: LD1_1 (cpu, address); return;
+
+ case 0xE:
+ case 0xC: do_vec_LDnR (cpu, address); return;
+
+ default:
+ HALT_NYI;
+ }
+ }
+
+ /* Stores. */
+ switch (type)
+ {
+ case 0: ST4 (cpu, address); return;
+ case 4: ST3 (cpu, address); return;
+ case 8: ST2 (cpu, address); return;
+ case 2: ST1_4 (cpu, address); return;
+ case 6: ST1_3 (cpu, address); return;
+ case 10: ST1_2 (cpu, address); return;
+ case 7: ST1_1 (cpu, address); return;
+ default:
+ HALT_NYI;
+ }
+}
+
+static void
+dexLdSt (sim_cpu *cpu)
+{
+ /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
+ assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
+ group == GROUP_LDST_1100 || group == GROUP_LDST_1110
+ bits [29,28:26] of a LS are the secondary dispatch vector. */
+ uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
+
+ switch (group2)
+ {
+ case LS_EXCL_000:
+ dexLoadExclusive (cpu); return;
+
+ case LS_LIT_010:
+ case LS_LIT_011:
+ dexLoadLiteral (cpu); return;
+
+ case LS_OTHER_110:
+ case LS_OTHER_111:
+ dexLoadOther (cpu); return;
+
+ case LS_ADVSIMD_001:
+ do_vec_load_store (cpu); return;
+
+ case LS_PAIR_100:
+ dex_load_store_pair_gr (cpu); return;
+
+ case LS_PAIR_101:
+ dex_load_store_pair_fp (cpu); return;
+
+ default:
+ /* Should never reach here. */
+ HALT_NYI;
+ }
+}
+
+/* Specific decode and execute for group Data Processing Register. */
+
+static void
+dexLogicalShiftedRegister (sim_cpu *cpu)
+{
+ /* assert instr[28:24] = 01010
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30,29:21] = op,N : 000 ==> AND, 001 ==> BIC,
+ 010 ==> ORR, 011 ==> ORN
+ 100 ==> EOR, 101 ==> EON,
+ 110 ==> ANDS, 111 ==> BICS
+ instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
+ instr[15,10] = count : must be 0xxxxx for 32 bit
+ instr[9,5] = Rn
+ instr[4,0] = Rd */
+
+ /* unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); */
+ uint32_t dispatch;
+ Shift shiftType;
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+
+  /* 32 bit operations must have count[5] = 0,
+     or else we have an UNALLOC.  */
+ uint32_t count = uimm (aarch64_get_instr (cpu), 15, 10);
+
+ if (!size && uimm (count, 5, 5))
+ HALT_UNALLOC;
+
+ shiftType = shift (aarch64_get_instr (cpu), 22);
+
+  /* Dispatch on size:op:N i.e. aarch64_get_instr (cpu)[31,29:21].  */
+ dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 29) << 1)
+ | uimm (aarch64_get_instr (cpu), 21, 21));
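+
+  /* For example AND w0, w1, w2 has size:op:N == 0000, giving dispatch
+     0, while ANDS x0, x1, x2 has size:op:N == 1110, giving dispatch
+     14.  */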
+
+ switch (dispatch)
+ {
+ case 0: and32_shift (cpu, shiftType, count); return;
+ case 1: bic32_shift (cpu, shiftType, count); return;
+ case 2: orr32_shift (cpu, shiftType, count); return;
+ case 3: orn32_shift (cpu, shiftType, count); return;
+ case 4: eor32_shift (cpu, shiftType, count); return;
+ case 5: eon32_shift (cpu, shiftType, count); return;
+ case 6: ands32_shift (cpu, shiftType, count); return;
+ case 7: bics32_shift (cpu, shiftType, count); return;
+ case 8: and64_shift (cpu, shiftType, count); return;
+ case 9: bic64_shift (cpu, shiftType, count); return;
+    case 10: orr64_shift (cpu, shiftType, count); return;
+    case 11: orn64_shift (cpu, shiftType, count); return;
+    case 12: eor64_shift (cpu, shiftType, count); return;
+    case 13: eon64_shift (cpu, shiftType, count); return;
+    case 14: ands64_shift (cpu, shiftType, count); return;
+    case 15: bics64_shift (cpu, shiftType, count); return;
+ default: HALT_UNALLOC;
+ }
+}
+
+/* 32 bit conditional select. */
+static void
+csel32 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ : aarch64_get_reg_u32 (cpu, rm, NO_SP));
+}
+
+/* 64 bit conditional select. */
+static void
+csel64 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ : aarch64_get_reg_u64 (cpu, rm, NO_SP));
+}
+
+/* 32 bit conditional increment. */
+static void
+csinc32 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
+}
+
+/* 64 bit conditional increment. */
+static void
+csinc64 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
+}
+
+/* 32 bit conditional invert. */
+static void
+csinv32 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
+}
+
+/* 64 bit conditional invert. */
+static void
+csinv64 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
+}
+
+/* 32 bit conditional negate. */
+static void
+csneg32 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
+}
+
+/* 64 bit conditional negate. */
+static void
+csneg64 (sim_cpu *cpu, CondCode cc)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ testConditionCode (cpu, cc)
+ ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
+}
+
+static void
+dexCondSelect (sim_cpu *cpu)
+{
+  /* assert instr[28,21] = 11010100
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
+ 100 ==> CSINV, 101 ==> CSNEG,
+ _1_ ==> UNALLOC
+ instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
+     instr[15,12] = cond
+     instr[9,5] = Rn
+     instr[4,0] = Rd  */
+
+ CondCode cc;
+ uint32_t dispatch;
+ uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29);
+ uint32_t op2 = uimm (aarch64_get_instr (cpu), 11, 10);
+
+ if (S == 1)
+ HALT_UNALLOC;
+
+ if (op2 & 0x2)
+ HALT_UNALLOC;
+
+ cc = condcode (aarch64_get_instr (cpu), 12);
+ dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 1) | op2);
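+
+  /* For example CSINC w0, w1, w2, ne has size == 0, op == 0 and
+     op2 == 01, giving dispatch 1 (csinc32).  */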
+
+ switch (dispatch)
+ {
+ case 0: csel32 (cpu, cc); return;
+ case 1: csinc32 (cpu, cc); return;
+ case 2: csinv32 (cpu, cc); return;
+ case 3: csneg32 (cpu, cc); return;
+ case 4: csel64 (cpu, cc); return;
+ case 5: csinc64 (cpu, cc); return;
+ case 6: csinv64 (cpu, cc); return;
+ case 7: csneg64 (cpu, cc); return;
+ default: HALT_UNALLOC;
+ }
+}
+
+/* Some helpers for counting leading 1 or 0 bits. */
+
+/* Counts the number of leading bits which are the same
+   in a 32 bit value; the result is in the range 1 to 32.  */
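+/* The loop below binary-searches for the length of the leading run:
+   it narrows an interval [LO, HI] containing the run length by
+   testing whether the top COUNT bits of the value are all zeros or
+   all ones.  For example leading32 (0x0000ffff) == 16 and
+   leading32 (0) == 32.  */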
+static uint32_t
+leading32 (uint32_t value)
+{
+  int32_t mask = 0xffff0000;
+  uint32_t count = 16; /* Counts number of bits set in mask.  */
+ uint32_t lo = 1; /* Lower bound for number of sign bits. */
+ uint32_t hi = 32; /* Upper bound for number of sign bits. */
+
+ while (lo + 1 < hi)
+ {
+ int32_t test = (value & mask);
+
+ if (test == 0 || test == mask)
+ {
+ lo = count;
+ count = (lo + hi) / 2;
+ mask >>= (count - lo);
+ }
+ else
+ {
+ hi = count;
+ count = (lo + hi) / 2;
+ mask <<= hi - count;
+ }
+ }
+
+ if (lo != hi)
+ {
+ int32_t test;
+
+ mask >>= 1;
+ test = (value & mask);
+
+ if (test == 0 || test == mask)
+ count = hi;
+ else
+ count = lo;
+ }
+
+ return count;
+}
+
+/* Counts the number of leading bits which are the same
+   in a 64 bit value; the result is in the range 1 to 64.  */
+static uint64_t
+leading64 (uint64_t value)
+{
+  int64_t mask = 0xffffffff00000000LL;
+ uint64_t count = 32; /* Counts number of bits set in mask. */
+ uint64_t lo = 1; /* Lower bound for number of sign bits. */
+ uint64_t hi = 64; /* Upper bound for number of sign bits. */
+
+ while (lo + 1 < hi)
+ {
+ int64_t test = (value & mask);
+
+ if (test == 0 || test == mask)
+ {
+ lo = count;
+ count = (lo + hi) / 2;
+ mask >>= (count - lo);
+ }
+ else
+ {
+ hi = count;
+ count = (lo + hi) / 2;
+ mask <<= hi - count;
+ }
+ }
+
+ if (lo != hi)
+ {
+ int64_t test;
+
+ mask >>= 1;
+ test = (value & mask);
+
+ if (test == 0 || test == mask)
+ count = hi;
+ else
+ count = lo;
+ }
+
+ return count;
+}
+
+/* Bit operations. */
+/* N.B register args may not be SP. */
+
+/* 32 bit count leading sign bits. */
+static void
+cls32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* N.B. the result needs to exclude the leading bit. */
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
+}
+
+/* 64 bit count leading sign bits. */
+static void
+cls64 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* N.B. the result needs to exclude the leading bit. */
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
+}
+
+/* 32 bit count leading zero bits. */
+static void
+clz32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+
+  /* If the sign (top) bit is set then the count is 0.  */
+ if (pick32 (value, 31, 31))
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
+ else
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
+}
+
+/* 64 bit count leading zero bits. */
+static void
+clz64 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+
+  /* If the sign (top) bit is set then the count is 0.  */
+ if (pick64 (value, 63, 63))
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
+ else
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
+}
+
+/* 32 bit reverse bits. */
+static void
+rbit32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t result = 0;
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ result <<= 1;
+ result |= (value & 1);
+ value >>= 1;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+}
+
+/* 64 bit reverse bits. */
+static void
+rbit64 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t result = 0;
+ int i;
+
+ for (i = 0; i < 64; i++)
+ {
+ result <<= 1;
+ result |= (value & 1L);
+ value >>= 1;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+}
+
+/* 32 bit reverse bytes. */
+static void
+rev32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t result = 0;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ {
+ result <<= 8;
+ result |= (value & 0xff);
+ value >>= 8;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+}
+
+/* 64 bit reverse bytes. */
+static void
+rev64 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t result = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ {
+ result <<= 8;
+ result |= (value & 0xffULL);
+ value >>= 8;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+}
+
+/* 32 bit reverse shorts. */
+/* N.B. this reverses the order of the bytes in each half word.  */
+static void
+revh32 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint32_t result = 0;
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ result <<= 8;
+ result |= (value & 0x00ff00ff);
+ value >>= 8;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+}
+
+/* 64 bit reverse shorts. */
+/* N.B. this reverses the order of the bytes in each half word.  */
+static void
+revh64 (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ uint64_t result = 0;
+ int i;
+
+ for (i = 0; i < 2; i++)
+ {
+ result <<= 8;
+ result |= (value & 0x00ff00ff00ff00ffULL);
+ value >>= 8;
+ }
+ aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
+}
+
+static void
+dexDataProc1Source (sim_cpu *cpu)
+{
+ /* assert instr[30] == 1
+     aarch64_get_instr (cpu)[28,21] == 11010110
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
+ instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
+     instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
+                             000010 ==> REV/REV32, 000011 ==> REV (64 bit)
+                             000100 ==> CLZ, 000101 ==> CLS
+                             ow ==> UNALLOC
+ instr[9,5] = rn : may not be SP
+ instr[4,0] = rd : may not be SP. */
+
+ uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29);
+ uint32_t opcode2 = uimm (aarch64_get_instr (cpu), 20, 16);
+ uint32_t opcode = uimm (aarch64_get_instr (cpu), 15, 10);
+ uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 31) << 3) | opcode);
+
+ if (S == 1)
+ HALT_UNALLOC;
+
+ if (opcode2 != 0)
+ HALT_UNALLOC;
+
+ if (opcode & 0x38)
+ HALT_UNALLOC;
+
+ switch (dispatch)
+ {
+ case 0: rbit32 (cpu); return;
+ case 1: revh32 (cpu); return;
+ case 2: rev32 (cpu); return;
+ case 4: clz32 (cpu); return;
+ case 5: cls32 (cpu); return;
+ case 8: rbit64 (cpu); return;
+ case 9: revh64 (cpu); return;
+    case 10: rev32 (cpu); return;
+    case 11: rev64 (cpu); return;
+    case 12: clz64 (cpu); return;
+    case 13: cls64 (cpu); return;
+ default: HALT_UNALLOC;
+ }
+}
+
+/* Variable shift.
+ Shifts by count supplied in register.
+ N.B register args may not be SP.
+ These all use the shifted auxiliary function for
+ simplicity and clarity. Writing the actual shift
+ inline would avoid a branch and so be faster but
+ would also necessitate getting signs right. */
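+
+/* For example ASRV w0, w1, w2 uses only the low five bits of w2, so a
+   shift count of 37 in w2 shifts w1 right by 5 bits.  */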
+
+/* 32 bit arithmetic shift right. */
+static void
+asrv32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
+ (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
+}
+
+/* 64 bit arithmetic shift right. */
+static void
+asrv64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
+ (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
+}
+
+/* 32 bit logical shift left. */
+static void
+lslv32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
+ (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
+}
+
+/* 64 bit logical shift left.  */
+static void
+lslv64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
+ (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
+}
+
+/* 32 bit logical shift right. */
+static void
+lsrv32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
+ (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
+}
+
+/* 64 bit logical shift right. */
+static void
+lsrv64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
+ (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
+}
+
+/* 32 bit rotate right. */
+static void
+rorv32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
+ (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
+}
+
+/* 64 bit rotate right. */
+static void
+rorv64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
+ (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
+}
+
+
+/* Divide.  */
+
+/* 32 bit signed divide. */
+static void
+sdiv32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ /* N.B. the pseudo-code does the divide using 64 bit data. */
+ /* TODO : check that this rounds towards zero as required. */
+ int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
+ int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
+
+ aarch64_set_reg_s64 (cpu, rd, NO_SP,
+ divisor ? ((int32_t) (dividend / divisor)) : 0);
+}
+
+/* 64 bit signed divide. */
+static void
+sdiv64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* TODO : check that this rounds towards zero as required. */
+ int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
+
+ aarch64_set_reg_s64
+ (cpu, rd, NO_SP,
+ divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
+}
+
+/* 32 bit unsigned divide. */
+static void
+udiv32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* N.B. the pseudo-code does the divide using 64 bit data. */
+ uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
+ uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ divisor ? (uint32_t) (dividend / divisor) : 0);
+}
+
+/* 64 bit unsigned divide. */
+static void
+udiv64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* TODO : check that this rounds towards zero as required. */
+ uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
+
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
+}
+
+static void
+dexDataProc2Source (sim_cpu *cpu)
+{
+ /* assert instr[30] == 0
+ instr[28,21] == 11010110
+ instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+ instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
+     instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
+ 001000 ==> LSLV, 001001 ==> LSRV
+ 001010 ==> ASRV, 001011 ==> RORV
+ ow ==> UNALLOC. */
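+
+  /* For example UDIV w0, w1, w2 has size == 0 and opcode == 000010,
+     giving dispatch == (0 << 3) | (0 << 2) | 2 == 2, i.e. udiv32.  */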
+
+ uint32_t dispatch;
+ uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29);
+ uint32_t opcode = uimm (aarch64_get_instr (cpu), 15, 10);
+
+ if (S == 1)
+ HALT_UNALLOC;
+
+ if (opcode & 0x34)
+ HALT_UNALLOC;
+
+ dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 31) << 3)
+ | (uimm (opcode, 3, 3) << 2)
+ | uimm (opcode, 1, 0));
+ switch (dispatch)
+ {
+ case 2: udiv32 (cpu); return;
+    case 3: sdiv32 (cpu); return;
+ case 4: lslv32 (cpu); return;
+ case 5: lsrv32 (cpu); return;
+ case 6: asrv32 (cpu); return;
+ case 7: rorv32 (cpu); return;
+ case 10: udiv64 (cpu); return;
+    case 11: sdiv64 (cpu); return;
+ case 12: lslv64 (cpu); return;
+ case 13: lsrv64 (cpu); return;
+ case 14: asrv64 (cpu); return;
+ case 15: rorv64 (cpu); return;
+ default: HALT_UNALLOC;
+ }
+}
+
+
+/* Multiply. */
+
+/* 32 bit multiply and add. */
+static void
+madd32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u32 (cpu, ra, NO_SP)
+ + aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ * aarch64_get_reg_u32 (cpu, rm, NO_SP));
+}
+
+/* 64 bit multiply and add. */
+static void
+madd64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, ra, NO_SP)
+ + aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ * aarch64_get_reg_u64 (cpu, rm, NO_SP));
+}
+
+/* 32 bit multiply and sub. */
+static void
+msub32 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u32 (cpu, ra, NO_SP)
+ - aarch64_get_reg_u32 (cpu, rn, NO_SP)
+ * aarch64_get_reg_u32 (cpu, rm, NO_SP));
+}
+
+/* 64 bit multiply and sub. */
+static void
+msub64 (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, ra, NO_SP)
+ - aarch64_get_reg_u64 (cpu, rn, NO_SP)
+ * aarch64_get_reg_u64 (cpu, rm, NO_SP));
+}
+
+/* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
+static void
+smaddl (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* N.B. we need to multiply the signed 32 bit values in rn, rm to
+ obtain a 64 bit product. */
+ aarch64_set_reg_s64
+ (cpu, rd, NO_SP,
+ aarch64_get_reg_s64 (cpu, ra, NO_SP)
+ + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
+ * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
+}
+
+/* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
+static void
+smsubl (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ /* N.B. we need to multiply the signed 32 bit values in rn, rm to
+ obtain a 64 bit product. */
+ aarch64_set_reg_s64
+ (cpu, rd, NO_SP,
+ aarch64_get_reg_s64 (cpu, ra, NO_SP)
+ - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
+ * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
+}
+
+/* Integer Multiply/Divide. */
+
+/* First some macros and a helper function. */
+/* Macros to test or access elements of 64 bit words. */
+
+/* Mask used to access lo 32 bits of 64 bit unsigned int. */
+#define LOW_WORD_MASK ((1ULL << 32) - 1)
+/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
+#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
+/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
+#define highWordToU64(_value_u64) ((_value_u64) >> 32)
+
+/* Offset of sign bit in 64 bit signed integer.  */
+#define SIGN_SHIFT_U64 63
+/* The sign bit itself -- also identifies the minimum negative int value. */
+#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
+/* Return true if a 64 bit signed int presented as an unsigned int is the
+ most negative value. */
+#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
+/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
+   int has its sign bit set.  */
+#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
+/* Return 1L or -1L according to whether a 64 bit signed int presented as
+ an unsigned int has its sign bit set or not. */
+#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
+/* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
+#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
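+
+/* For example, signOfU64 (1) == 1L and
+   signOfU64 (0x8000000000000000ULL) == -1L.  */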
+
+/* Multiply two 64 bit ints and return
+   the hi 64 bits of the 128 bit product.  */
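+
+/* Splitting each operand into 32 bit halves, a == ah * 2^32 + al and
+   b == bh * 2^32 + bl, the full product is
+       a * b == ah * bh * 2^64 + (ah * bl + al * bh) * 2^32 + al * bl
+   so the high half is ah * bh plus the upper words of the two middle
+   terms plus any carry out of the 32-bit-aligned additions.  */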
+
+static uint64_t
+mul64hi (uint64_t value1, uint64_t value2)
+{
+ uint64_t resultmid1;
+ uint64_t result;
+ uint64_t value1_lo = lowWordToU64 (value1);
+  uint64_t value1_hi = highWordToU64 (value1);
+ uint64_t value2_lo = lowWordToU64 (value2);
+ uint64_t value2_hi = highWordToU64 (value2);
+
+ /* Cross-multiply and collect results. */
+
+ uint64_t xproductlo = value1_lo * value2_lo;
+ uint64_t xproductmid1 = value1_lo * value2_hi;
+ uint64_t xproductmid2 = value1_hi * value2_lo;
+ uint64_t xproducthi = value1_hi * value2_hi;
+ uint64_t carry = 0;
+ /* Start accumulating 64 bit results. */
+ /* Drop bottom half of lowest cross-product. */
+ uint64_t resultmid = xproductlo >> 32;
+ /* Add in middle products. */
+ resultmid = resultmid + xproductmid1;
+
+ /* Check for overflow. */
+ if (resultmid < xproductmid1)
+ /* Carry over 1 into top cross-product. */
+ carry++;
+
+ resultmid1 = resultmid + xproductmid2;
+
+ /* Check for overflow. */
+ if (resultmid1 < xproductmid2)
+ /* Carry over 1 into top cross-product. */
+ carry++;
+
+ /* Drop lowest 32 bits of middle cross-product. */
+ result = resultmid1 >> 32;
+
+ /* Add top cross-product plus and any carry. */
+ result += xproducthi + carry;
+
+ return result;
+}
+
+/* Signed multiply high, source, source2 :
+ 64 bit, dest <-- high 64-bit of result. */
+static void
+smulh (sim_cpu *cpu)
+{
+ uint64_t uresult;
+ int64_t result;
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ GReg ra = greg (aarch64_get_instr (cpu), 10);
+ int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+ int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
+ uint64_t uvalue1;
+ uint64_t uvalue2;
+ int64_t signum = 1;
+
+ if (ra != R31)
+ HALT_UNALLOC;
+
+  /* Convert to unsigned and use the unsigned mul64hi routine
+     then fix the sign up afterwards.  */
+ if (value1 < 0)
+ {
+ signum *= -1L;
+ uvalue1 = -value1;
+ }
+ else
+ {
+ uvalue1 = value1;
+ }
+
+ if (value2 < 0)
+ {
+ signum *= -1L;
+ uvalue2 = -value2;
+ }
+ else
+ {
+ uvalue2 = value2;
+ }
+
+ uresult = mul64hi (uvalue1, uvalue2);
+ result = uresult;
+ result *= signum;
+
+ aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
+}
+
+/* Unsigned multiply add long -- source, source2 :
+ 32 bit, source3 : 64 bit. */
+static void
+umaddl (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
+     obtain a 64 bit product.  */
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, ra, NO_SP)
+ + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
+ * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
+}
+
+/* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
+static void
+umsubl (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
+     obtain a 64 bit product.  */
+ aarch64_set_reg_u64
+ (cpu, rd, NO_SP,
+ aarch64_get_reg_u64 (cpu, ra, NO_SP)
+ - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
+ * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
+}
+
+/* Unsigned multiply high, source, source2 :
+ 64 bit, dest <-- high 64-bit of result. */
+static void
+umulh (sim_cpu *cpu)
+{
+ unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+ GReg ra = greg (aarch64_get_instr (cpu), 10);
+
+ if (ra != R31)
+ HALT_UNALLOC;
+
+ aarch64_set_reg_u64 (cpu, rd, NO_SP,
+ mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
+ aarch64_get_reg_u64 (cpu, rm, NO_SP)));
+}
+
+static void
+dexDataProc3Source (sim_cpu *cpu)
+{
+ /* assert instr[28,24] == 11011. */
+ /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
+ instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
+     instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
+ instr[15] = o0 : 0/1 ==> ok
+ instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
+ 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
+ 0100 ==> SMULH, (64 bit only)
+                         1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
+ 1100 ==> UMULH (64 bit only)
+ ow ==> UNALLOC. */
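+
+  /* For example UMULH x0, x1, x2 has op31 == 110 and o0 == 0, giving
+     dispatch == (6 << 1) | 0 == 12, i.e. umulh.  */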
+
+ uint32_t dispatch;
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ uint32_t op54 = uimm (aarch64_get_instr (cpu), 30, 29);
+ uint32_t op31 = uimm (aarch64_get_instr (cpu), 23, 21);
+ uint32_t o0 = uimm (aarch64_get_instr (cpu), 15, 15);
+
+ if (op54 != 0)
+ HALT_UNALLOC;
+
+ if (size == 0)
+ {
+ if (op31 != 0)
+ HALT_UNALLOC;
+
+ if (o0 == 0)
+ madd32 (cpu);
+ else
+ msub32 (cpu);
+ return;
+ }
+
+ dispatch = (op31 << 1) | o0;
+
+ switch (dispatch)
+ {
+ case 0: madd64 (cpu); return;
+ case 1: msub64 (cpu); return;
+ case 2: smaddl (cpu); return;
+ case 3: smsubl (cpu); return;
+ case 4: smulh (cpu); return;
+ case 10: umaddl (cpu); return;
+ case 11: umsubl (cpu); return;
+ case 12: umulh (cpu); return;
+ default: HALT_UNALLOC;
+ }
+}
+
+static void
+dexDPReg (sim_cpu *cpu)
+{
+ /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
+ assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
+ bits [28:24:21] of a DPReg are the secondary dispatch vector. */
+ uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
+
+ switch (group2)
+ {
+ case DPREG_LOG_000:
+ case DPREG_LOG_001:
+ dexLogicalShiftedRegister (cpu); return;
+
+ case DPREG_ADDSHF_010:
+ dexAddSubtractShiftedRegister (cpu); return;
+
+ case DPREG_ADDEXT_011:
+ dexAddSubtractExtendedRegister (cpu); return;
+
+ case DPREG_ADDCOND_100:
+ {
+	/* This set bundles a variety of different operations.  */
+	/* Check for:  */
+	/* 1) add/sub with carry.  */
+ uint32_t mask1 = 0x1FE00000U;
+ uint32_t val1 = 0x1A000000U;
+ /* 2) cond compare register/immediate. */
+ uint32_t mask2 = 0x1FE00000U;
+ uint32_t val2 = 0x1A400000U;
+ /* 3) cond select. */
+ uint32_t mask3 = 0x1FE00000U;
+ uint32_t val3 = 0x1A800000U;
+ /* 4) data proc 1/2 source. */
+ uint32_t mask4 = 0x1FE00000U;
+ uint32_t val4 = 0x1AC00000U;
+
+ if ((aarch64_get_instr (cpu) & mask1) == val1)
+ dexAddSubtractWithCarry (cpu);
+
+ else if ((aarch64_get_instr (cpu) & mask2) == val2)
+ CondCompare (cpu);
+
+ else if ((aarch64_get_instr (cpu) & mask3) == val3)
+ dexCondSelect (cpu);
+
+ else if ((aarch64_get_instr (cpu) & mask4) == val4)
+ {
+ /* Bit 30 is clear for data proc 2 source
+ and set for data proc 1 source. */
+ if (aarch64_get_instr (cpu) & (1U << 30))
+ dexDataProc1Source (cpu);
+ else
+ dexDataProc2Source (cpu);
+ }
+
+ else
+ /* Should not reach here. */
+ HALT_NYI;
+
+ return;
+ }
+
+ case DPREG_3SRC_110:
+ dexDataProc3Source (cpu); return;
+
+ case DPREG_UNALLOC_101:
+ HALT_UNALLOC;
+
+ case DPREG_3SRC_111:
+ dexDataProc3Source (cpu); return;
+
+ default:
+ /* Should never reach here. */
+ HALT_NYI;
+ }
+}
+
+/* Unconditional Branch immediate.
+   Offset is a PC-relative byte offset in the range +/- 128MiB.
+   The offset is passed in already scaled by the decode routine,
+   which converts the word offset in the instruction into a byte
+   offset.  */
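+
+/* For example an imm26 field of 1 becomes a byte offset of 4,
+   i.e. a branch to the next instruction.  */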
+
+/* Unconditional branch. */
+static void
+buc (sim_cpu *cpu, int32_t offset)
+{
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+static unsigned stack_depth = 0;
+
+/* Unconditional branch and link -- writes return PC to LR. */
+static void
+bl (sim_cpu *cpu, int32_t offset)
+{
+ aarch64_save_LR (cpu);
+ aarch64_set_next_PC_by_offset (cpu, offset);
+
+ if (TRACE_BRANCH_P (cpu))
+ {
+ ++ stack_depth;
+ TRACE_BRANCH (cpu,
+ " %*scall %" PRIx64 " [%s]"
+ " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
+ stack_depth, " ", aarch64_get_next_PC (cpu),
+ aarch64_get_func (aarch64_get_next_PC (cpu)),
+ aarch64_get_reg_u64 (cpu, 0, NO_SP),
+ aarch64_get_reg_u64 (cpu, 1, NO_SP),
+ aarch64_get_reg_u64 (cpu, 2, NO_SP)
+ );
+ }
+}
+
+/* Unconditional Branch register.
+ Branch/return address is in source register. */
+
+/* Unconditional branch. */
+static void
+br (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
+}
+
+/* Unconditional branch and link -- writes return PC to LR. */
+static void
+blr (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+
+  /* The pseudo code in the spec says we update LR before fetching
+     the value from the rn.  */
+ aarch64_save_LR (cpu);
+ aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
+
+ if (TRACE_BRANCH_P (cpu))
+ {
+ ++ stack_depth;
+ TRACE_BRANCH (cpu,
+ " %*scall %" PRIx64 " [%s]"
+ " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
+ stack_depth, " ", aarch64_get_next_PC (cpu),
+ aarch64_get_func (aarch64_get_next_PC (cpu)),
+ aarch64_get_reg_u64 (cpu, 0, NO_SP),
+ aarch64_get_reg_u64 (cpu, 1, NO_SP),
+ aarch64_get_reg_u64 (cpu, 2, NO_SP)
+ );
+ }
+}
+
+/* Return -- the assembler will default the source to LR.  This is
+   functionally equivalent to br but, presumably, unlike br it side
+   effects the branch predictor.  */
+static void
+ret (sim_cpu *cpu)
+{
+ unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+ aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
+
+ if (TRACE_BRANCH_P (cpu))
+ {
+ TRACE_BRANCH (cpu,
+ " %*sreturn [result: %" PRIx64 "]",
+ stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
+ -- stack_depth;
+ }
+}
+
+/* NOP -- we implement this and call it from the decode in case we
+ want to intercept it later. */
+
+static void
+nop (sim_cpu *cpu)
+{
+}
+
+/* Data synchronization barrier. */
+
+static void
+dsb (sim_cpu *cpu)
+{
+}
+
+/* Data memory barrier. */
+
+static void
+dmb (sim_cpu *cpu)
+{
+}
+
+/* Instruction synchronization barrier. */
+
+static void
+isb (sim_cpu *cpu)
+{
+}
+
+static void
+dexBranchImmediate (sim_cpu *cpu)
+{
+ /* assert instr[30,26] == 00101
+ instr[31] ==> 0 == B, 1 == BL
+ instr[25,0] == imm26 branch offset counted in words. */
+
+ uint32_t top = uimm (aarch64_get_instr (cpu), 31, 31);
+  /* We have a 26 bit signed word offset which we need to pass to the
+     execute routine as a signed byte offset.  */
+ int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
+
+ if (top)
+ bl (cpu, offset);
+ else
+ buc (cpu, offset);
+}
+
+/* Control Flow. */
+
+/* Conditional branch
+
+   Offset is a PC-relative byte offset in the range +/- 1MiB.
+   Pos is a bit position in the range 0 .. 63.
+
+   cc is a CondCode enum value as pulled out of the decode.
+
+ N.B. any offset register (source) can only be Xn or Wn. */
+
+static void
+bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
+{
+  /* The test returns TRUE if CC is met.  */
+ if (testConditionCode (cpu, cc))
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 32 bit branch on register non-zero. */
+static void
+cbnz32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 64 bit branch on register non-zero.  */
+static void
+cbnz (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 32 bit branch on register zero.  */
+static void
+cbz32 (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 64 bit branch on register zero. */
+static void
+cbz (sim_cpu *cpu, int32_t offset)
+{
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* Branch on register bit test non-zero -- one size fits all. */
+static void
+tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
+{
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* Branch on register bit test zero -- one size fits all.  */
+static void
+tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
+{
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
+ aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+static void
+dexCompareBranchImmediate (sim_cpu *cpu)
+{
+ /* instr[30,25] = 01 1010
+ instr[31] = size : 0 ==> 32, 1 ==> 64
+ instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
+ instr[23,5] = simm19 branch offset counted in words
+ instr[4,0] = rt */
+
+ uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+ uint32_t op = uimm (aarch64_get_instr (cpu), 24, 24);
+ int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
+
+ if (size == 0)
+ {
+ if (op == 0)
+ cbz32 (cpu, offset);
+ else
+ cbnz32 (cpu, offset);
+ }
+ else
+ {
+ if (op == 0)
+ cbz (cpu, offset);
+ else
+ cbnz (cpu, offset);
+ }
+}
+
+static void
+dexTestBranchImmediate (sim_cpu *cpu)
+{
+ /* instr[31] = b5 : bit 5 of test bit idx
+ instr[30,25] = 01 1011
+ instr[24] = op : 0 ==> TBZ, 1 == TBNZ
+ instr[23,19] = b40 : bits 4 to 0 of test bit idx
+ instr[18,5] = simm14 : signed offset counted in words
+ instr[4,0] = uimm5 */
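+
+  /* For example TBZ x5, #63, label has b5 == 1 and b40 == 11111,
+     giving pos == 63; hence the 64 bit bit-test in tbz/tbnz above.  */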
+
+  uint32_t pos = ((uimm (aarch64_get_instr (cpu), 31, 31) << 5)
+		  | uimm (aarch64_get_instr (cpu), 23, 19));
+ int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
+
+ NYI_assert (30, 25, 0x1b);
+
+ if (uimm (aarch64_get_instr (cpu), 24, 24) == 0)
+ tbz (cpu, pos, offset);
+ else
+ tbnz (cpu, pos, offset);
+}
+
+static void
+dexCondBranchImmediate (sim_cpu *cpu)
+{
+ /* instr[31,25] = 010 1010
+ instr[24] = op1; op => 00 ==> B.cond
+ instr[23,5] = simm19 : signed offset counted in words
+ instr[4] = op0
+ instr[3,0] = cond */
+
+ int32_t offset;
+ CondCode cc;
+ uint32_t op = ((uimm (aarch64_get_instr (cpu), 24, 24) << 1)
+ | uimm (aarch64_get_instr (cpu), 4, 4));
+
+ NYI_assert (31, 25, 0x2a);
+
+ if (op != 0)
+ HALT_UNALLOC;
+
+ offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
+ cc = condcode (aarch64_get_instr (cpu), 0);
+
+ bcc (cpu, offset, cc);
+}
+
+static void
+dexBranchRegister (sim_cpu *cpu)
+{
+  /* instr[31,25] = 110 1011
+     instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET,
+                         4 ==> ERET, 5 ==> DRPS
+     instr[20,16] = op2 : must be 11111
+     instr[15,10] = op3 : must be 000000
+     instr[4,0] = op4 : must be 00000.  */
+
+ uint32_t op = uimm (aarch64_get_instr (cpu), 24, 21);
+ uint32_t op2 = uimm (aarch64_get_instr (cpu), 20, 16);
+ uint32_t op3 = uimm (aarch64_get_instr (cpu), 15, 10);
+ uint32_t op4 = uimm (aarch64_get_instr (cpu), 4, 0);
+
+ NYI_assert (31, 25, 0x6b);
+
+ if (op2 != 0x1F || op3 != 0 || op4 != 0)
+ HALT_UNALLOC;
+
+ if (op == 0)
+ br (cpu);
+
+ else if (op == 1)
+ blr (cpu);
+
+ else if (op == 2)
+ ret (cpu);
+
+ else
+ {
+      /* ERET and DRPS accept 0b11111 for rn = aarch64_get_instr (cpu)[9,5];
+	 anything else is unallocated.  */
+      uint32_t rn = greg (aarch64_get_instr (cpu), 5);
+
+ if (rn != 0x1f)
+ HALT_UNALLOC;
+
+ if (op == 4 || op == 5)
+ HALT_NYI;
+
+ HALT_UNALLOC;
+ }
+}
+
+/* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
+ but this may not be available. So instead we define the values we need
+ here. */
+#define AngelSVC_Reason_Open 0x01
+#define AngelSVC_Reason_Close 0x02
+#define AngelSVC_Reason_Write 0x05
+#define AngelSVC_Reason_Read 0x06
+#define AngelSVC_Reason_IsTTY 0x09
+#define AngelSVC_Reason_Seek 0x0A
+#define AngelSVC_Reason_FLen 0x0C
+#define AngelSVC_Reason_Remove 0x0E
+#define AngelSVC_Reason_Rename 0x0F
+#define AngelSVC_Reason_Clock 0x10
+#define AngelSVC_Reason_Time 0x11
+#define AngelSVC_Reason_System 0x12
+#define AngelSVC_Reason_Errno 0x13
+#define AngelSVC_Reason_GetCmdLine 0x15
+#define AngelSVC_Reason_HeapInfo 0x16
+#define AngelSVC_Reason_ReportException 0x18
+#define AngelSVC_Reason_Elapsed 0x30
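+
+/* An Angel call is made via HLT #0xf000 with the reason code in w0
+   and, for most reasons, a pointer to an argument block in x1.  The
+   result is returned in x0.  See handle_halt below.  */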
+
+
+static void
+handle_halt (sim_cpu *cpu, uint32_t val)
+{
+ uint64_t result = 0;
+
+ if (val != 0xf000)
+ {
+ TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGTRAP);
+ }
+
+ /* We have encountered an Angel SVC call. See if we can process it. */
+ switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
+ {
+ case AngelSVC_Reason_HeapInfo:
+ {
+ /* Get the values. */
+ uint64_t stack_top = aarch64_get_stack_start (cpu);
+ uint64_t heap_base = aarch64_get_heap_start (cpu);
+
+	/* Get the pointer.  */
+ uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
+ ptr = aarch64_get_mem_u64 (cpu, ptr);
+
+ /* Fill in the memory block. */
+ /* Start addr of heap. */
+ aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
+ /* End addr of heap. */
+ aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
+ /* Lowest stack addr. */
+ aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
+ /* Initial stack addr. */
+ aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
+
+ TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
+ }
+ break;
+
+ case AngelSVC_Reason_Open:
+ {
+	/* Get the pointer.  */
+	/* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);  */
+ /* FIXME: For now we just assume that we will only be asked
+ to open the standard file descriptors. */
+ static int fd = 0;
+ result = fd ++;
+
+ TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
+ }
+ break;
+
+ case AngelSVC_Reason_Close:
+ {
+ uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
+ TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
+ result = 0;
+ }
+ break;
+
+ case AngelSVC_Reason_Errno:
+ result = 0;
+ TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
+ break;
+
+ case AngelSVC_Reason_Clock:
+ result =
+#ifdef CLOCKS_PER_SEC
+ (CLOCKS_PER_SEC >= 100)
+ ? (clock () / (CLOCKS_PER_SEC / 100))
+ : ((clock () * 100) / CLOCKS_PER_SEC)
+#else
+ /* Presume unix... clock() returns microseconds. */
+ (clock () / 10000)
+#endif
+ ;
+ TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
+ break;
+
+ case AngelSVC_Reason_GetCmdLine:
+ {
+	/* Get the pointer.  */
+ uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
+ ptr = aarch64_get_mem_u64 (cpu, ptr);
+
+ /* FIXME: No command line for now. */
+ aarch64_set_mem_u64 (cpu, ptr, 0);
+ TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
+ }
+ break;
+
+ case AngelSVC_Reason_IsTTY:
+ result = 1;
+ TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
+ break;
+
+ case AngelSVC_Reason_Write:
+ {
+	/* Get the pointer.  */
+ uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
+ /* Get the write control block. */
+ uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
+ uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
+ uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
+
+ TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
+ PRIx64 " on descriptor %" PRIx64,
+ len, buf, fd);
+
+ if (len > 1280)
+ {
+ TRACE_SYSCALL (cpu,
+ " AngelSVC: Write: Suspiciously long write: %ld",
+ (long) len);
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGBUS);
+ }
+ else if (fd == 1)
+ {
+ printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
+ if (disas)
+ /* So that the output stays in sync with trace output. */
+ fflush (stdout);
+ }
+ else if (fd == 2)
+ {
+ TRACE (cpu, 0, "\n");
+ sim_io_eprintf (CPU_STATE (cpu), "%.*s",
+ (int) len, aarch64_get_mem_ptr (cpu, buf));
+ TRACE (cpu, 0, "\n");
+ }
+ else
+ {
+ TRACE_SYSCALL (cpu,
+ " AngelSVC: Write: Unexpected file handle: %d",
+ (int) fd);
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGABRT);
+ }
+ }
+ break;
+
+ case AngelSVC_Reason_ReportException:
+ {
+	/* Get the pointer.  */
+	uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
+	/* ptr = aarch64_get_mem_u64 (cpu, ptr);  */
+ uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
+ uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
+
+ TRACE_SYSCALL (cpu,
+ "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
+ type, state);
+
+ if (type == 0x20026)
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_exited, state);
+ else
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGINT);
+ }
+ break;
+
+ case AngelSVC_Reason_Read:
+ case AngelSVC_Reason_FLen:
+ case AngelSVC_Reason_Seek:
+ case AngelSVC_Reason_Remove:
+ case AngelSVC_Reason_Time:
+ case AngelSVC_Reason_System:
+ case AngelSVC_Reason_Rename:
+ case AngelSVC_Reason_Elapsed:
+ default:
+ TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
+ aarch64_get_reg_u32 (cpu, 0, NO_SP));
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_stopped, SIM_SIGTRAP);
+ }
+
+ aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
+}
+
+static void
+dexExcpnGen (sim_cpu *cpu)
+{
+ /* instr[31:24] = 11010100
+ instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
+ 010 ==> HLT, 101 ==> DBG GEN EXCPN
+ instr[20,5] = imm16
+ instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
+ instr[1,0] = LL : discriminates opc */
+
+ uint32_t opc = uimm (aarch64_get_instr (cpu), 23, 21);
+ uint32_t imm16 = uimm (aarch64_get_instr (cpu), 20, 5);
+ uint32_t opc2 = uimm (aarch64_get_instr (cpu), 4, 2);
+ uint32_t LL;
+
+ NYI_assert (31, 24, 0xd4);
+
+ if (opc2 != 0)
+ HALT_UNALLOC;
+
+ LL = uimm (aarch64_get_instr (cpu), 1, 0);
+
+ /* We only implement HLT and BRK for now. */
+ if (opc == 1 && LL == 0)
+ {
+ TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
+ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
+ sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
+ }
+
+ if (opc == 2 && LL == 0)
+ handle_halt (cpu, imm16);
+
+ else if (opc == 0 || opc == 5)
+ HALT_NYI;
+
+ else
+ HALT_UNALLOC;
+}
+
+static void
+dexSystem (sim_cpu *cpu)
+{
+ /* instr[31:22] = 1101 01010 0
+ instr[21] = L
+ instr[20,19] = op0
+ instr[18,16] = op1
+ instr[15,12] = CRn
+ instr[11,8] = CRm
+ instr[7,5] = op2
+ instr[4,0] = uimm5 */
+
+ /* We are interested in HINT, DSB, DMB and ISB
+
+     Hint #0 encodes NOOP (this is the only hint we care about)
+     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
+     CRm:op2 == 0000 000 (NOP itself), or CRm:op2 > 0000 101
+     (unallocated hints, which we also treat as NOP)
+
+     DSB, DMB, ISB are data synchronization barrier, data memory
+     barrier and instruction synchronization barrier, respectively,
+     where
+
+     L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
+     op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
+     CRm<3:2> ==> domain, CRm<1:0> ==> types,
+     domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
+              10 ==> InnerShareable, 11 ==> FullSystem
+ types : 01 ==> Reads, 10 ==> Writes,
+ 11 ==> All, 00 ==> All (domain == FullSystem). */
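+
+  /* For reference, DSB SY is 0xd5033f9f, DMB SY is 0xd5033fbf and
+     ISB is 0xd5033fdf, i.e. CRm == 1111 with op2 == 100, 101 and 110
+     respectively.  */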
+
+ unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+ uint32_t l_op0_op1_crn = uimm (aarch64_get_instr (cpu), 21, 12);
+
+ NYI_assert (31, 22, 0x354);
+
+ switch (l_op0_op1_crn)
+ {
+ case 0x032:
+ if (rt == 0x1F)
+ {
+	  /* NOP has CRm != 0000 OR
+	     (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
+ uint32_t crm = uimm (aarch64_get_instr (cpu), 11, 8);
+ uint32_t op2 = uimm (aarch64_get_instr (cpu), 7, 5);
+
+ if (crm != 0 || (op2 == 0 || op2 > 5))
+ {
+ /* Actually call nop method so we can reimplement it later. */
+ nop (cpu);
+ return;
+ }
+ }
+ HALT_NYI;
+
+ case 0x033:
+ {
+ uint32_t op2 = uimm (aarch64_get_instr (cpu), 7, 5);
+
+ switch (op2)
+ {
+ case 2:
+ HALT_NYI;
+
+ case 4: dsb (cpu); return;
+ case 5: dmb (cpu); return;
+ case 6: isb (cpu); return;
+ case 7:
+ default: HALT_UNALLOC;
+ }
+ }
+
+ case 0x3B0:
+ /* MRS Wt, sys-reg. */
+ /* FIXME: Ignore for now. */
+ return;
+
+ case 0x3B4:
+ case 0x3BD:
+ /* MRS Xt, sys-reg. */
+ /* FIXME: Ignore for now. */
+ return;
+
+ case 0x0B7:
+ /* DC <type>, x<n>. */
+ /* FIXME: Ignore for now. */
+ return;
+
+ default:
+ if (uimm (aarch64_get_instr (cpu), 21, 20) == 0x1)
+ /* MSR <sys-reg>, <Xreg>. */
+ return;
+ HALT_NYI;
+ }
+}
+
+static void
+dexBr (sim_cpu *cpu)
+{
+ /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
+ assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
+ bits [31,29] of a BrExSys are the secondary dispatch vector. */
+ uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
+
+ switch (group2)
+ {
+ case BR_IMM_000:
+ return dexBranchImmediate (cpu);
+
+ case BR_IMMCMP_001:
+ /* Compare has bit 25 clear while test has it set. */
+ if (!uimm (aarch64_get_instr (cpu), 25, 25))
+ dexCompareBranchImmediate (cpu);
+ else
+ dexTestBranchImmediate (cpu);
+ return;
+
+ case BR_IMMCOND_010:
+ /* This is a conditional branch if bit 25 is clear otherwise
+ unallocated. */
+ if (!uimm (aarch64_get_instr (cpu), 25, 25))
+ dexCondBranchImmediate (cpu);
+ else
+ HALT_UNALLOC;
+ return;
+
+ case BR_UNALLOC_011:
+ HALT_UNALLOC;
+
+ case BR_IMM_100:
+ dexBranchImmediate (cpu);
+ return;
+
+ case BR_IMMCMP_101:
+ /* Compare has bit 25 clear while test has it set. */
+ if (!uimm (aarch64_get_instr (cpu), 25, 25))
+ dexCompareBranchImmediate (cpu);
+ else
+ dexTestBranchImmediate (cpu);
+ return;
+
+ case BR_REG_110:
+ /* Unconditional branch reg has bit 25 set. */
+ if (uimm (aarch64_get_instr (cpu), 25, 25))
+ dexBranchRegister (cpu);
+
+      /* This includes Excpn Gen, System and unalloc operations.
+	 We need to decode the Excpn Gen operation BRK so we can plant
+	 debugger entry points.
+	 Excpn Gen operations have aarch64_get_instr (cpu)[24] = 0.
+	 We need to decode at least one of the System operations NOP
+	 which is an alias for HINT #0.
+	 System operations have aarch64_get_instr (cpu)[24,22] = 100.  */
+ else if (uimm (aarch64_get_instr (cpu), 24, 24) == 0)
+ dexExcpnGen (cpu);
+
+ else if (uimm (aarch64_get_instr (cpu), 24, 22) == 4)
+ dexSystem (cpu);
+
+ else
+ HALT_UNALLOC;
+
+ return;
+
+ case BR_UNALLOC_111:
+ HALT_UNALLOC;
+
+ default:
+ /* Should never reach here. */
+ HALT_NYI;
+ }
+}
+
+static void
+aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
+{
+  /* We need to check if gdb wants to break in here.  */
+  /* checkBreak (cpu);  */
+
+ uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
+
+ switch (group)
+ {
+ case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
+ case GROUP_LDST_0100: dexLdSt (cpu); break;
+ case GROUP_DPREG_0101: dexDPReg (cpu); break;
+ case GROUP_LDST_0110: dexLdSt (cpu); break;
+ case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
+ case GROUP_DPIMM_1000: dexDPImm (cpu); break;
+ case GROUP_DPIMM_1001: dexDPImm (cpu); break;
+ case GROUP_BREXSYS_1010: dexBr (cpu); break;
+ case GROUP_BREXSYS_1011: dexBr (cpu); break;
+ case GROUP_LDST_1100: dexLdSt (cpu); break;
+ case GROUP_DPREG_1101: dexDPReg (cpu); break;
+ case GROUP_LDST_1110: dexLdSt (cpu); break;
+ case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
+
+ case GROUP_UNALLOC_0001:
+ case GROUP_UNALLOC_0010:
+ case GROUP_UNALLOC_0011:
+ HALT_UNALLOC;
+
+ default:
+ /* Should never reach here. */
+ HALT_NYI;
+ }
+}
+
+static bfd_boolean
+aarch64_step (sim_cpu *cpu)
+{
+ uint64_t pc = aarch64_get_PC (cpu);
+
+ if (pc == TOP_LEVEL_RETURN_PC)
+ return FALSE;
+
+ aarch64_set_next_PC (cpu, pc + 4);
+ aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
+
+ if (TRACE_INSN_P (cpu))
+ {
+ if (disas)
+ TRACE_INSN (cpu, " pc = %" PRIx64 " ", pc);
+ else
+ TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %x", pc,
+ aarch64_get_instr (cpu));
+ }
+ else if (disas)
+ sim_io_eprintf (CPU_STATE (cpu), " %" PRIx64 " ", pc);
+
+ if (disas)
+ aarch64_print_insn (CPU_STATE (cpu), pc);
+
+ aarch64_decode_and_execute (cpu, pc);
+
+ return TRUE;
+}
+
+void
+aarch64_run (SIM_DESC sd)
+{
+ sim_cpu *cpu = STATE_CPU (sd, 0);
+
+ while (aarch64_step (cpu))
+ aarch64_update_PC (cpu);
+
+ sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
+ sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
+}
+
+void
+aarch64_init (sim_cpu *cpu, uint64_t pc)
+{
+ uint64_t sp = aarch64_get_stack_start (cpu);
+
+  /* Install SP, FP and PC and set LR to TOP_LEVEL_RETURN_PC
+     so we can detect a top-level return.  */
+ aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
+ aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
+ aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
+ aarch64_set_next_PC (cpu, pc);
+ aarch64_update_PC (cpu);
+ aarch64_init_LIT_table ();
+}