author | Nick Clifton <nickc@redhat.com> | 2015-11-24 08:47:59 +0000
committer | Nick Clifton <nickc@redhat.com> | 2015-11-24 08:47:59 +0000
commit | 2e8cf49e1387eba9c4ce062885b99a6eb76c01f8 (patch)
tree | 363800e2edad589cb37f72e10fc842097a8ec9c4 /sim/aarch64/simulator.c
parent | 351e610191016136a49ee2a0889f1c4929169fc6 (diff)
Add an AArch64 simulator to GDB.
sim
* configure.tgt: Add aarch64 entry.
* configure: Regenerate.
* sim/aarch64/configure.ac: New configure template.
* sim/aarch64/aclocal.m4: Generate.
* sim/aarch64/config.in: Generate.
* sim/aarch64/configure: Generate.
* sim/aarch64/cpustate.c: New file - functions for accessing
AArch64 registers.
* sim/aarch64/cpustate.h: New header.
* sim/aarch64/decode.h: New header.
* sim/aarch64/interp.c: New file - interface between GDB and
simulator.
* sim/aarch64/Makefile.in: New makefile template.
* sim/aarch64/memory.c: New file - functions for simulating
aarch64 memory accesses.
* sim/aarch64/memory.h: New header.
* sim/aarch64/sim-main.h: New header.
* sim/aarch64/simulator.c: New file - aarch64 simulator
functions.
* sim/aarch64/simulator.h: New header.
include/gdb
* sim-aarch64.h: New file.
sim/test
* configure: Regenerate.
* sim/aarch64: New directory.
Diffstat (limited to 'sim/aarch64/simulator.c')
-rw-r--r-- | sim/aarch64/simulator.c | 13047
1 file changed, 13047 insertions, 0 deletions
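Throughout the new simulator.c below, instruction fields are read with calls such as uimm (aarch64_get_instr (cpu), 9, 5), i.e. bits [hi,lo] of the 32-bit instruction word, zero-extended. A minimal standalone sketch of that kind of bitfield helper (bits32 and the sample word are illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Extract bits [hi,lo] (inclusive) of a 32-bit instruction word,
   zero-extended, the same field convention the sim's uimm calls use.  */
static uint32_t
bits32 (uint32_t insn, unsigned hi, unsigned lo)
{
  return (uint32_t) ((insn >> lo) & ((1ull << (hi - lo + 1)) - 1));
}

int
main (void)
{
  uint32_t insn = 0xf9400fe0;   /* Arbitrary word, used only as input.  */

  printf ("bits [4,0] = %u, bits [9,5] = %u\n",
          (unsigned) bits32 (insn, 4, 0),
          (unsigned) bits32 (insn, 9, 5));
  return 0;
}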
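expand_logical_immediate in the new file builds a run of imms+1 set bits, rotates it within its element, and replicates that element across 64 bits; aarch64_init_LIT_table then precomputes all 2^13 encodings, indexed with N in bit 12, immr in bits 11-6 and imms in bits 5-0. A small sketch of just the replication step, with a helper name of my own:

#include <stdint.h>
#include <stdio.h>

/* Replicate an element of the given width (2, 4, 8, 16, 32 or 64 bits)
   across a 64-bit value, the final step of expanding a logical
   immediate.  */
static uint64_t
replicate (uint64_t element, unsigned width)
{
  uint64_t result = 0;
  unsigned i;

  for (i = 0; i < 64; i += width)
    result |= element << i;
  return result;
}

int
main (void)
{
  /* A 32-bit element with bit 0 set -> 0x0000000100000001.  */
  printf ("%016llx\n", (unsigned long long) replicate (1, 32));
  /* An 8-bit element with bit 0 set  -> 0x0101010101010101.  */
  printf ("%016llx\n", (unsigned long long) replicate (1, 8));
  return 0;
}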
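The register-offset loads and stores (ldr32_scale_ext and friends) all form their effective address the same way: zero- or sign-extend the 32-bit index register (the extend helper), optionally shift it left by the access size (SCALE / OPT_SCALE), and add it to the base. A self-contained sketch of that arithmetic; unlike the patch's macros it takes the shift amount directly rather than an element size:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Zero- or sign-extend a 32-bit index register value to 64 bits.  */
static int64_t
extend_index (uint32_t value, bool sign_extend)
{
  return sign_extend ? (int64_t) (int32_t) value : (int64_t) value;
}

/* base + (extended index << shift); shift is log2 of the access size
   when the instruction asks for scaling, 0 otherwise.  */
static uint64_t
effective_address (uint64_t base, uint32_t index, bool sign_extend,
                   bool scaled, unsigned size_log2)
{
  int64_t extended = extend_index (index, sign_extend);

  return base + ((uint64_t) extended << (scaled ? size_log2 : 0));
}

int
main (void)
{
  /* A 64-bit access with a sign-extended, scaled index of -1: the
     displacement is -8, so the address printed is base - 8.  */
  printf ("%016llx\n",
          (unsigned long long)
          effective_address (0x10000, 0xffffffff, true, true, 3));
  return 0;
}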
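The _wb load/store variants all follow one pattern: add the signed 9-bit offset to the base before the access for pre-indexing, after it for post-indexing, and write the updated address back to the base register unless writeback is disabled (which is also why rn == rt with writeback is halted as unallocated). A condensed sketch of just that control flow; the WriteBack enum and the helper here are stand-ins, not the sim's own types:

#include <stdint.h>
#include <stdio.h>

typedef enum { NoWriteBack, Pre, Post } WriteBack;

/* Return the address used for the access; *new_base receives the value
   that would be written back to the base register.  */
static uint64_t
wb_address (uint64_t base, int32_t offset, WriteBack wb, uint64_t *new_base)
{
  uint64_t address = base;
  uint64_t access;

  if (wb != Post)        /* Pre-index (or plain offset): add first.  */
    address += offset;

  access = address;      /* The load or store uses this address.  */

  if (wb == Post)        /* Post-index: add after the access.  */
    address += offset;

  *new_base = (wb != NoWriteBack) ? address : base;
  return access;
}

int
main (void)
{
  uint64_t base, access;

  access = wb_address (0x1000, 16, Pre, &base);
  printf ("pre : access 0x%llx, base -> 0x%llx\n",
          (unsigned long long) access, (unsigned long long) base);

  access = wb_address (0x1000, 16, Post, &base);
  printf ("post: access 0x%llx, base -> 0x%llx\n",
          (unsigned long long) access, (unsigned long long) base);
  return 0;
}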
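set_flags_for_add32 recomputes the 32-bit addition in 64 bits, unsigned for the carry test and signed for the overflow test, and compares each against the truncated 32-bit result. A standalone sketch of that widening idea (the exact comparisons are mine, not a line-for-line copy of the sim's routine):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Derive the N, Z, C and V conditions for a 32-bit addition by
   widening both operands to 64 bits.  */
static void
add32_flags (uint32_t a, uint32_t b, bool *n, bool *z, bool *c, bool *v)
{
  uint32_t result  = a + b;
  uint64_t uresult = (uint64_t) a + (uint64_t) b;
  int64_t  sresult = (int64_t) (int32_t) a + (int64_t) (int32_t) b;

  *n = (result >> 31) & 1;            /* Negative.  */
  *z = (result == 0);                 /* Zero.  */
  *c = (uresult >> 32) != 0;          /* Carry out of bit 31.  */
  *v = (sresult != (int32_t) result); /* Signed overflow.  */
}

int
main (void)
{
  bool n, z, c, v;

  add32_flags (0x7fffffff, 1, &n, &z, &c, &v);  /* Signed overflow.  */
  printf ("N=%d Z=%d C=%d V=%d\n", n, z, c, v); /* N=1 Z=0 C=0 V=1  */

  add32_flags (0xffffffff, 1, &n, &z, &c, &v);  /* Unsigned carry.  */
  printf ("N=%d Z=%d C=%d V=%d\n", n, z, c, v); /* N=0 Z=1 C=1 V=0  */
  return 0;
}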
diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c new file mode 100644 index 0000000..31c054c --- /dev/null +++ b/sim/aarch64/simulator.c @@ -0,0 +1,13047 @@ +/* simulator.c -- Interface for the AArch64 simulator. + + Copyright (C) 2015 Free Software Foundation, Inc. + + Contributed by Red Hat. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <syscall.h> +#include <math.h> +#include <time.h> +#include <limits.h> + +#include "dis-asm.h" + +#include "simulator.h" +#include "cpustate.h" +#include "memory.h" + +#define NO_SP 0 +#define SP_OK 1 + +bfd_boolean disas = FALSE; + +#define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag)) +#define IS_SET(_X) ( TST (( _X ))) +#define IS_CLEAR(_X) (!TST (( _X ))) + +#define HALT_UNALLOC \ + do \ + { \ + if (TRACE_INSN_P (cpu)) \ + { \ + aarch64_print_insn (CPU_STATE (cpu), aarch64_get_PC (cpu)); \ + TRACE_INSN (cpu, \ + "Unallocated instruction detected at sim line %d,"\ + " exe addr %" PRIx64, \ + __LINE__, aarch64_get_PC (cpu)); \ + } \ + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\ + sim_stopped, SIM_SIGILL); \ + } \ + while (0) + +#define HALT_NYI \ + do \ + { \ + if (TRACE_INSN_P (cpu)) \ + { \ + aarch64_print_insn (CPU_STATE (cpu), aarch64_get_PC (cpu)); \ + TRACE_INSN (cpu, \ + "Unimplemented instruction detected at sim line %d,"\ + " exe addr %" PRIx64, \ + __LINE__, aarch64_get_PC (cpu)); \ + } \ + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\ + sim_stopped, SIM_SIGABRT); \ + } \ + while (0) + +#define NYI_assert(HI, LO, EXPECTED) \ + do \ + { \ + if (uimm (aarch64_get_instr (cpu), (HI), (LO)) != (EXPECTED)) \ + HALT_NYI; \ + } \ + while (0) + +#define HALT_UNREACHABLE \ + do \ + { \ + TRACE_EVENTS (cpu, "ISE: unreachable code point"); \ + sim_engine_abort (NULL, cpu, aarch64_get_PC (cpu), "Internal Error"); \ + } \ + while (0) + +/* Helper functions used by expandLogicalImmediate. */ + +/* for i = 1, ... N result<i-1> = 1 other bits are zero */ +static inline uint64_t +ones (int N) +{ + return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1)); +} + +/* result<0> to val<N> */ +static inline uint64_t +pickbit (uint64_t val, int N) +{ + return pickbits64 (val, N, N); +} + +static uint64_t +expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N) +{ + uint64_t mask; + uint64_t imm; + unsigned simd_size; + + /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R + (in other words, right rotated by R), then replicated. */ + if (N != 0) + { + simd_size = 64; + mask = 0xffffffffffffffffull; + } + else + { + switch (S) + { + case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break; + case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break; + case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break; + case 0x38 ... 
0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break; + case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break; + default: return 0; + } + mask = (1ull << simd_size) - 1; + /* Top bits are IGNORED. */ + R &= simd_size - 1; + } + + /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */ + if (S == simd_size - 1) + return 0; + + /* S+1 consecutive bits to 1. */ + /* NOTE: S can't be 63 due to detection above. */ + imm = (1ull << (S + 1)) - 1; + + /* Rotate to the left by simd_size - R. */ + if (R != 0) + imm = ((imm << (simd_size - R)) & mask) | (imm >> R); + + /* Replicate the value according to SIMD size. */ + switch (simd_size) + { + case 2: imm = (imm << 2) | imm; + case 4: imm = (imm << 4) | imm; + case 8: imm = (imm << 8) | imm; + case 16: imm = (imm << 16) | imm; + case 32: imm = (imm << 32) | imm; + case 64: break; + default: return 0; + } + + return imm; +} + +/* Instr[22,10] encodes N immr and imms. we want a lookup table + for each possible combination i.e. 13 bits worth of int entries. */ +#define LI_TABLE_SIZE (1 << 13) +static uint64_t LITable[LI_TABLE_SIZE]; + +void +aarch64_init_LIT_table (void) +{ + unsigned index; + + for (index = 0; index < LI_TABLE_SIZE; index++) + { + uint32_t N = uimm (index, 12, 12); + uint32_t immr = uimm (index, 11, 6); + uint32_t imms = uimm (index, 5, 0); + + LITable [index] = expand_logical_immediate (imms, immr, N); + } +} + +static void +dexNotify (sim_cpu *cpu) +{ + /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry + 2 ==> exit Java, 3 ==> start next bytecode. */ + uint32_t type = uimm (aarch64_get_instr (cpu), 14, 0); + + TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type); + + switch (type) + { + case 0: + /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0), + aarch64_get_reg_u64 (cpu, R22, 0)); */ + break; + case 1: + /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0), + aarch64_get_reg_u64 (cpu, R22, 0)); */ + break; + case 2: + /* aarch64_notifyMethodExit (); */ + break; + case 3: + /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0), + aarch64_get_reg_u64 (cpu, R22, 0)); */ + break; + } +} + +/* secondary decode within top level groups */ + +static void +dexPseudo (sim_cpu *cpu) +{ + /* assert instr[28,27] = 00 + + We provide 2 pseudo instructions: + + HALT stops execution of the simulator causing an immediate + return to the x86 code which entered it. + + CALLOUT initiates recursive entry into x86 code. A register + argument holds the address of the x86 routine. Immediate + values in the instruction identify the number of general + purpose and floating point register arguments to be passed + and the type of any value to be returned. */ + + uint32_t PSEUDO_HALT = 0xE0000000U; + uint32_t PSEUDO_CALLOUT = 0x00018000U; + uint32_t PSEUDO_CALLOUTR = 0x00018001U; + uint32_t PSEUDO_NOTIFY = 0x00014000U; + uint32_t dispatch; + + if (aarch64_get_instr (cpu) == PSEUDO_HALT) + { + TRACE_EVENTS (cpu, " Pseudo Halt Instruction"); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGTRAP); + } + + dispatch = uimm (aarch64_get_instr (cpu), 31, 15); + + /* We do not handle callouts at the moment. 
*/ + if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR) + { + TRACE_EVENTS (cpu, " Callout"); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGABRT); + } + + else if (dispatch == PSEUDO_NOTIFY) + dexNotify (cpu); + + else + HALT_UNALLOC; +} + +/* Load-store single register (unscaled offset) + These instructions employ a base register plus an unscaled signed + 9 bit offset. + + N.B. the base register (source) can be Xn or SP. all other + registers may not be SP. */ + +/* 32 bit load 32 bit unscaled signed 9 bit. */ +static void +ldur32 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 64 bit load 64 bit unscaled signed 9 bit. */ +static void +ldur64 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 32 bit load zero-extended byte unscaled signed 9 bit. */ +static void +ldurb32 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 32 bit load sign-extended byte unscaled signed 9 bit. */ +static void +ldursb32 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 64 bit load sign-extended byte unscaled signed 9 bit. 
*/ +static void +ldursb64 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 32 bit load zero-extended short unscaled signed 9 bit */ +static void +ldurh32 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 32 bit load sign-extended short unscaled signed 9 bit */ +static void +ldursh32 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 64 bit load sign-extended short unscaled signed 9 bit */ +static void +ldursh64 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* 64 bit load sign-extended word unscaled signed 9 bit */ +static void +ldursw (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + offset)); +} + +/* N.B. with stores the value in source is written to the address + identified by source2 modified by offset. */ + +/* 32 bit store 32 bit unscaled signed 9 bit. */ +static void +stur32 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_u32 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, + aarch64_get_reg_u32 (cpu, rd, NO_SP)); +} + +/* 64 bit store 64 bit unscaled signed 9 bit */ +static void +stur64 (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_u64 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, + aarch64_get_reg_u64 (cpu, rd, NO_SP)); +} + +/* 32 bit store byte unscaled signed 9 bit */ +static void +sturb (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_u8 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, + aarch64_get_reg_u8 (cpu, rd, NO_SP)); +} + +/* 32 bit store short unscaled signed 9 bit */ +static void +sturh (sim_cpu *cpu, int32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_u16 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, + aarch64_get_reg_u16 (cpu, rd, NO_SP)); +} + +/* Load single register pc-relative label + Offset is a signed 19 bit immediate count in words + rt may not be SP. 
*/ + +/* 32 bit pc-relative load */ +static void +ldr32_pcrel (sim_cpu *cpu, int32_t offset) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_mem_u32 + (cpu, aarch64_get_PC (cpu) + offset * 4)); +} + +/* 64 bit pc-relative load */ +static void +ldr_pcrel (sim_cpu *cpu, int32_t offset) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_mem_u64 + (cpu, aarch64_get_PC (cpu) + offset * 4)); +} + +/* sign extended 32 bit pc-relative load */ +static void +ldrsw_pcrel (sim_cpu *cpu, int32_t offset) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_mem_s32 + (cpu, aarch64_get_PC (cpu) + offset * 4)); +} + +/* float pc-relative load */ +static void +fldrs_pcrel (sim_cpu *cpu, int32_t offset) +{ + unsigned int rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, rd, + aarch64_get_mem_float + (cpu, aarch64_get_PC (cpu) + offset * 4)); +} + +/* double pc-relative load */ +static void +fldrd_pcrel (sim_cpu *cpu, int32_t offset) +{ + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, st, + aarch64_get_mem_double + (cpu, aarch64_get_PC (cpu) + offset * 4)); +} + +/* long double pc-relative load. */ +static void +fldrq_pcrel (sim_cpu *cpu, int32_t offset) +{ + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t addr = aarch64_get_PC (cpu) + offset * 4; + FRegister a; + + aarch64_get_mem_long_double (cpu, addr, & a); + aarch64_set_FP_long_double (cpu, st, a); +} + +/* This can be used to scale an offset by applying + the requisite shift. the second argument is either + 16, 32 or 64. */ + +#define SCALE(_offset, _elementSize) \ + ((_offset) << ScaleShift ## _elementSize) + +/* This can be used to optionally scale a register derived offset + by applying the requisite shift as indicated by the Scaling + argument. the second argument is either Byte, Short, Word + or Long. The third argument is either Scaled or Unscaled. + N.B. when _Scaling is Scaled the shift gets ANDed with + all 1s while when it is Unscaled it gets ANDed with 0. */ + +#define OPT_SCALE(_offset, _elementType, _Scaling) \ + ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0)) + +/* This can be used to zero or sign extend a 32 bit register derived + value to a 64 bit value. the first argument must be the value as + a uint32_t and the second must be either UXTW or SXTW. The result + is returned as an int64_t. */ + +static inline int64_t +extend (uint32_t value, Extension extension) +{ + union + { + uint32_t u; + int32_t n; + } x; + + /* A branchless variant of this ought to be possible. */ + if (extension == UXTW || extension == NoExtension) + return value; + + x.u = value; + return x.n; +} + +/* Scalar Floating Point + + FP load/store single register (4 addressing modes) + + N.B. the base register (source) can be the stack pointer. + The secondary source register (source2) can only be an Xn register. */ + +/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. 
*/ +static void +fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_FP_float (cpu, st, aarch64_get_mem_float (cpu, address)); + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* Load 32 bit scaled unsigned 12 bit. */ +static void +fldrs_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + + aarch64_set_FP_float (cpu, st, + aarch64_get_mem_float + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 32))); +} + +/* Load 32 bit scaled or unscaled zero- or sign-extended + 32-bit register offset. */ +static void +fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_FP_float (cpu, st, + aarch64_get_mem_float + (cpu, address + displacement)); +} + +/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */ +static void +fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_FP_double (cpu, st, aarch64_get_mem_double (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* Load 64 bit scaled unsigned 12 bit. */ +static void +fldrd_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64); + + aarch64_set_FP_double (cpu, st, aarch64_get_mem_double (cpu, address)); +} + +/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */ +static void +fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 64, scaling); + + fldrd_wb (cpu, displacement, NoWriteBack); +} + +/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */ +static void +fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + FRegister a; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_get_mem_long_double (cpu, address, & a); + aarch64_set_FP_long_double (cpu, st, a); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* Load 128 bit scaled unsigned 12 bit. 
*/ +static void +fldrq_abs (sim_cpu *cpu, uint32_t offset) +{ + FRegister a; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); + + aarch64_get_mem_long_double (cpu, address, & a); + aarch64_set_FP_long_double (cpu, st, a); +} + +/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */ +static void +fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 128, scaling); + + fldrq_wb (cpu, displacement, NoWriteBack); +} + +/* Memory Access + + load-store single register + There are four addressing modes available here which all employ a + 64 bit source (base) register. + + N.B. the base register (source) can be the stack pointer. + The secondary source register (source2)can only be an Xn register. + + Scaled, 12-bit, unsigned immediate offset, without pre- and + post-index options. + Unscaled, 9-bit, signed immediate offset with pre- or post-index + writeback. + scaled or unscaled 64-bit register offset. + scaled or unscaled 32-bit extended register offset. + + All offsets are assumed to be raw from the decode i.e. the + simulator is expected to adjust scaled offsets based on the + accessed data size with register or extended register offset + versions the same applies except that in the latter case the + operation may also require a sign extend. + + A separate method is provided for each possible addressing mode. */ + +/* 32 bit load 32 bit scaled unsigned 12 bit */ +static void +ldr32_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 32))); +} + +/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. 
*/ +static void +ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit load 32 bit scaled or unscaled + zero- or sign-extended 32-bit register offset */ +static void +ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_u32 (cpu, address + displacement)); +} + +/* 64 bit load 64 bit scaled unsigned 12 bit */ +static void +ldr_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 64))); +} + +/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */ +static void +ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 64 bit load 64 bit scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 64, scaling); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_u64 (cpu, address + displacement)); +} + +/* 32 bit load zero-extended byte scaled unsigned 12 bit. */ +static void +ldrb32_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be + there is no scaling required for a byte load. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_u8 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); +} + +/* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. 
*/ +static void +ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit load zero-extended byte scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + + /* There is no scaling required for a byte load. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_u8 (cpu, address + displacement)); +} + +/* 64 bit load sign-extended byte unscaled signed 9 bit + with pre- or post-writeback. */ +static void +ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 64 bit load sign-extended byte scaled unsigned 12 bit. */ +static void +ldrsb_abs (sim_cpu *cpu, uint32_t offset) +{ + ldrsb_wb (cpu, offset, NoWriteBack); +} + +/* 64 bit load sign-extended byte scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + /* There is no scaling required for a byte load. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_s8 (cpu, address + displacement)); +} + +/* 32 bit load zero-extended short scaled unsigned 12 bit. */ +static void +ldrh32_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16))); +} + +/* 32 bit load zero-extended short unscaled signed 9 bit + with pre- or post-writeback. 
*/ +static void +ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit load zero-extended short scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 16, scaling); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_u16 (cpu, address + displacement)); +} + +/* 32 bit load sign-extended short scaled unsigned 12 bit. */ +static void +ldrsh32_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s16 + (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16))); +} + +/* 32 bit load sign-extended short unscaled signed 9 bit + with pre- or post-writeback. */ +static void +ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + (uint32_t) aarch64_get_mem_s16 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit load sign-extended short scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 16, scaling); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + (uint32_t) aarch64_get_mem_s16 + (cpu, address + displacement)); +} + +/* 64 bit load sign-extended short scaled unsigned 12 bit. */ +static void +ldrsh_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. 
*/ + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16))); +} + +/* 64 bit load sign-extended short unscaled signed 9 bit + with pre- or post-writeback. */ +static void +ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 64 bit load sign-extended short scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 16, scaling); + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_get_mem_s16 (cpu, address + displacement)); +} + +/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */ +static void +ldrsw_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + return aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 32))); +} + +/* 64 bit load sign-extended 32 bit unscaled signed 9 bit + with pre- or post-writeback. */ +static void +ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 64 bit load sign-extended 32 bit scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_reg_s64 (cpu, rt, NO_SP, + aarch64_get_mem_s32 (cpu, address + displacement)); +} + +/* N.B. with stores the value in source is written to the + address identified by source2 modified by source3/offset. */ + +/* 32 bit store scaled unsigned 12 bit. 
*/ +static void +str32_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 32)), + aarch64_get_reg_u32 (cpu, rt, NO_SP)); +} + +/* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ +static void +str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + if (wb != Post) + address += offset; + + aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit store scaled or unscaled zero- or + sign-extended 32-bit register offset. */ +static void +str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_mem_u32 (cpu, address + displacement, + aarch64_get_reg_u64 (cpu, rt, NO_SP)); +} + +/* 64 bit store scaled unsigned 12 bit. */ +static void +str_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_u64 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 64), + aarch64_get_reg_u64 (cpu, rt, NO_SP)); +} + +/* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ +static void +str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 64 bit store scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 64, scaling); + + aarch64_set_mem_u64 (cpu, address + displacement, + aarch64_get_reg_u64 (cpu, rt, NO_SP)); +} + +/* 32 bit store byte scaled unsigned 12 bit. 
*/ +static void +strb_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. + There is no scaling required for a byte load. */ + aarch64_set_mem_u8 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, + aarch64_get_reg_u8 (cpu, rt, NO_SP)); +} + +/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */ +static void +strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit store byte scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + + /* There is no scaling required for a byte load. */ + aarch64_set_mem_u8 (cpu, address + displacement, + aarch64_get_reg_u8 (cpu, rt, NO_SP)); +} + +/* 32 bit store short scaled unsigned 12 bit. */ +static void +strh_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be. */ + aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16), + aarch64_get_reg_u16 (cpu, rt, NO_SP)); +} + +/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */ +static void +strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address; + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit store short scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + /* rn may reference SP, rm and rt must reference ZR */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + uint64_t displacement = OPT_SCALE (extended, 16, scaling); + + aarch64_set_mem_u16 (cpu, address + displacement, + aarch64_get_reg_u16 (cpu, rt, NO_SP)); +} + +/* Prefetch unsigned 12 bit. 
*/ +static void +prfm_abs (sim_cpu *cpu, uint32_t offset) +{ + /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM, + 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM, + 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM, + 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM, + 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM, + 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM, + ow ==> UNALLOC + PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 64). */ + + /* TODO : implement prefetch of address. */ +} + +/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */ +static void +prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM, + 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM, + 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM, + 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM, + 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM, + 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM, + ow ==> UNALLOC + rn may reference SP, rm may only reference ZR + PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0); + uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 64, scaling); + uint64_t address = base + displacement. */ + + /* TODO : implement prefetch of address */ +} + +/* 64 bit pc-relative prefetch. */ +static void +prfm_pcrel (sim_cpu *cpu, int32_t offset) +{ + /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM, + 00010 ==> PLDL2KEEP, 00001 ==> PLDL2STRM, + 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM, + 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM, + 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM, + 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM, + ow ==> UNALLOC + PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_PC (cpu) + offset. */ + + /* TODO : implement this */ +} + +/* Load-store exclusive. */ + +static void +ldxr (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int size = uimm (aarch64_get_instr (cpu), 31, 30); + /* int ordered = uimm (aarch64_get_instr (cpu), 15, 15); */ + /* int exclusive = ! uimm (aarch64_get_instr (cpu), 23, 23); */ + + switch (size) + { + case 0: + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address)); + break; + case 1: + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address)); + break; + case 2: + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address)); + break; + case 3: + aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address)); + break; + default: + HALT_UNALLOC; + } +} + +static void +stxr (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rs = uimm (aarch64_get_instr (cpu), 20, 16); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int size = uimm (aarch64_get_instr (cpu), 31, 30); + uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP); + + switch (size) + { + case 0: aarch64_set_mem_u8 (cpu, address, data); break; + case 1: aarch64_set_mem_u16 (cpu, address, data); break; + case 2: aarch64_set_mem_u32 (cpu, address, data); break; + case 3: aarch64_set_mem_u64 (cpu, address, data); break; + default: HALT_UNALLOC; + } + + aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... 
*/ +} + +static void +dexLoadLiteral (sim_cpu *cpu) +{ + /* instr[29,27] == 011 + instr[25,24] == 00 + instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS + 010 ==> LDRX, 011 ==> FLDRD + 100 ==> LDRSW, 101 ==> FLDRQ + 110 ==> PRFM, 111 ==> UNALLOC + instr[26] ==> V : 0 ==> GReg, 1 ==> FReg + instr[23, 5] == simm19 */ + + /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ + uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 1) + | uimm (aarch64_get_instr (cpu), 26, 26)); + int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5); + + switch (dispatch) + { + case 0: ldr32_pcrel (cpu, imm); break; + case 1: fldrs_pcrel (cpu, imm); break; + case 2: ldr_pcrel (cpu, imm); break; + case 3: fldrd_pcrel (cpu, imm); break; + case 4: ldrsw_pcrel (cpu, imm); break; + case 5: fldrq_pcrel (cpu, imm); break; + case 6: prfm_pcrel (cpu, imm); break; + case 7: + default: + HALT_UNALLOC; + } +} + +/* Immediate arithmetic + The aimm argument is a 12 bit unsigned value or a 12 bit unsigned + value left shifted by 12 bits (done at decode). + + N.B. the register args (dest, source) can normally be Xn or SP. + the exception occurs for flag setting instructions which may + only use Xn for the output (dest). */ + +/* 32 bit add immediate. */ +static void +add32 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm); +} + +/* 64 bit add immediate. */ +static void +add64 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm); +} + +static void +set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2) +{ + int32_t result = value1 + value2; + int64_t sresult = (int64_t) value1 + (int64_t) value2; + uint64_t uresult = (uint64_t)(uint32_t) value1 + + (uint64_t)(uint32_t) value2; + uint32_t flags = 0; + + if (result == 0) + flags |= Z; + + if (result & (1 << 31)) + flags |= N; + + if (uresult != result) + flags |= C; + + if (sresult != result) + flags |= V; + + aarch64_set_CPSR (cpu, flags); +} + +static void +set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2) +{ + int64_t sval1 = value1; + int64_t sval2 = value2; + uint64_t result = value1 + value2; + int64_t sresult = sval1 + sval2; + uint32_t flags = 0; + + if (result == 0) + flags |= Z; + + if (result & (1ULL << 63)) + flags |= N; + + if (sval1 < 0) + { + if (sval2 < 0) + { + /* Negative plus a negative. Overflow happens if + the result is greater than either of the operands. */ + if (sresult > sval1 || sresult > sval2) + flags |= V; + } + /* else Negative plus a positive. Overflow cannot happen. */ + } + else /* value1 is +ve. */ + { + if (sval2 < 0) + { + /* Overflow can only occur if we computed "0 - MININT". */ + if (sval1 == 0 && sval2 == (1LL << 63)) + flags |= V; + } + else + { + /* Postive plus positive - overflow has happened if the + result is smaller than either of the operands. 
*/ + if (result < value1 || result < value2) + flags |= V | C; + } + } + + aarch64_set_CPSR (cpu, flags); +} + +#define NEG(a) (((a) & signbit) == signbit) +#define POS(a) (((a) & signbit) == 0) + +static void +set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2) +{ + uint32_t result = value1 - value2; + uint32_t flags = 0; + uint32_t signbit = 1ULL << 31; + + if (result == 0) + flags |= Z; + + if (NEG (result)) + flags |= N; + + if ( (NEG (value1) && POS (value2)) + || (NEG (value1) && POS (result)) + || (POS (value2) && POS (result))) + flags |= C; + + if ( (NEG (value1) && POS (value2) && POS (result)) + || (POS (value1) && NEG (value2) && NEG (result))) + flags |= V; + + aarch64_set_CPSR (cpu, flags); +} + +static void +set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2) +{ + uint64_t result = value1 - value2; + uint32_t flags = 0; + uint64_t signbit = 1ULL << 63; + + if (result == 0) + flags |= Z; + + if (NEG (result)) + flags |= N; + + if ( (NEG (value1) && POS (value2)) + || (NEG (value1) && POS (result)) + || (POS (value2) && POS (result))) + flags |= C; + + if ( (NEG (value1) && POS (value2) && POS (result)) + || (POS (value1) && NEG (value2) && NEG (result))) + flags |= V; + + aarch64_set_CPSR (cpu, flags); +} + +static void +set_flags_for_binop32 (sim_cpu *cpu, uint32_t result) +{ + uint32_t flags = 0; + + if (result == 0) + flags |= Z; + else + flags &= ~ Z; + + if (result & (1 << 31)) + flags |= N; + else + flags &= ~ N; + + aarch64_set_CPSR (cpu, flags); +} + +static void +set_flags_for_binop64 (sim_cpu *cpu, uint64_t result) +{ + uint32_t flags = 0; + + if (result == 0) + flags |= Z; + else + flags &= ~ Z; + + if (result & (1ULL << 63)) + flags |= N; + else + flags &= ~ N; + + aarch64_set_CPSR (cpu, flags); +} + +/* 32 bit add immediate set flags. */ +static void +adds32 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + /* TODO : do we need to worry about signs here? */ + int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm); + set_flags_for_add32 (cpu, value1, aimm); +} + +/* 64 bit add immediate set flags. */ +static void +adds64 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); + uint64_t value2 = aimm; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); + set_flags_for_add64 (cpu, value1, value2); +} + +/* 32 bit sub immediate. */ +static void +sub32 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm); +} + +/* 64 bit sub immediate. */ +static void +sub64 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm); +} + +/* 32 bit sub immediate set flags. 
*/ +static void +subs32 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); + uint32_t value2 = aimm; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); + set_flags_for_sub32 (cpu, value1, value2); +} + +/* 64 bit sub immediate set flags. */ +static void +subs64 (sim_cpu *cpu, uint32_t aimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); + uint32_t value2 = aimm; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); + set_flags_for_sub64 (cpu, value1, value2); +} + +/* Data Processing Register. */ + +/* First two helpers to perform the shift operations. */ + +static inline uint32_t +shifted32 (uint32_t value, Shift shift, uint32_t count) +{ + switch (shift) + { + default: + case LSL: + return (value << count); + case LSR: + return (value >> count); + case ASR: + { + int32_t svalue = value; + return (svalue >> count); + } + case ROR: + { + uint32_t top = value >> count; + uint32_t bottom = value << (32 - count); + return (bottom | top); + } + } +} + +static inline uint64_t +shifted64 (uint64_t value, Shift shift, uint32_t count) +{ + switch (shift) + { + default: + case LSL: + return (value << count); + case LSR: + return (value >> count); + case ASR: + { + int64_t svalue = value; + return (svalue >> count); + } + case ROR: + { + uint64_t top = value >> count; + uint64_t bottom = value << (64 - count); + return (bottom | top); + } + } +} + +/* Arithmetic shifted register. + These allow an optional LSL, ASR or LSR to the second source + register with a count up to the register bit count. + + N.B register args may not be SP. */ + +/* 32 bit ADD shifted register. */ +static void +add32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u32 (cpu, rn, NO_SP) + + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), + shift, count)); +} + +/* 64 bit ADD shifted register. */ +static void +add64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, rn, NO_SP) + + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), + shift, count)); +} + +/* 32 bit ADD shifted register setting flags. */ +static void +adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); + set_flags_for_add32 (cpu, value1, value2); +} + +/* 64 bit ADD shifted register setting flags. 
*/ +static void +adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); + set_flags_for_add64 (cpu, value1, value2); +} + +/* 32 bit SUB shifted register. */ +static void +sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u32 (cpu, rn, NO_SP) + - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), + shift, count)); +} + +/* 64 bit SUB shifted register. */ +static void +sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, rn, NO_SP) + - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), + shift, count)); +} + +/* 32 bit SUB shifted register setting flags. */ +static void +subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); + set_flags_for_sub32 (cpu, value1, value2); +} + +/* 64 bit SUB shifted register setting flags. */ +static void +subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); + set_flags_for_sub64 (cpu, value1, value2); +} + +/* First a couple more helpers to fetch the + relevant source register element either + sign or zero extended as required by the + extension value. */ + +static uint32_t +extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension) +{ + switch (extension) + { + case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP); + case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP); + case UXTW: /* Fall through. */ + case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP); + case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP); + case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP); + case SXTW: /* Fall through. */ + case SXTX: /* Fall through. 
*/ + default: return aarch64_get_reg_s32 (cpu, lo, NO_SP); + } +} + +static uint64_t +extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension) +{ + switch (extension) + { + case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP); + case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP); + case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP); + case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP); + case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP); + case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP); + case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP); + case SXTX: + default: return aarch64_get_reg_s64 (cpu, lo, NO_SP); + } +} + +/* Arithmetic extending register + These allow an optional sign extension of some portion of the + second source register followed by an optional left shift of + between 1 and 4 bits (i.e. a shift of 0-4 bits???) + + N.B output (dest) and first input arg (source) may normally be Xn + or SP. However, for flag setting operations dest can only be + Xn. Second input registers are always Xn. */ + +/* 32 bit ADD extending register. */ +static void +add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, SP_OK) + + (extreg32 (cpu, rm, extension) << shift)); +} + +/* 64 bit ADD extending register. + N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */ +static void +add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + + (extreg64 (cpu, rm, extension) << shift)); +} + +/* 32 bit ADD extending register setting flags. */ +static void +adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); + uint32_t value2 = extreg32 (cpu, rm, extension) << shift; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); + set_flags_for_add32 (cpu, value1, value2); +} + +/* 64 bit ADD extending register setting flags */ +/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ +static void +adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); + uint64_t value2 = extreg64 (cpu, rm, extension) << shift; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); + set_flags_for_add64 (cpu, value1, value2); +} + +/* 32 bit SUB extending register. */ +static void +sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, SP_OK) + - (extreg32 (cpu, rm, extension) << shift)); +} + +/* 64 bit SUB extending register. */ +/* N.B. 
this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */ +static void +sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + - (extreg64 (cpu, rm, extension) << shift)); +} + +/* 32 bit SUB extending register setting flags. */ +static void +subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); + uint32_t value2 = extreg32 (cpu, rm, extension) << shift; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); + set_flags_for_sub32 (cpu, value1, value2); +} + +/* 64 bit SUB extending register setting flags */ +/* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ +static void +subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); + uint64_t value2 = extreg64 (cpu, rm, extension) << shift; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); + set_flags_for_sub64 (cpu, value1, value2); +} + +static void +dexAddSubtractImmediate (sim_cpu *cpu) +{ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30] = op : 0 ==> ADD, 1 ==> SUB + instr[29] = set : 0 ==> no flags, 1 ==> set flags + instr[28,24] = 10001 + instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC + instr[21,10] = uimm12 + instr[9,5] = Rn + instr[4,0] = Rd */ + + /* N.B. the shift is applied at decode before calling the add/sub routine. */ + uint32_t shift = uimm (aarch64_get_instr (cpu), 23, 22); + uint32_t imm = uimm (aarch64_get_instr (cpu), 21, 10); + uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + + NYI_assert (28, 24, 0x11); + + if (shift > 1) + HALT_UNALLOC; + + if (shift) + imm <<= 12; + + switch (dispatch) + { + case 0: add32 (cpu, imm); break; + case 1: adds32 (cpu, imm); break; + case 2: sub32 (cpu, imm); break; + case 3: subs32 (cpu, imm); break; + case 4: add64 (cpu, imm); break; + case 5: adds64 (cpu, imm); break; + case 6: sub64 (cpu, imm); break; + case 7: subs64 (cpu, imm); break; + default: + HALT_UNALLOC; + } +} + +static void +dexAddSubtractShiftedRegister (sim_cpu *cpu) +{ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS + instr[28,24] = 01011 + instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC + instr[21] = 0 + instr[20,16] = Rm + instr[15,10] = count : must be 0xxxxx for 32 bit + instr[9,5] = Rn + instr[4,0] = Rd */ + + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + /* 32 bit operations must have count[5] = 0 + or else we have an UNALLOC. */ + uint32_t count = uimm (aarch64_get_instr (cpu), 15, 10); + /* Shift encoded as ROR is unallocated. */ + Shift shiftType = shift (aarch64_get_instr (cpu), 22); + /* Dispatch on size:op i.e aarch64_get_instr (cpu)[31,29]. 
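+     For example, the encoding of ADDS Xd, Xn, Xm, LSL #n has bits 31:30:29
+     equal to 1:0:1, so dispatch is 5 and the switch below calls
+     adds64_shift.  A minimal field extractor with the same behaviour as
+     the uimm calls used throughout this file (the real uimm helper is
+     defined elsewhere; this is only a sketch) could look like:
+
+       // illustrative only
+       static inline uint32_t
+       bits (uint32_t insn, int hi, int lo)
+       {
+         return (uint32_t) ((insn >> lo) & ((1ull << (hi - lo + 1)) - 1));
+       }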
*/ + uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + + NYI_assert (28, 24, 0x0B); + NYI_assert (21, 21, 0); + + if (shiftType == ROR) + HALT_UNALLOC; + + if (!size && uimm (count, 5, 5)) + HALT_UNALLOC; + + switch (dispatch) + { + case 0: add32_shift (cpu, shiftType, count); break; + case 1: adds32_shift (cpu, shiftType, count); break; + case 2: sub32_shift (cpu, shiftType, count); break; + case 3: subs32_shift (cpu, shiftType, count); break; + case 4: add64_shift (cpu, shiftType, count); break; + case 5: adds64_shift (cpu, shiftType, count); break; + case 6: sub64_shift (cpu, shiftType, count); break; + case 7: subs64_shift (cpu, shiftType, count); break; + default: + HALT_UNALLOC; + } +} + +static void +dexAddSubtractExtendedRegister (sim_cpu *cpu) +{ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30] = op : 0 ==> ADD, 1 ==> SUB + instr[29] = set? : 0 ==> no flags, 1 ==> set flags + instr[28,24] = 01011 + instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC + instr[21] = 1 + instr[20,16] = Rm + instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH, + 000 ==> LSL|UXTW, 001 ==> UXTZ, + 000 ==> SXTB, 001 ==> SXTH, + 000 ==> SXTW, 001 ==> SXTX, + instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC + instr[9,5] = Rn + instr[4,0] = Rd */ + + Extension extensionType = extension (aarch64_get_instr (cpu), 13); + uint32_t shift = uimm (aarch64_get_instr (cpu), 12, 10); + /* dispatch on size:op:set? i.e aarch64_get_instr (cpu)[31,29] */ + uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + + NYI_assert (28, 24, 0x0B); + NYI_assert (21, 21, 1); + + /* Shift may not exceed 4. */ + if (shift > 4) + HALT_UNALLOC; + + switch (dispatch) + { + case 0: add32_ext (cpu, extensionType, shift); break; + case 1: adds32_ext (cpu, extensionType, shift); break; + case 2: sub32_ext (cpu, extensionType, shift); break; + case 3: subs32_ext (cpu, extensionType, shift); break; + case 4: add64_ext (cpu, extensionType, shift); break; + case 5: adds64_ext (cpu, extensionType, shift); break; + case 6: sub64_ext (cpu, extensionType, shift); break; + case 7: subs64_ext (cpu, extensionType, shift); break; + default: HALT_UNALLOC; + } +} + +/* Conditional data processing + Condition register is implicit 3rd source. */ + +/* 32 bit add with carry. */ +/* N.B register args may not be SP. */ + +static void +adc32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u32 (cpu, rn, NO_SP) + + aarch64_get_reg_u32 (cpu, rm, NO_SP) + + IS_SET (C)); +} + +/* 64 bit add with carry */ +static void +adc64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, rn, NO_SP) + + aarch64_get_reg_u64 (cpu, rm, NO_SP) + + IS_SET (C)); +} + +/* 32 bit add with carry setting flags. 
*/ +static void +adcs32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); + uint32_t carry = IS_SET (C); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); + set_flags_for_add32 (cpu, value1, value2 + carry); +} + +/* 64 bit add with carry setting flags. */ +static void +adcs64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); + uint64_t carry = IS_SET (C); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); + set_flags_for_add64 (cpu, value1, value2 + carry); +} + +/* 32 bit sub with carry. */ +static void +sbc32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u32 (cpu, rn, NO_SP) + - aarch64_get_reg_u32 (cpu, rm, NO_SP) + - 1 + IS_SET (C)); +} + +/* 64 bit sub with carry */ +static void +sbc64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, rn, NO_SP) + - aarch64_get_reg_u64 (cpu, rm, NO_SP) + - 1 + IS_SET (C)); +} + +/* 32 bit sub with carry setting flags */ +static void +sbcs32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); + uint32_t carry = IS_SET (C); + uint32_t result = value1 - value2 + 1 - carry; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); + set_flags_for_sub32 (cpu, value1, value2 + 1 - carry); +} + +/* 64 bit sub with carry setting flags */ +static void +sbcs64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); + uint64_t carry = IS_SET (C); + uint64_t result = value1 - value2 + 1 - carry; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); + set_flags_for_sub64 (cpu, value1, value2 + 1 - carry); +} + +static void +dexAddSubtractWithCarry (sim_cpu *cpu) +{ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30] = op : 0 ==> ADC, 1 ==> SBC + instr[29] = set? : 0 ==> no flags, 1 ==> set flags + instr[28,21] = 1 1010 000 + instr[20,16] = Rm + instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC + instr[9,5] = Rn + instr[4,0] = Rd */ + + uint32_t op2 = uimm (aarch64_get_instr (cpu), 15, 10); + /* Dispatch on size:op:set? 
i.e aarch64_get_instr (cpu)[31,29] */ + uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + + NYI_assert (28, 21, 0xD0); + + if (op2 != 0) + HALT_UNALLOC; + + switch (dispatch) + { + case 0: adc32 (cpu); break; + case 1: adcs32 (cpu); break; + case 2: sbc32 (cpu); break; + case 3: sbcs32 (cpu); break; + case 4: adc64 (cpu); break; + case 5: adcs64 (cpu); break; + case 6: sbc64 (cpu); break; + case 7: sbcs64 (cpu); break; + default: HALT_UNALLOC; + } +} + +static uint32_t +testConditionCode (sim_cpu *cpu, CondCode cc) +{ + /* This should be reduceable to branchless logic + by some careful testing of bits in CC followed + by the requisite masking and combining of bits + from the flag register. + + For now we do it with a switch. */ + int res; + + switch (cc) + { + case EQ: res = IS_SET (Z); break; + case NE: res = IS_CLEAR (Z); break; + case CS: res = IS_SET (C); break; + case CC: res = IS_CLEAR (C); break; + case MI: res = IS_SET (N); break; + case PL: res = IS_CLEAR (N); break; + case VS: res = IS_SET (V); break; + case VC: res = IS_CLEAR (V); break; + case HI: res = IS_SET (C) && IS_CLEAR (Z); break; + case LS: res = IS_CLEAR (C) || IS_SET (Z); break; + case GE: res = IS_SET (N) == IS_SET (V); break; + case LT: res = IS_SET (N) != IS_SET (V); break; + case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break; + case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break; + case AL: + case NV: + default: + res = 1; + break; + } + return res; +} + +static void +CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ +{ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30] = compare with positive (0) or negative value (1) + instr[29,21] = 1 1101 0010 + instr[20,16] = Rm or const + instr[15,12] = cond + instr[11] = compare reg (0) or const (1) + instr[10] = 0 + instr[9,5] = Rn + instr[4] = 0 + instr[3,0] = value for CPSR bits if the comparison does not take place. */ + signed int negate; + unsigned rm; + unsigned rn; + + NYI_assert (29, 21, 0x1d2); + NYI_assert (10, 10, 0); + NYI_assert (4, 4, 0); + + if (! testConditionCode (cpu, uimm (aarch64_get_instr (cpu), 15, 12))) + { + aarch64_set_CPSR (cpu, uimm (aarch64_get_instr (cpu), 3, 0)); + return; + } + + negate = uimm (aarch64_get_instr (cpu), 30, 30) ? 
-1 : 1; + rm = uimm (aarch64_get_instr (cpu), 20, 16); + rn = uimm (aarch64_get_instr (cpu), 9, 5); + + if (uimm (aarch64_get_instr (cpu), 31, 31)) + { + if (uimm (aarch64_get_instr (cpu), 11, 11)) + set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), + negate * (uint64_t) rm); + else + set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), + negate * aarch64_get_reg_u64 (cpu, rm, SP_OK)); + } + else + { + if (uimm (aarch64_get_instr (cpu), 11, 11)) + set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), + negate * rm); + else + set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), + negate * aarch64_get_reg_u32 (cpu, rm, SP_OK)); + } +} + +static void +do_vec_MOV_whole_vector (sim_cpu *cpu) +{ + /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm) + + instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,21] = 001110101 + instr[20,16] = Vs + instr[15,10] = 000111 + instr[9,5] = Vs + instr[4,0] = Vd */ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (29, 21, 0x075); + NYI_assert (15, 10, 0x07); + + if (uimm (aarch64_get_instr (cpu), 20, 16) != vs) + HALT_NYI; + + if (uimm (aarch64_get_instr (cpu), 30, 30)) + aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1)); + + aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0)); +} + +static void +do_vec_MOV_into_scalar (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = word(0)/long(1) + instr[29,21] = 00 1110 000 + instr[20,18] = element size and index + instr[17,10] = 00 0011 11 + instr[9,5] = V source + instr[4,0] = R dest */ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (29, 21, 0x070); + NYI_assert (17, 10, 0x0F); + + switch (uimm (aarch64_get_instr (cpu), 20, 18)) + { + case 0x2: + aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0)); + break; + + case 0x6: + aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1)); + break; + + case 0x1: + case 0x3: + case 0x5: + case 0x7: + aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32 + (cpu, vs, uimm (aarch64_get_instr (cpu), 20, 19))); + break; + + default: + HALT_NYI; + } +} + +static void +do_vec_INS (sim_cpu *cpu) +{ + /* instr[31,21] = 01001110000 + instr[20,16] = element size and index + instr[15,10] = 000111 + instr[9,5] = W source + instr[4,0] = V dest */ + + int index; + unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 21, 0x270); + NYI_assert (15, 10, 0x07); + + if (uimm (aarch64_get_instr (cpu), 16, 16)) + { + index = uimm (aarch64_get_instr (cpu), 20, 17); + aarch64_set_vec_u8 (cpu, vd, index, + aarch64_get_reg_u8 (cpu, rs, NO_SP)); + } + else if (uimm (aarch64_get_instr (cpu), 17, 17)) + { + index = uimm (aarch64_get_instr (cpu), 20, 18); + aarch64_set_vec_u16 (cpu, vd, index, + aarch64_get_reg_u16 (cpu, rs, NO_SP)); + } + else if (uimm (aarch64_get_instr (cpu), 18, 18)) + { + index = uimm (aarch64_get_instr (cpu), 20, 19); + aarch64_set_vec_u32 (cpu, vd, index, + aarch64_get_reg_u32 (cpu, rs, NO_SP)); + } + else if (uimm (aarch64_get_instr (cpu), 19, 19)) + { + index = uimm (aarch64_get_instr (cpu), 20, 20); + aarch64_set_vec_u64 (cpu, vd, index, + aarch64_get_reg_u64 (cpu, rs, NO_SP)); + } + else + HALT_NYI; +} + +static void +do_vec_DUP_vector_into_vector (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,21] = 
00 1110 000 + instr[20,16] = element size and index + instr[15,10] = 0000 01 + instr[9,5] = V source + instr[4,0] = V dest. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int i, index; + + NYI_assert (29, 21, 0x070); + NYI_assert (15, 10, 0x01); + + if (uimm (aarch64_get_instr (cpu), 16, 16)) + { + index = uimm (aarch64_get_instr (cpu), 20, 17); + + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index)); + } + else if (uimm (aarch64_get_instr (cpu), 17, 17)) + { + index = uimm (aarch64_get_instr (cpu), 20, 18); + + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index)); + } + else if (uimm (aarch64_get_instr (cpu), 18, 18)) + { + index = uimm (aarch64_get_instr (cpu), 20, 19); + + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index)); + } + else + { + if (uimm (aarch64_get_instr (cpu), 19, 19) == 0) + HALT_UNALLOC; + + if (! full) + HALT_UNALLOC; + + index = uimm (aarch64_get_instr (cpu), 20, 20); + + for (i = 0; i < 2; i++) + aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index)); + } +} + +static void +do_vec_TBL (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,21] = 00 1110 000 + instr[20,16] = Vm + instr[15] = 0 + instr[14,13] = vec length + instr[12,10] = 000 + instr[9,5] = V start + instr[4,0] = V dest */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + int len = uimm (aarch64_get_instr (cpu), 14, 13) + 1; + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 21, 0x070); + NYI_assert (12, 10, 0); + + for (i = 0; i < (full ? 16 : 8); i++) + { + unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i); + uint8_t val; + + if (selector < 16) + val = aarch64_get_vec_u8 (cpu, vn, selector); + else if (selector < 32) + val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16); + else if (selector < 48) + val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32); + else if (selector < 64) + val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48); + else + val = 0; + + aarch64_set_vec_u8 (cpu, vd, i, val); + } +} + +static void +do_vec_TRN (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 00 1110 + instr[23,22] = size + instr[21] = 0 + instr[20,16] = Vm + instr[15] = 0 + instr[14] = TRN1 (0) / TRN2 (1) + instr[13,10] = 1010 + instr[9,5] = V source + instr[4,0] = V dest. */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + int second = uimm (aarch64_get_instr (cpu), 14, 14); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (13, 10, 0xA); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 8 : 4); i++) + { + aarch64_set_vec_u8 + (cpu, vd, i * 2, + aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2)); + aarch64_set_vec_u8 + (cpu, vd, 1 * 2 + 1, + aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1)); + } + break; + + case 1: + for (i = 0; i < (full ? 
4 : 2); i++) + { + aarch64_set_vec_u16 + (cpu, vd, i * 2, + aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2)); + aarch64_set_vec_u16 + (cpu, vd, 1 * 2 + 1, + aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1)); + } + break; + + case 2: + aarch64_set_vec_u32 + (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0)); + aarch64_set_vec_u32 + (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1)); + aarch64_set_vec_u32 + (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2)); + aarch64_set_vec_u32 + (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3)); + break; + + case 3: + if (! full) + HALT_UNALLOC; + + aarch64_set_vec_u64 (cpu, vd, 0, + aarch64_get_vec_u64 (cpu, second ? vm : vn, 0)); + aarch64_set_vec_u64 (cpu, vd, 1, + aarch64_get_vec_u64 (cpu, second ? vn : vm, 1)); + break; + + default: + HALT_UNALLOC; + } +} + +static void +do_vec_DUP_scalar_into_vector (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits + [must be 1 for 64-bit xfer] + instr[29,20] = 00 1110 0000 + instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits, + 0100=> 32-bits. 1000=>64-bits + instr[15,10] = 0000 11 + instr[9,5] = W source + instr[4,0] = V dest. */ + + unsigned i; + unsigned Vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned Rs = uimm (aarch64_get_instr (cpu), 9, 5); + int both = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 20, 0x0E0); + NYI_assert (15, 10, 0x03); + + switch (uimm (aarch64_get_instr (cpu), 19, 16)) + { + case 1: + for (i = 0; i < (both ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP)); + break; + + case 2: + for (i = 0; i < (both ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP)); + break; + + case 4: + for (i = 0; i < (both ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP)); + break; + + case 8: + if (!both) + HALT_NYI; + aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP)); + aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP)); + break; + + default: + HALT_NYI; + } +} + +static void +do_vec_UZP (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 00 1110 + instr[23,22] = size: byte(00), half(01), word (10), long (11) + instr[21] = 0 + instr[20,16] = Vm + instr[15] = 0 + instr[14] = lower (0) / upper (1) + instr[13,10] = 0110 + instr[9,5] = Vn + instr[4,0] = Vd. */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + int upper = uimm (aarch64_get_instr (cpu), 14, 14); + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0); + uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1); + uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0); + uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1); + + uint64_t val1 = 0; + uint64_t val2 = 0; + + uint64_t input1 = upper ? val_n1 : val_m1; + uint64_t input2 = upper ? 
val_n2 : val_m2; + unsigned i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 0); + NYI_assert (15, 15, 0); + NYI_assert (13, 10, 6); + + switch (uimm (aarch64_get_instr (cpu), 23, 23)) + { + case 0: + for (i = 0; i < 8; i++) + { + val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8)); + val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8)); + } + break; + + case 1: + for (i = 0; i < 4; i++) + { + val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16)); + val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16)); + } + break; + + case 2: + val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL)); + val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL)); + + case 3: + val1 = input1; + val2 = input2; + break; + } + + aarch64_set_vec_u64 (cpu, vd, 0, val1); + if (full) + aarch64_set_vec_u64 (cpu, vd, 1, val2); +} + +static void +do_vec_ZIP (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 00 1110 + instr[23,22] = size: byte(00), hald(01), word (10), long (11) + instr[21] = 0 + instr[20,16] = Vm + instr[15] = 0 + instr[14] = lower (0) / upper (1) + instr[13,10] = 1110 + instr[9,5] = Vn + instr[4,0] = Vd. */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + int upper = uimm (aarch64_get_instr (cpu), 14, 14); + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0); + uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1); + uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0); + uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1); + + uint64_t val1 = 0; + uint64_t val2 = 0; + + uint64_t input1 = upper ? val_n1 : val_m1; + uint64_t input2 = upper ? val_n2 : val_m2; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 0); + NYI_assert (15, 15, 0); + NYI_assert (13, 10, 0xE); + + switch (uimm (aarch64_get_instr (cpu), 23, 23)) + { + case 0: + val1 = + ((input1 << 0) & (0xFF << 0)) + | ((input2 << 8) & (0xFF << 8)) + | ((input1 << 8) & (0xFF << 16)) + | ((input2 << 16) & (0xFF << 24)) + | ((input1 << 16) & (0xFFULL << 32)) + | ((input2 << 24) & (0xFFULL << 40)) + | ((input1 << 24) & (0xFFULL << 48)) + | ((input2 << 32) & (0xFFULL << 56)); + + val2 = + ((input1 >> 32) & (0xFF << 0)) + | ((input2 >> 24) & (0xFF << 8)) + | ((input1 >> 24) & (0xFF << 16)) + | ((input2 >> 16) & (0xFF << 24)) + | ((input1 >> 16) & (0xFFULL << 32)) + | ((input2 >> 8) & (0xFFULL << 40)) + | ((input1 >> 8) & (0xFFULL << 48)) + | ((input2 >> 0) & (0xFFULL << 56)); + break; + + case 1: + val1 = + ((input1 << 0) & (0xFFFF << 0)) + | ((input2 << 16) & (0xFFFF << 16)) + | ((input1 << 16) & (0xFFFFULL << 32)) + | ((input2 << 32) & (0xFFFFULL << 48)); + + val2 = + ((input1 >> 32) & (0xFFFF << 0)) + | ((input2 >> 16) & (0xFFFF << 16)) + | ((input1 >> 16) & (0xFFFFULL << 32)) + | ((input2 >> 0) & (0xFFFFULL << 48)); + break; + + case 2: + val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32); + val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32); + break; + + case 3: + val1 = input1; + val2 = input2; + break; + } + + aarch64_set_vec_u64 (cpu, vd, 0, val1); + if (full) + aarch64_set_vec_u64 (cpu, vd, 1, val2); +} + +/* Floating point immediates are encoded in 8 bits. + fpimm[7] = sign bit. + fpimm[6:4] = signed exponent. + fpimm[3:0] = fraction (assuming leading 1). + i.e. F = s * 1.f * 2^(e - b). 
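+   A compact standalone decoder that should behave like the two
+   loop-based expansion routines below (illustrative only):
+
+     // illustrative only
+     static float
+     fp8_to_float (uint32_t imm8)
+     {
+       int   s = (imm8 >> 7) & 1;
+       int   e = (imm8 >> 4) & 7;
+       int   f = imm8 & 0xf;
+       float u = (16.0f + f) / 16.0f;          // 1.0 .. 1.9375
+       u = ldexpf (u, e < 4 ? e + 1 : e - 7);  // signed exponent
+       return s ? -u : u;
+     }
+
+   For example imm8 == 0x70 decodes to 1.0, the encoding used by FMOV for
+   the constant 1.0.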
*/ + +static float +fp_immediate_for_encoding_32 (uint32_t imm8) +{ + float u; + uint32_t s, e, f, i; + + s = (imm8 >> 7) & 0x1; + e = (imm8 >> 4) & 0x7; + f = imm8 & 0xf; + + /* The fp value is s * n/16 * 2r where n is 16+e. */ + u = (16.0 + f) / 16.0; + + /* N.B. exponent is signed. */ + if (e < 4) + { + int epos = e; + + for (i = 0; i <= epos; i++) + u *= 2.0; + } + else + { + int eneg = 7 - e; + + for (i = 0; i < eneg; i++) + u /= 2.0; + } + + if (s) + u = - u; + + return u; +} + +static double +fp_immediate_for_encoding_64 (uint32_t imm8) +{ + double u; + uint32_t s, e, f, i; + + s = (imm8 >> 7) & 0x1; + e = (imm8 >> 4) & 0x7; + f = imm8 & 0xf; + + /* The fp value is s * n/16 * 2r where n is 16+e. */ + u = (16.0 + f) / 16.0; + + /* N.B. exponent is signed. */ + if (e < 4) + { + int epos = e; + + for (i = 0; i <= epos; i++) + u *= 2.0; + } + else + { + int eneg = 7 - e; + + for (i = 0; i < eneg; i++) + u /= 2.0; + } + + if (s) + u = - u; + + return u; +} + +static void +do_vec_MOV_immediate (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,19] = 00111100000 + instr[18,16] = high 3 bits of uimm8 + instr[15,12] = size & shift: + 0000 => 32-bit + 0010 => 32-bit + LSL#8 + 0100 => 32-bit + LSL#16 + 0110 => 32-bit + LSL#24 + 1010 => 16-bit + LSL#8 + 1000 => 16-bit + 1101 => 32-bit + MSL#16 + 1100 => 32-bit + MSL#8 + 1110 => 8-bit + 1111 => double + instr[11,10] = 01 + instr[9,5] = low 5-bits of uimm8 + instr[4,0] = Vd. */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned val = uimm (aarch64_get_instr (cpu), 18, 16) << 5 + | uimm (aarch64_get_instr (cpu), 9, 5); + unsigned i; + + NYI_assert (29, 19, 0x1E0); + NYI_assert (11, 10, 1); + + switch (uimm (aarch64_get_instr (cpu), 15, 12)) + { + case 0x0: /* 32-bit, no shift. */ + case 0x2: /* 32-bit, shift by 8. */ + case 0x4: /* 32-bit, shift by 16. */ + case 0x6: /* 32-bit, shift by 24. */ + val <<= (8 * uimm (aarch64_get_instr (cpu), 14, 13)); + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, val); + break; + + case 0xa: /* 16-bit, shift by 8. */ + val <<= 8; + /* Fall through. */ + case 0x8: /* 16-bit, no shift. */ + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, val); + /* Fall through. */ + case 0xd: /* 32-bit, mask shift by 16. */ + val <<= 8; + val |= 0xFF; + /* Fall through. */ + case 0xc: /* 32-bit, mask shift by 8. */ + val <<= 8; + val |= 0xFF; + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, val); + break; + + case 0xe: /* 8-bit, no shift. */ + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, val); + break; + + case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */ + { + float u = fp_immediate_for_encoding_32 (val); + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, u); + break; + } + + default: + HALT_NYI; + } +} + +static void +do_vec_MVNI (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,19] = 10111100000 + instr[18,16] = high 3 bits of uimm8 + instr[15,12] = selector + instr[11,10] = 01 + instr[9,5] = low 5-bits of uimm8 + instr[4,0] = Vd. 
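+     MVNI expands the 8-bit immediate in the same way as the MOVI forms
+     above, including the optional shift, and then bitwise-inverts it
+     before replicating it into each element.  For example, with
+     uimm8 == 0x5A and the "32-bit + LSL#8" form, every 32-bit lane of Vd
+     receives ~0x00005A00 == 0xFFFFA5FF (illustrative sketch only):
+
+       // illustrative only
+       uint32_t imm8 = 0x5A;
+       uint32_t elem = ~(imm8 << 8);        // 0xFFFFA5FF
+       uint32_t lanes[4];
+       for (int i = 0; i < 4; i++)          // full (128-bit) case: 4 lanes
+         lanes[i] = elem;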
*/ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned val = uimm (aarch64_get_instr (cpu), 18, 16) << 5 + | uimm (aarch64_get_instr (cpu), 9, 5); + unsigned i; + + NYI_assert (29, 19, 0x5E0); + NYI_assert (11, 10, 1); + + switch (uimm (aarch64_get_instr (cpu), 15, 12)) + { + case 0x0: /* 32-bit, no shift. */ + case 0x2: /* 32-bit, shift by 8. */ + case 0x4: /* 32-bit, shift by 16. */ + case 0x6: /* 32-bit, shift by 24. */ + val <<= (8 * uimm (aarch64_get_instr (cpu), 14, 13)); + val = ~ val; + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, val); + return; + + case 0xa: /* 16-bit, 8 bit shift. */ + val <<= 8; + case 0x8: /* 16-bit, no shift. */ + val = ~ val; + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, val); + return; + + case 0xd: /* 32-bit, mask shift by 16. */ + val <<= 8; + val |= 0xFF; + case 0xc: /* 32-bit, mask shift by 8. */ + val <<= 8; + val |= 0xFF; + val = ~ val; + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, val); + return; + + case 0xE: /* MOVI Dn, #mask64 */ + { + uint64_t mask = 0; + + for (i = 0; i < 8; i++) + if (val & (1 << i)) + mask |= (0xF << (i * 4)); + aarch64_set_vec_u64 (cpu, vd, 0, mask); + aarch64_set_vec_u64 (cpu, vd, 1, 0); + return; + } + + case 0xf: /* FMOV Vd.2D, #fpimm. */ + { + double u = fp_immediate_for_encoding_64 (val); + + if (! full) + HALT_UNALLOC; + + aarch64_set_vec_double (cpu, vd, 0, u); + aarch64_set_vec_double (cpu, vd, 1, u); + return; + } + + default: + HALT_NYI; + } +} + +#define ABS(A) ((A) < 0 ? - (A) : (A)) + +static void +do_vec_ABS (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 00 1110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit + instr[21,10] = 10 0000 1011 10 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 10, 0x82E); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_s8 (cpu, vd, i, + ABS (aarch64_get_vec_s8 (cpu, vn, i))); + break; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_s16 (cpu, vd, i, + ABS (aarch64_get_vec_s16 (cpu, vn, i))); + break; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_s32 (cpu, vd, i, + ABS (aarch64_get_vec_s32 (cpu, vn, i))); + break; + + case 3: + if (! full) + HALT_NYI; + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, vd, i, + ABS (aarch64_get_vec_s64 (cpu, vn, i))); + break; + } +} + +static void +do_vec_ADDV (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,24] = 00 1110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit + instr[21,10] = 11 0001 1011 10 + instr[9,5] = Vm + instr[4.0] = Rd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + uint64_t val = 0; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 10, 0xC6E); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + val += aarch64_get_vec_u8 (cpu, vm, i); + aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + return; + + case 1: + for (i = 0; i < (full ? 
8 : 4); i++) + val += aarch64_get_vec_u16 (cpu, vm, i); + aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + val += aarch64_get_vec_u32 (cpu, vm, i); + aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + return; + + case 3: + if (! full) + HALT_UNALLOC; + val = aarch64_get_vec_u64 (cpu, vm, 0); + val += aarch64_get_vec_u64 (cpu, vm, 1); + aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + return; + + default: + HALT_UNREACHABLE; + } +} + +static void +do_vec_ins_2 (sim_cpu *cpu) +{ + /* instr[31,21] = 01001110000 + instr[20,18] = size & element selector + instr[17,14] = 0000 + instr[13] = direction: to vec(0), from vec (1) + instr[12,10] = 111 + instr[9,5] = Vm + instr[4,0] = Vd. */ + + unsigned elem; + unsigned vm = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 21, 0x270); + NYI_assert (17, 14, 0); + NYI_assert (12, 10, 7); + + if (uimm (aarch64_get_instr (cpu), 13, 13) == 1) + { + if (uimm (aarch64_get_instr (cpu), 18, 18) == 1) + { + /* 32-bit moves. */ + elem = uimm (aarch64_get_instr (cpu), 20, 19); + aarch64_set_reg_u64 (cpu, vd, NO_SP, + aarch64_get_vec_u32 (cpu, vm, elem)); + } + else + { + /* 64-bit moves. */ + if (uimm (aarch64_get_instr (cpu), 19, 19) != 1) + HALT_NYI; + + elem = uimm (aarch64_get_instr (cpu), 20, 20); + aarch64_set_reg_u64 (cpu, vd, NO_SP, + aarch64_get_vec_u64 (cpu, vm, elem)); + } + } + else + { + if (uimm (aarch64_get_instr (cpu), 18, 18) == 1) + { + /* 32-bit moves. */ + elem = uimm (aarch64_get_instr (cpu), 20, 19); + aarch64_set_vec_u32 (cpu, vd, elem, + aarch64_get_reg_u32 (cpu, vm, NO_SP)); + } + else + { + /* 64-bit moves. */ + if (uimm (aarch64_get_instr (cpu), 19, 19) != 1) + HALT_NYI; + + elem = uimm (aarch64_get_instr (cpu), 20, 20); + aarch64_set_vec_u64 (cpu, vd, elem, + aarch64_get_reg_u64 (cpu, vm, NO_SP)); + } + } +} + +static void +do_vec_mull (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = lower(0)/upper(1) selector + instr[29] = signed(0)/unsigned(1) + instr[28,24] = 0 1110 + instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10) + instr[21] = 1 + instr[20,16] = Vm + instr[15,10] = 11 0000 + instr[9,5] = Vn + instr[4.0] = Vd. 
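+     The "upper" form (SMULL2/UMULL2) reads its inputs from the high half
+     of the source vectors, which is what the bias variable below selects.
+     A sketch of the unsigned 8-bit, lower-half case (illustrative only):
+
+       // illustrative only: UMULL Vd.8H, Vn.8B, Vm.8B
+       uint8_t  n[16] = { 250, 2 };         // remaining lanes are 0
+       uint8_t  m[16] = { 2, 3 };
+       uint16_t d[8];
+       for (int i = 0; i < 8; i++)
+         d[i] = (uint16_t) n[i] * m[i];     // d[0] == 500, wider than a byte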
*/ + + int unsign = uimm (aarch64_get_instr (cpu), 29, 29); + int bias = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (28, 24, 0x0E); + NYI_assert (15, 10, 0x30); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + if (bias) + bias = 8; + if (unsign) + for (i = 0; i < 8; i++) + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i + bias) + * aarch64_get_vec_u8 (cpu, vm, i + bias)); + else + for (i = 0; i < 8; i++) + aarch64_set_vec_s16 (cpu, vd, i, + aarch64_get_vec_s8 (cpu, vn, i + bias) + * aarch64_get_vec_s8 (cpu, vm, i + bias)); + return; + + case 1: + if (bias) + bias = 4; + if (unsign) + for (i = 0; i < 4; i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u16 (cpu, vn, i + bias) + * aarch64_get_vec_u16 (cpu, vm, i + bias)); + else + for (i = 0; i < 4; i++) + aarch64_set_vec_s32 (cpu, vd, i, + aarch64_get_vec_s16 (cpu, vn, i + bias) + * aarch64_get_vec_s16 (cpu, vm, i + bias)); + return; + + case 2: + if (bias) + bias = 2; + if (unsign) + for (i = 0; i < 2; i++) + aarch64_set_vec_u64 (cpu, vd, i, + (uint64_t) aarch64_get_vec_u32 (cpu, vn, + i + bias) + * (uint64_t) aarch64_get_vec_u32 (cpu, vm, + i + bias)); + else + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, vd, i, + aarch64_get_vec_s32 (cpu, vn, i + bias) + * aarch64_get_vec_s32 (cpu, vm, i + bias)); + return; + + case 3: + default: + HALT_NYI; + } +} + +static void +do_vec_fadd (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 001110 + instr[23] = FADD(0)/FSUB(1) + instr[22] = float (0)/double(1) + instr[21] = 1 + instr[20,16] = Vm + instr[15,10] = 110101 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x35); + + if (uimm (aarch64_get_instr (cpu), 23, 23)) + { + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_NYI; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + aarch64_get_vec_double (cpu, vn, i) + - aarch64_get_vec_double (cpu, vm, i)); + } + else + { + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + aarch64_get_vec_float (cpu, vn, i) + - aarch64_get_vec_float (cpu, vm, i)); + } + } + else + { + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_NYI; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + aarch64_get_vec_double (cpu, vm, i) + + aarch64_get_vec_double (cpu, vn, i)); + } + else + { + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + aarch64_get_vec_float (cpu, vm, i) + + aarch64_get_vec_float (cpu, vn, i)); + } + } +} + +static void +do_vec_add (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,24] = 001110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit + instr[21] = 1 + instr[20,16] = Vn + instr[15,10] = 100001 + instr[9,5] = Vm + instr[4.0] = Vd. 
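+     The size field selects how the register is carved into lanes; each
+     lane is added independently with ordinary modular wrap-around
+     (illustrative only, 32-bit lanes):
+
+       // illustrative only: ADD Vd.4S, Vn.4S, Vm.4S
+       uint32_t vn[4] = { 0xFFFFFFFF, 1, 2, 3 };
+       uint32_t vm[4] = { 1, 1, 1, 1 };
+       uint32_t vd[4];
+       for (int i = 0; i < 4; i++)
+         vd[i] = vn[i] + vm[i];             // vd[0] wraps around to 0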
*/ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x21); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) + + aarch64_get_vec_u8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) + + aarch64_get_vec_u16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) + + aarch64_get_vec_u32 (cpu, vm, i)); + return; + + case 3: + if (! full) + HALT_UNALLOC; + aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0) + + aarch64_get_vec_u64 (cpu, vm, 0)); + aarch64_set_vec_u64 (cpu, vd, 1, + aarch64_get_vec_u64 (cpu, vn, 1) + + aarch64_get_vec_u64 (cpu, vm, 1)); + return; + + default: + HALT_UNREACHABLE; + } +} + +static void +do_vec_mul (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,24] = 00 1110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit + instr[21] = 1 + instr[20,16] = Vn + instr[15,10] = 10 0111 + instr[9,5] = Vm + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x27); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + { + uint16_t val = aarch64_get_vec_u8 (cpu, vn, i); + val *= aarch64_get_vec_u8 (cpu, vm, i); + + aarch64_set_vec_u16 (cpu, vd, i, val); + } + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + { + uint32_t val = aarch64_get_vec_u16 (cpu, vn, i); + val *= aarch64_get_vec_u16 (cpu, vm, i); + + aarch64_set_vec_u32 (cpu, vd, i, val); + } + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + { + uint64_t val = aarch64_get_vec_u32 (cpu, vn, i); + val *= aarch64_get_vec_u32 (cpu, vm, i); + + aarch64_set_vec_u64 (cpu, vd, i, val); + } + return; + + default: + case 3: + HALT_UNALLOC; + } +} + +static void +do_vec_MLA (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,24] = 00 1110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit + instr[21] = 1 + instr[20,16] = Vn + instr[15,10] = 1001 01 + instr[9,5] = Vm + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x25); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + { + uint16_t val = aarch64_get_vec_u8 (cpu, vn, i); + val *= aarch64_get_vec_u8 (cpu, vm, i); + val += aarch64_get_vec_u8 (cpu, vd, i); + + aarch64_set_vec_u16 (cpu, vd, i, val); + } + return; + + case 1: + for (i = 0; i < (full ? 
8 : 4); i++) + { + uint32_t val = aarch64_get_vec_u16 (cpu, vn, i); + val *= aarch64_get_vec_u16 (cpu, vm, i); + val += aarch64_get_vec_u16 (cpu, vd, i); + + aarch64_set_vec_u32 (cpu, vd, i, val); + } + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + { + uint64_t val = aarch64_get_vec_u32 (cpu, vn, i); + val *= aarch64_get_vec_u32 (cpu, vm, i); + val += aarch64_get_vec_u32 (cpu, vd, i); + + aarch64_set_vec_u64 (cpu, vd, i, val); + } + return; + + default: + case 3: + HALT_UNALLOC; + } +} + +static float +fmaxnm (float a, float b) +{ + if (fpclassify (a) == FP_NORMAL) + { + if (fpclassify (b) == FP_NORMAL) + return a > b ? a : b; + return a; + } + else if (fpclassify (b) == FP_NORMAL) + return b; + return a; +} + +static float +fminnm (float a, float b) +{ + if (fpclassify (a) == FP_NORMAL) + { + if (fpclassify (b) == FP_NORMAL) + return a < b ? a : b; + return a; + } + else if (fpclassify (b) == FP_NORMAL) + return b; + return a; +} + +static double +dmaxnm (double a, double b) +{ + if (fpclassify (a) == FP_NORMAL) + { + if (fpclassify (b) == FP_NORMAL) + return a > b ? a : b; + return a; + } + else if (fpclassify (b) == FP_NORMAL) + return b; + return a; +} + +static double +dminnm (double a, double b) +{ + if (fpclassify (a) == FP_NORMAL) + { + if (fpclassify (b) == FP_NORMAL) + return a < b ? a : b; + return a; + } + else if (fpclassify (b) == FP_NORMAL) + return b; + return a; +} + +static void +do_vec_FminmaxNMP (sim_cpu *cpu) +{ + /* aarch64_get_instr (cpu)[31] = 0 + aarch64_get_instr (cpu)[30] = half (0)/full (1) + aarch64_get_instr (cpu)[29,24] = 10 1110 + aarch64_get_instr (cpu)[23] = max(0)/min(1) + aarch64_get_instr (cpu)[22] = float (0)/double (1) + aarch64_get_instr (cpu)[21] = 1 + aarch64_get_instr (cpu)[20,16] = Vn + aarch64_get_instr (cpu)[15,10] = 1100 01 + aarch64_get_instr (cpu)[9,5] = Vm + aarch64_get_instr (cpu)[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 24, 0x2E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x31); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double (* fn)(double, double) = uimm (aarch64_get_instr (cpu), 23, 23) + ? dminnm : dmaxnm; + + if (! full) + HALT_NYI; + aarch64_set_vec_double (cpu, vd, 0, + fn (aarch64_get_vec_double (cpu, vn, 0), + aarch64_get_vec_double (cpu, vn, 1))); + aarch64_set_vec_double (cpu, vd, 0, + fn (aarch64_get_vec_double (cpu, vm, 0), + aarch64_get_vec_double (cpu, vm, 1))); + } + else + { + float (* fn)(float, float) = uimm (aarch64_get_instr (cpu), 23, 23) + ? fminnm : fmaxnm; + + aarch64_set_vec_float (cpu, vd, 0, + fn (aarch64_get_vec_float (cpu, vn, 0), + aarch64_get_vec_float (cpu, vn, 1))); + if (full) + aarch64_set_vec_float (cpu, vd, 1, + fn (aarch64_get_vec_float (cpu, vn, 2), + aarch64_get_vec_float (cpu, vn, 3))); + + aarch64_set_vec_float (cpu, vd, (full ? 2 : 1), + fn (aarch64_get_vec_float (cpu, vm, 0), + aarch64_get_vec_float (cpu, vm, 1))); + if (full) + aarch64_set_vec_float (cpu, vd, 3, + fn (aarch64_get_vec_float (cpu, vm, 2), + aarch64_get_vec_float (cpu, vm, 3))); + } +} + +static void +do_vec_AND (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 001110001 + instr[20,16] = Vm + instr[15,10] = 000111 + instr[9,5] = Vn + instr[4.0] = Vd. 
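+     The operation is purely bitwise, so the loop below simply works
+     through the register in 32-bit chunks.  For instance (illustrative
+     only, one chunk shown):
+
+       // illustrative only: AND Vd.16B, Vn.16B, Vm.16B, one 32-bit chunk
+       uint32_t n = 0xF0F0F0F0, m = 0x0FF00FF0;
+       uint32_t d = n & m;                  // 0x00F000F0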
*/ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 21, 0x071); + NYI_assert (15, 10, 0x07); + + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vn, i) + & aarch64_get_vec_u32 (cpu, vm, i)); +} + +static void +do_vec_BSL (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 101110011 + instr[20,16] = Vm + instr[15,10] = 000111 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 21, 0x173); + NYI_assert (15, 10, 0x07); + + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + ( aarch64_get_vec_u8 (cpu, vd, i) + & aarch64_get_vec_u8 (cpu, vn, i)) + | ((~ aarch64_get_vec_u8 (cpu, vd, i)) + & aarch64_get_vec_u8 (cpu, vm, i))); +} + +static void +do_vec_EOR (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 10 1110 001 + instr[20,16] = Vm + instr[15,10] = 000111 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 21, 0x171); + NYI_assert (15, 10, 0x07); + + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vn, i) + ^ aarch64_get_vec_u32 (cpu, vm, i)); +} + +static void +do_vec_bit (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,23] = 10 1110 1 + instr[22] = BIT (0) / BIF (1) + instr[21] = 1 + instr[20,16] = Vm + instr[15,10] = 0001 11 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned test_false = uimm (aarch64_get_instr (cpu), 22, 22); + unsigned i; + + NYI_assert (29, 23, 0x5D); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x07); + + if (test_false) + { + for (i = 0; i < (full ? 16 : 8); i++) + if (aarch64_get_vec_u32 (cpu, vn, i) == 0) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i)); + } + else + { + for (i = 0; i < (full ? 16 : 8); i++) + if (aarch64_get_vec_u32 (cpu, vn, i) != 0) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i)); + } +} + +static void +do_vec_ORN (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 00 1110 111 + instr[20,16] = Vm + instr[15,10] = 00 0111 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 21, 0x077); + NYI_assert (15, 10, 0x07); + + for (i = 0; i < (full ? 
16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i) + | ~ aarch64_get_vec_u8 (cpu, vm, i)); +} + +static void +do_vec_ORR (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 00 1110 101 + instr[20,16] = Vm + instr[15,10] = 0001 11 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 21, 0x075); + NYI_assert (15, 10, 0x07); + + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i) + | aarch64_get_vec_u8 (cpu, vm, i)); +} + +static void +do_vec_BIC (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 00 1110 011 + instr[20,16] = Vm + instr[15,10] = 00 0111 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 21, 0x073); + NYI_assert (15, 10, 0x07); + + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i) + & ~ aarch64_get_vec_u8 (cpu, vm, i)); +} + +static void +do_vec_XTN (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = first part (0)/ second part (1) + instr[29,24] = 00 1110 + instr[23,22] = size: byte(00), half(01), word (10) + instr[21,10] = 1000 0100 1010 + instr[9,5] = Vs + instr[4,0] = Vd. */ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned bias = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 10, 0x84A); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + if (bias) + for (i = 0; i < 8; i++) + aarch64_set_vec_u8 (cpu, vd, i + 8, + aarch64_get_vec_u16 (cpu, vs, i) >> 8); + else + for (i = 0; i < 8; i++) + aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i)); + return; + + case 1: + if (bias) + for (i = 0; i < 4; i++) + aarch64_set_vec_u16 (cpu, vd, i + 4, + aarch64_get_vec_u32 (cpu, vs, i) >> 16); + else + for (i = 0; i < 4; i++) + aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i)); + return; + + case 2: + if (bias) + for (i = 0; i < 2; i++) + aarch64_set_vec_u32 (cpu, vd, i + 4, + aarch64_get_vec_u64 (cpu, vs, i) >> 32); + else + for (i = 0; i < 2; i++) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i)); + return; + + default: + HALT_UNALLOC; + } +} + +#define MAX(A,B) ((A) > (B) ? (A) : (B)) +#define MIN(A,B) ((A) < (B) ? (A) : (B)) + +static void +do_vec_maxv (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29] = signed (0)/unsigned(1) + instr[28,24] = 0 1110 + instr[23,22] = size: byte(00), half(01), word (10) + instr[21] = 1 + instr[20,17] = 1 000 + instr[16] = max(0)/min(1) + instr[15,10] = 1010 10 + instr[9,5] = V source + instr[4.0] = R dest. 
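+     These are across-lane reductions: a single scalar maximum or minimum
+     of all the elements ends up in the destination register.  A sketch of
+     the unsigned byte case (illustrative only):
+
+       // illustrative only: UMAXV Bd, Vn.16B
+       uint8_t lanes[16] = { 3, 200, 7 };   // remaining lanes are 0
+       uint8_t umax = lanes[0];
+       for (int i = 1; i < 16; i++)
+         if (lanes[i] > umax)
+           umax = lanes[i];                 // 200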
*/ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned i; + + NYI_assert (28, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (20, 17, 8); + NYI_assert (15, 10, 0x2A); + + switch ((uimm (aarch64_get_instr (cpu), 29, 29) << 1) + | uimm (aarch64_get_instr (cpu), 16, 16)) + { + case 0: /* SMAXV. */ + { + int64_t smax; + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + smax = aarch64_get_vec_s8 (cpu, vs, 0); + for (i = 1; i < (full ? 16 : 8); i++) + smax = MAX (smax, aarch64_get_vec_s8 (cpu, vs, i)); + break; + case 1: + smax = aarch64_get_vec_s16 (cpu, vs, 0); + for (i = 1; i < (full ? 8 : 4); i++) + smax = MAX (smax, aarch64_get_vec_s16 (cpu, vs, i)); + break; + case 2: + smax = aarch64_get_vec_s32 (cpu, vs, 0); + for (i = 1; i < (full ? 4 : 2); i++) + smax = MAX (smax, aarch64_get_vec_s32 (cpu, vs, i)); + break; + default: + case 3: + HALT_UNALLOC; + } + aarch64_set_reg_s64 (cpu, rd, NO_SP, smax); + return; + } + + case 1: /* SMINV. */ + { + int64_t smin; + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + smin = aarch64_get_vec_s8 (cpu, vs, 0); + for (i = 1; i < (full ? 16 : 8); i++) + smin = MIN (smin, aarch64_get_vec_s8 (cpu, vs, i)); + break; + case 1: + smin = aarch64_get_vec_s16 (cpu, vs, 0); + for (i = 1; i < (full ? 8 : 4); i++) + smin = MIN (smin, aarch64_get_vec_s16 (cpu, vs, i)); + break; + case 2: + smin = aarch64_get_vec_s32 (cpu, vs, 0); + for (i = 1; i < (full ? 4 : 2); i++) + smin = MIN (smin, aarch64_get_vec_s32 (cpu, vs, i)); + break; + default: + case 3: + HALT_UNALLOC; + } + aarch64_set_reg_s64 (cpu, rd, NO_SP, smin); + return; + } + + case 2: /* UMAXV. */ + { + uint64_t umax; + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + umax = aarch64_get_vec_u8 (cpu, vs, 0); + for (i = 1; i < (full ? 16 : 8); i++) + umax = MAX (umax, aarch64_get_vec_u8 (cpu, vs, i)); + break; + case 1: + umax = aarch64_get_vec_u16 (cpu, vs, 0); + for (i = 1; i < (full ? 8 : 4); i++) + umax = MAX (umax, aarch64_get_vec_u16 (cpu, vs, i)); + break; + case 2: + umax = aarch64_get_vec_u32 (cpu, vs, 0); + for (i = 1; i < (full ? 4 : 2); i++) + umax = MAX (umax, aarch64_get_vec_u32 (cpu, vs, i)); + break; + default: + case 3: + HALT_UNALLOC; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, umax); + return; + } + + case 3: /* UMINV. */ + { + uint64_t umin; + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + umin = aarch64_get_vec_u8 (cpu, vs, 0); + for (i = 1; i < (full ? 16 : 8); i++) + umin = MIN (umin, aarch64_get_vec_u8 (cpu, vs, i)); + break; + case 1: + umin = aarch64_get_vec_u16 (cpu, vs, 0); + for (i = 1; i < (full ? 8 : 4); i++) + umin = MIN (umin, aarch64_get_vec_u16 (cpu, vs, i)); + break; + case 2: + umin = aarch64_get_vec_u32 (cpu, vs, 0); + for (i = 1; i < (full ? 4 : 2); i++) + umin = MIN (umin, aarch64_get_vec_u32 (cpu, vs, i)); + break; + default: + case 3: + HALT_UNALLOC; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, umin); + return; + } + + default: + HALT_UNALLOC; + } +} + +static void +do_vec_fminmaxV (sim_cpu *cpu) +{ + /* instr[31,24] = 0110 1110 + instr[23] = max(0)/min(1) + instr[22,14] = 011 0000 11 + instr[13,12] = nm(00)/normal(11) + instr[11,10] = 10 + instr[9,5] = V source + instr[4.0] = R dest. 
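+     The NM forms use the fmaxnm/fminnm helpers defined above, which
+     prefer a numeric operand over a NaN.  For example (illustrative only,
+     relying on C99 fmaxf having the same number-beats-NaN rule):
+
+       // illustrative only
+       float r = fmaxf (NAN, 2.0f);         // 2.0f : the NaN is ignored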
*/ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + float res = aarch64_get_vec_float (cpu, vs, 0); + + NYI_assert (31, 24, 0x6E); + NYI_assert (22, 14, 0x0C3); + NYI_assert (11, 10, 2); + + if (uimm (aarch64_get_instr (cpu), 23, 23)) + { + switch (uimm (aarch64_get_instr (cpu), 13, 12)) + { + case 0: /* FMNINNMV. */ + for (i = 1; i < 4; i++) + res = fminnm (res, aarch64_get_vec_float (cpu, vs, i)); + break; + + case 3: /* FMINV. */ + for (i = 1; i < 4; i++) + res = MIN (res, aarch64_get_vec_float (cpu, vs, i)); + break; + + default: + HALT_NYI; + } + } + else + { + switch (uimm (aarch64_get_instr (cpu), 13, 12)) + { + case 0: /* FMNAXNMV. */ + for (i = 1; i < 4; i++) + res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i)); + break; + + case 3: /* FMAXV. */ + for (i = 1; i < 4; i++) + res = MAX (res, aarch64_get_vec_float (cpu, vs, i)); + break; + + default: + HALT_NYI; + } + } + + aarch64_set_FP_float (cpu, rd, res); +} + +static void +do_vec_Fminmax (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 00 1110 + instr[23] = max(0)/min(1) + instr[22] = float(0)/double(1) + instr[21] = 1 + instr[20,16] = Vm + instr[15,14] = 11 + instr[13,12] = nm(00)/normal(11) + instr[11,10] = 01 + instr[9,5] = Vn + instr[4,0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned min = uimm (aarch64_get_instr (cpu), 23, 23); + unsigned i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 14, 3); + NYI_assert (11, 10, 1); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double (* func)(double, double); + + if (! full) + HALT_NYI; + + if (uimm (aarch64_get_instr (cpu), 13, 12) == 0) + func = min ? dminnm : dmaxnm; + else if (uimm (aarch64_get_instr (cpu), 13, 12) == 3) + func = min ? fmin : fmax; + else + HALT_NYI; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + func (aarch64_get_vec_double (cpu, vn, i), + aarch64_get_vec_double (cpu, vm, i))); + } + else + { + float (* func)(float, float); + + if (uimm (aarch64_get_instr (cpu), 13, 12) == 0) + func = min ? fminnm : fmaxnm; + else if (uimm (aarch64_get_instr (cpu), 13, 12) == 3) + func = min ? fminf : fmaxf; + else + HALT_NYI; + + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + func (aarch64_get_vec_float (cpu, vn, i), + aarch64_get_vec_float (cpu, vm, i))); + } +} + +static void +do_vec_SCVTF (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = Q + instr[29,23] = 00 1110 0 + instr[22] = float(0)/double(1) + instr[21,10] = 10 0001 1101 10 + instr[9,5] = Vn + instr[4,0] = Vd. */ + + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned size = uimm (aarch64_get_instr (cpu), 22, 22); + unsigned i; + + NYI_assert (29, 23, 0x1C); + NYI_assert (21, 10, 0x876); + + if (size) + { + if (! full) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + { + double val = (double) aarch64_get_vec_u64 (cpu, vn, i); + aarch64_set_vec_double (cpu, vd, i, val); + } + } + else + { + for (i = 0; i < (full ? 
4 : 2); i++) + { + float val = (float) aarch64_get_vec_u32 (cpu, vn, i); + aarch64_set_vec_float (cpu, vd, i, val); + } + } +} + +#define VEC_CMP(SOURCE, CMP) \ + do \ + { \ + switch (size) \ + { \ + case 0: \ + for (i = 0; i < (full ? 16 : 8); i++) \ + aarch64_set_vec_u8 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \ + CMP \ + aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \ + ? -1 : 0); \ + return; \ + case 1: \ + for (i = 0; i < (full ? 8 : 4); i++) \ + aarch64_set_vec_u16 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \ + CMP \ + aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \ + ? -1 : 0); \ + return; \ + case 2: \ + for (i = 0; i < (full ? 4 : 2); i++) \ + aarch64_set_vec_u32 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \ + CMP \ + aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \ + ? -1 : 0); \ + return; \ + case 3: \ + if (! full) \ + HALT_UNALLOC; \ + for (i = 0; i < 2; i++) \ + aarch64_set_vec_u64 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ + CMP \ + aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \ + ? -1ULL : 0); \ + return; \ + default: \ + HALT_UNALLOC; \ + } \ + } \ + while (0) + +#define VEC_CMP0(SOURCE, CMP) \ + do \ + { \ + switch (size) \ + { \ + case 0: \ + for (i = 0; i < (full ? 16 : 8); i++) \ + aarch64_set_vec_u8 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \ + CMP 0 ? -1 : 0); \ + return; \ + case 1: \ + for (i = 0; i < (full ? 8 : 4); i++) \ + aarch64_set_vec_u16 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \ + CMP 0 ? -1 : 0); \ + return; \ + case 2: \ + for (i = 0; i < (full ? 4 : 2); i++) \ + aarch64_set_vec_u32 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \ + CMP 0 ? -1 : 0); \ + return; \ + case 3: \ + if (! full) \ + HALT_UNALLOC; \ + for (i = 0; i < 2; i++) \ + aarch64_set_vec_u64 (cpu, vd, i, \ + aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ + CMP 0 ? -1ULL : 0); \ + return; \ + default: \ + HALT_UNALLOC; \ + } \ + } \ + while (0) + +#define VEC_FCMP0(CMP) \ + do \ + { \ + if (vm != 0) \ + HALT_NYI; \ + if (uimm (aarch64_get_instr (cpu), 22, 22)) \ + { \ + if (! full) \ + HALT_NYI; \ + for (i = 0; i < 2; i++) \ + aarch64_set_vec_u64 (cpu, vd, i, \ + aarch64_get_vec_double (cpu, vn, i) \ + CMP 0.0 ? -1 : 0); \ + } \ + else \ + { \ + for (i = 0; i < (full ? 4 : 2); i++) \ + aarch64_set_vec_u32 (cpu, vd, i, \ + aarch64_get_vec_float (cpu, vn, i) \ + CMP 0.0 ? -1 : 0); \ + } \ + return; \ + } \ + while (0) + +#define VEC_FCMP(CMP) \ + do \ + { \ + if (uimm (aarch64_get_instr (cpu), 22, 22)) \ + { \ + if (! full) \ + HALT_NYI; \ + for (i = 0; i < 2; i++) \ + aarch64_set_vec_u64 (cpu, vd, i, \ + aarch64_get_vec_double (cpu, vn, i) \ + CMP \ + aarch64_get_vec_double (cpu, vm, i) \ + ? -1 : 0); \ + } \ + else \ + { \ + for (i = 0; i < (full ? 4 : 2); i++) \ + aarch64_set_vec_u32 (cpu, vd, i, \ + aarch64_get_vec_float (cpu, vn, i) \ + CMP \ + aarch64_get_vec_float (cpu, vm, i) \ + ? -1 : 0); \ + } \ + return; \ + } \ + while (0) + +static void +do_vec_compare (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29] = part-of-comparison-type + instr[28,24] = 0 1110 + instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11) + type of float compares: single (-0) / double (-1) + instr[21] = 1 + instr[20,16] = Vm or 00000 (compare vs 0) + instr[15,10] = part-of-comparison-type + instr[9,5] = Vn + instr[4.0] = Vd. 
*/ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + int size = uimm (aarch64_get_instr (cpu), 23, 22); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (28, 24, 0x0E); + NYI_assert (21, 21, 1); + + if ((uimm (aarch64_get_instr (cpu), 11, 11) + && uimm (aarch64_get_instr (cpu), 14, 14)) + || ((uimm (aarch64_get_instr (cpu), 11, 11) == 0 + && uimm (aarch64_get_instr (cpu), 10, 10) == 0))) + { + /* A compare vs 0. */ + if (vm != 0) + { + if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x2A) + do_vec_maxv (cpu); + else if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x32 + || uimm (aarch64_get_instr (cpu), 15, 10) == 0x3E) + do_vec_fminmaxV (cpu); + else if (uimm (aarch64_get_instr (cpu), 29, 23) == 0x1C + && uimm (aarch64_get_instr (cpu), 21, 10) == 0x876) + do_vec_SCVTF (cpu); + else + HALT_NYI; + return; + } + } + + if (uimm (aarch64_get_instr (cpu), 14, 14)) + { + /* A floating point compare. */ + unsigned decode = (uimm (aarch64_get_instr (cpu), 29, 29) << 5) + | (uimm (aarch64_get_instr (cpu), 23, 23) << 4) + | uimm (aarch64_get_instr (cpu), 13, 10); + + NYI_assert (15, 15, 1); + + switch (decode) + { + case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>); + case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=); + case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==); + case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=); + case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<); + case /* 0b111001: GT */ 0x39: VEC_FCMP (>); + case /* 0b101001: GE */ 0x29: VEC_FCMP (>=); + case /* 0b001001: EQ */ 0x09: VEC_FCMP (==); + + default: + HALT_NYI; + } + } + else + { + unsigned decode = (uimm (aarch64_get_instr (cpu), 29, 29) << 6) + | uimm (aarch64_get_instr (cpu), 15, 10); + + switch (decode) + { + case 0x0D: /* 0001101 GT */ VEC_CMP (s, > ); + case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= ); + case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > ); + case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == ); + case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < ); + case 0x4D: /* 1001101 HI */ VEC_CMP (u, > ); + case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= ); + case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= ); + case 0x63: /* 1100011 EQ */ VEC_CMP (u, == ); + case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= ); + default: + if (vm == 0) + HALT_NYI; + do_vec_maxv (cpu); + } + } +} + +static void +do_vec_SSHL (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = first part (0)/ second part (1) + instr[29,24] = 00 1110 + instr[23,22] = size: byte(00), half(01), word (10), long (11) + instr[21] = 1 + instr[20,16] = Vm + instr[15,10] = 0100 01 + instr[9,5] = Vn + instr[4,0] = Vd. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x11); + + /* FIXME: What is a signed shift left in this context ?. */ + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) + << aarch64_get_vec_s8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) + << aarch64_get_vec_s16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 
4 : 2); i++) + aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) + << aarch64_get_vec_s32 (cpu, vm, i)); + return; + + case 3: + if (! full) + HALT_UNALLOC; + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) + << aarch64_get_vec_s64 (cpu, vm, i)); + return; + + default: + HALT_NYI; + } +} + +static void +do_vec_USHL (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = first part (0)/ second part (1) + instr[29,24] = 10 1110 + instr[23,22] = size: byte(00), half(01), word (10), long (11) + instr[21] = 1 + instr[20,16] = Vm + instr[15,10] = 0100 01 + instr[9,5] = Vn + instr[4,0] = Vd */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 24, 0x2E); + NYI_assert (15, 10, 0x11); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) + << aarch64_get_vec_u8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) + << aarch64_get_vec_u16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) + << aarch64_get_vec_u32 (cpu, vm, i)); + return; + + case 3: + if (! full) + HALT_UNALLOC; + for (i = 0; i < 2; i++) + aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) + << aarch64_get_vec_u64 (cpu, vm, i)); + return; + + default: + HALT_NYI; + } +} + +static void +do_vec_FMLA (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29,23] = 0011100 + instr[22] = size: 0=>float, 1=>double + instr[21] = 1 + instr[20,16] = Vn + instr[15,10] = 1100 11 + instr[9,5] = Vm + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 23, 0x1C); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x33); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_UNALLOC; + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + aarch64_get_vec_double (cpu, vn, i) * + aarch64_get_vec_double (cpu, vm, i) + + aarch64_get_vec_double (cpu, vd, i)); + } + else + { + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + aarch64_get_vec_float (cpu, vn, i) * + aarch64_get_vec_float (cpu, vm, i) + + aarch64_get_vec_float (cpu, vd, i)); + } +} + +static void +do_vec_max (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29] = SMAX (0) / UMAX (1) + instr[28,24] = 0 1110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit + instr[21] = 1 + instr[20,16] = Vn + instr[15,10] = 0110 01 + instr[9,5] = Vm + instr[4.0] = Vd. 
*/ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (28, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x19); + + if (uimm (aarch64_get_instr (cpu), 29, 29)) + { + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i) + > aarch64_get_vec_u8 (cpu, vm, i) + ? aarch64_get_vec_u8 (cpu, vn, i) + : aarch64_get_vec_u8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_vec_u16 (cpu, vn, i) + > aarch64_get_vec_u16 (cpu, vm, i) + ? aarch64_get_vec_u16 (cpu, vn, i) + : aarch64_get_vec_u16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vn, i) + > aarch64_get_vec_u32 (cpu, vm, i) + ? aarch64_get_vec_u32 (cpu, vn, i) + : aarch64_get_vec_u32 (cpu, vm, i)); + return; + + default: + case 3: + HALT_UNALLOC; + } + } + else + { + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_s8 (cpu, vd, i, + aarch64_get_vec_s8 (cpu, vn, i) + > aarch64_get_vec_s8 (cpu, vm, i) + ? aarch64_get_vec_s8 (cpu, vn, i) + : aarch64_get_vec_s8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_s16 (cpu, vd, i, + aarch64_get_vec_s16 (cpu, vn, i) + > aarch64_get_vec_s16 (cpu, vm, i) + ? aarch64_get_vec_s16 (cpu, vn, i) + : aarch64_get_vec_s16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_s32 (cpu, vd, i, + aarch64_get_vec_s32 (cpu, vn, i) + > aarch64_get_vec_s32 (cpu, vm, i) + ? aarch64_get_vec_s32 (cpu, vn, i) + : aarch64_get_vec_s32 (cpu, vm, i)); + return; + + default: + case 3: + HALT_UNALLOC; + } + } +} + +static void +do_vec_min (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half selector + instr[29] = SMIN (0) / UMIN (1) + instr[28,24] = 0 1110 + instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit + instr[21] = 1 + instr[20,16] = Vn + instr[15,10] = 0110 11 + instr[9,5] = Vm + instr[4.0] = Vd. */ + + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (28, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x1B); + + if (uimm (aarch64_get_instr (cpu), 29, 29)) + { + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i) + < aarch64_get_vec_u8 (cpu, vm, i) + ? aarch64_get_vec_u8 (cpu, vn, i) + : aarch64_get_vec_u8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_vec_u16 (cpu, vn, i) + < aarch64_get_vec_u16 (cpu, vm, i) + ? aarch64_get_vec_u16 (cpu, vn, i) + : aarch64_get_vec_u16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vn, i) + < aarch64_get_vec_u32 (cpu, vm, i) + ? 
aarch64_get_vec_u32 (cpu, vn, i) + : aarch64_get_vec_u32 (cpu, vm, i)); + return; + + default: + case 3: + HALT_UNALLOC; + } + } + else + { + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_s8 (cpu, vd, i, + aarch64_get_vec_s8 (cpu, vn, i) + < aarch64_get_vec_s8 (cpu, vm, i) + ? aarch64_get_vec_s8 (cpu, vn, i) + : aarch64_get_vec_s8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_s16 (cpu, vd, i, + aarch64_get_vec_s16 (cpu, vn, i) + < aarch64_get_vec_s16 (cpu, vm, i) + ? aarch64_get_vec_s16 (cpu, vn, i) + : aarch64_get_vec_s16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_s32 (cpu, vd, i, + aarch64_get_vec_s32 (cpu, vn, i) + < aarch64_get_vec_s32 (cpu, vm, i) + ? aarch64_get_vec_s32 (cpu, vn, i) + : aarch64_get_vec_s32 (cpu, vm, i)); + return; + + default: + case 3: + HALT_UNALLOC; + } + } +} + +static void +do_vec_sub_long (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = lower (0) / upper (1) + instr[29] = signed (0) / unsigned (1) + instr[28,24] = 0 1110 + instr[23,22] = size: bytes (00), half (01), word (10) + instr[21] = 1 + insrt[20,16] = Vm + instr[15,10] = 0010 00 + instr[9,5] = Vn + instr[4,0] = V dest. */ + + unsigned size = uimm (aarch64_get_instr (cpu), 23, 22); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned bias = 0; + unsigned i; + + NYI_assert (28, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x08); + + if (size == 3) + HALT_UNALLOC; + + switch (uimm (aarch64_get_instr (cpu), 30, 29)) + { + case 2: /* SSUBL2. */ + bias = 2; + case 0: /* SSUBL. */ + switch (size) + { + case 0: + bias *= 3; + for (i = 0; i < 8; i++) + aarch64_set_vec_s16 (cpu, vd, i, + aarch64_get_vec_s8 (cpu, vn, i + bias) + - aarch64_get_vec_s8 (cpu, vm, i + bias)); + break; + + case 1: + bias *= 2; + for (i = 0; i < 4; i++) + aarch64_set_vec_s32 (cpu, vd, i, + aarch64_get_vec_s16 (cpu, vn, i + bias) + - aarch64_get_vec_s16 (cpu, vm, i + bias)); + break; + + case 2: + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, vd, i, + aarch64_get_vec_s32 (cpu, vn, i + bias) + - aarch64_get_vec_s32 (cpu, vm, i + bias)); + break; + + default: + HALT_UNALLOC; + } + break; + + case 3: /* USUBL2. */ + bias = 2; + case 1: /* USUBL. 
*/ + switch (size) + { + case 0: + bias *= 3; + for (i = 0; i < 8; i++) + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vn, i + bias) + - aarch64_get_vec_u8 (cpu, vm, i + bias)); + break; + + case 1: + bias *= 2; + for (i = 0; i < 4; i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u16 (cpu, vn, i + bias) + - aarch64_get_vec_u16 (cpu, vm, i + bias)); + break; + + case 2: + for (i = 0; i < 2; i++) + aarch64_set_vec_u64 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vn, i + bias) + - aarch64_get_vec_u32 (cpu, vm, i + bias)); + break; + + default: + HALT_UNALLOC; + } + break; + } +} + +#define DO_ADDP(FN) \ + do \ + { \ + for (i = 0; i < range; i++) \ + { \ + aarch64_set_vec_##FN (cpu, vd, i, \ + aarch64_get_vec_##FN (cpu, vn, i * 2) \ + + aarch64_get_vec_##FN (cpu, vn, i * 2 + 1)); \ + aarch64_set_vec_##FN (cpu, vd, i + range, \ + aarch64_get_vec_##FN (cpu, vm, i * 2) \ + + aarch64_get_vec_##FN (cpu, vm, i * 2 + 1)); \ + } \ + } \ + while (0) + +static void +do_vec_ADDP (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 00 1110 + instr[23,22] = size: bytes (00), half (01), word (10), long (11) + instr[21] = 1 + insrt[20,16] = Vm + instr[15,10] = 1011 11 + instr[9,5] = Vn + instr[4,0] = V dest. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned size = uimm (aarch64_get_instr (cpu), 23, 22); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i, range; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x2F); + + switch (size) + { + case 0: + range = full ? 8 : 4; + DO_ADDP (u8); + return; + + case 1: + range = full ? 4 : 2; + DO_ADDP (u16); + return; + + case 2: + range = full ? 2 : 1; + DO_ADDP (u32); + return; + + case 3: + if (! full) + HALT_UNALLOC; + range = 1; + DO_ADDP (u64); + return; + + default: + HALT_NYI; + } +} + +static void +do_vec_UMOV (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = 32-bit(0)/64-bit(1) + instr[29,21] = 00 1110 000 + insrt[20,16] = size & index + instr[15,10] = 0011 11 + instr[9,5] = V source + instr[4,0] = R dest. */ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned index; + + NYI_assert (29, 21, 0x070); + NYI_assert (15, 10, 0x0F); + + if (uimm (aarch64_get_instr (cpu), 16, 16)) + { + /* Byte transfer. */ + index = uimm (aarch64_get_instr (cpu), 20, 17); + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_vec_u8 (cpu, vs, index)); + } + else if (uimm (aarch64_get_instr (cpu), 17, 17)) + { + index = uimm (aarch64_get_instr (cpu), 20, 18); + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_vec_u16 (cpu, vs, index)); + } + else if (uimm (aarch64_get_instr (cpu), 18, 18)) + { + index = uimm (aarch64_get_instr (cpu), 20, 19); + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_vec_u32 (cpu, vs, index)); + } + else + { + if (uimm (aarch64_get_instr (cpu), 30, 30) != 1) + HALT_UNALLOC; + + index = uimm (aarch64_get_instr (cpu), 20, 20); + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_vec_u64 (cpu, vs, index)); + } +} + +static void +do_vec_FABS (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,23] = 00 1110 1 + instr[22] = float(0)/double(1) + instr[21,16] = 10 0000 + instr[15,10] = 1111 10 + instr[9,5] = Vn + instr[4,0] = Vd. 
*/ + + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned i; + + NYI_assert (29, 23, 0x1D); + NYI_assert (21, 10, 0x83E); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_NYI; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + fabs (aarch64_get_vec_double (cpu, vn, i))); + } + else + { + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + fabsf (aarch64_get_vec_float (cpu, vn, i))); + } +} + +static void +do_vec_FCVTZS (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0) / all (1) + instr[29,23] = 00 1110 1 + instr[22] = single (0) / double (1) + instr[21,10] = 10 0001 1011 10 + instr[9,5] = Rn + instr[4,0] = Rd. */ + + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned i; + + NYI_assert (31, 31, 0); + NYI_assert (29, 23, 0x1D); + NYI_assert (21, 10, 0x86E); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, rd, i, + (int64_t) aarch64_get_vec_double (cpu, rn, i)); + } + else + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_s32 (cpu, rd, i, + (int32_t) aarch64_get_vec_float (cpu, rn, i)); +} + +static void +do_vec_op1 (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half/full + instr[29,24] = 00 1110 + instr[23,21] = ??? + instr[20,16] = Vm + instr[15,10] = sub-opcode + instr[9,5] = Vn + instr[4,0] = Vd */ + NYI_assert (29, 24, 0x0E); + + if (uimm (aarch64_get_instr (cpu), 21, 21) == 0) + { + if (uimm (aarch64_get_instr (cpu), 23, 22) == 0) + { + if (uimm (aarch64_get_instr (cpu), 30, 30) == 1 + && uimm (aarch64_get_instr (cpu), 17, 14) == 0 + && uimm (aarch64_get_instr (cpu), 12, 10) == 7) + return do_vec_ins_2 (cpu); + + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x01: do_vec_DUP_vector_into_vector (cpu); return; + case 0x03: do_vec_DUP_scalar_into_vector (cpu); return; + case 0x07: do_vec_INS (cpu); return; + case 0x0A: do_vec_TRN (cpu); return; + + case 0x0F: + if (uimm (aarch64_get_instr (cpu), 17, 16) == 0) + { + do_vec_MOV_into_scalar (cpu); + return; + } + break; + + case 0x00: + case 0x08: + case 0x10: + case 0x18: + do_vec_TBL (cpu); return; + + case 0x06: + case 0x16: + do_vec_UZP (cpu); return; + + case 0x0E: + case 0x1E: + do_vec_ZIP (cpu); return; + + default: + HALT_NYI; + } + } + + switch (uimm (aarch64_get_instr (cpu), 13, 10)) + { + case 0x6: do_vec_UZP (cpu); return; + case 0xE: do_vec_ZIP (cpu); return; + case 0xA: do_vec_TRN (cpu); return; + case 0xF: do_vec_UMOV (cpu); return; + default: HALT_NYI; + } + } + + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x07: + switch (uimm (aarch64_get_instr (cpu), 23, 21)) + { + case 1: do_vec_AND (cpu); return; + case 3: do_vec_BIC (cpu); return; + case 5: do_vec_ORR (cpu); return; + case 7: do_vec_ORN (cpu); return; + default: HALT_NYI; + } + + case 0x08: do_vec_sub_long (cpu); return; + case 0x0a: do_vec_XTN (cpu); return; + case 0x11: do_vec_SSHL (cpu); return; + case 0x19: do_vec_max (cpu); return; + case 0x1B: do_vec_min (cpu); return; + case 0x21: do_vec_add (cpu); return; + case 0x25: do_vec_MLA (cpu); return; + case 0x27: do_vec_mul (cpu); return; + case 0x2F: do_vec_ADDP (cpu); return; + case 0x30: do_vec_mull (cpu); return; + case 0x33: do_vec_FMLA (cpu); return; + case 0x35: 
do_vec_fadd (cpu); return; + + case 0x2E: + switch (uimm (aarch64_get_instr (cpu), 20, 16)) + { + case 0x00: do_vec_ABS (cpu); return; + case 0x01: do_vec_FCVTZS (cpu); return; + case 0x11: do_vec_ADDV (cpu); return; + default: HALT_NYI; + } + + case 0x31: + case 0x3B: + do_vec_Fminmax (cpu); return; + + case 0x0D: + case 0x0F: + case 0x22: + case 0x23: + case 0x26: + case 0x2A: + case 0x32: + case 0x36: + case 0x39: + case 0x3A: + do_vec_compare (cpu); return; + + case 0x3E: + do_vec_FABS (cpu); return; + + default: + HALT_NYI; + } +} + +static void +do_vec_xtl (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11) + instr[28,22] = 0 1111 00 + instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2) + instr[15,10] = 1010 01 + instr[9,5] = V source + instr[4,0] = V dest. */ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i, shift, bias = 0; + + NYI_assert (28, 22, 0x3C); + NYI_assert (15, 10, 0x29); + + switch (uimm (aarch64_get_instr (cpu), 30, 29)) + { + case 2: /* SXTL2, SSHLL2. */ + bias = 2; + case 0: /* SXTL, SSHLL. */ + if (uimm (aarch64_get_instr (cpu), 21, 21)) + { + shift = uimm (aarch64_get_instr (cpu), 20, 16); + aarch64_set_vec_s64 + (cpu, vd, 0, aarch64_get_vec_s32 (cpu, vs, bias) << shift); + aarch64_set_vec_s64 + (cpu, vd, 1, aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift); + } + else if (uimm (aarch64_get_instr (cpu), 20, 20)) + { + shift = uimm (aarch64_get_instr (cpu), 19, 16); + bias *= 2; + for (i = 0; i < 4; i++) + aarch64_set_vec_s32 + (cpu, vd, i, aarch64_get_vec_s16 (cpu, vs, i + bias) << shift); + } + else + { + NYI_assert (19, 19, 1); + + shift = uimm (aarch64_get_instr (cpu), 18, 16); + bias *= 3; + for (i = 0; i < 8; i++) + aarch64_set_vec_s16 + (cpu, vd, i, aarch64_get_vec_s8 (cpu, vs, i + bias) << shift); + } + return; + + case 3: /* UXTL2, USHLL2. */ + bias = 2; + case 1: /* UXTL, USHLL. */ + if (uimm (aarch64_get_instr (cpu), 21, 21)) + { + shift = uimm (aarch64_get_instr (cpu), 20, 16); + aarch64_set_vec_u64 + (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vs, bias) << shift); + aarch64_set_vec_u64 + (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift); + } + else if (uimm (aarch64_get_instr (cpu), 20, 20)) + { + shift = uimm (aarch64_get_instr (cpu), 19, 16); + bias *= 2; + for (i = 0; i < 4; i++) + aarch64_set_vec_u32 + (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i + bias) << shift); + } + else + { + NYI_assert (19, 19, 1); + + shift = uimm (aarch64_get_instr (cpu), 18, 16); + bias *= 3; + for (i = 0; i < 8; i++) + aarch64_set_vec_u16 + (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, i + bias) << shift); + } + return; + + default: + HALT_NYI; + } +} + +static void +do_vec_SHL (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29,23] = 001 1110 + instr [22,16] = size and shift amount + instr [15,10] = 01 0101 + instr [9, 5] = Vs + instr [4, 0] = Vd. 
*/ + + int shift; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 23, 0x1E); + NYI_assert (15, 10, 0x15); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + shift = uimm (aarch64_get_instr (cpu), 21, 16) - 1; + + if (full == 0) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + { + uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); + aarch64_set_vec_u64 (cpu, vd, i, val << shift); + } + + return; + } + + if (uimm (aarch64_get_instr (cpu), 21, 21)) + { + shift = uimm (aarch64_get_instr (cpu), 20, 16) - 1; + + for (i = 0; i < (full ? 4 : 2); i++) + { + uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); + aarch64_set_vec_u32 (cpu, vd, i, val << shift); + } + + return; + } + + if (uimm (aarch64_get_instr (cpu), 20, 20)) + { + shift = uimm (aarch64_get_instr (cpu), 19, 16) - 1; + + for (i = 0; i < (full ? 8 : 4); i++) + { + uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); + aarch64_set_vec_u16 (cpu, vd, i, val << shift); + } + + return; + } + + if (uimm (aarch64_get_instr (cpu), 19, 19) == 0) + HALT_UNALLOC; + + shift = uimm (aarch64_get_instr (cpu), 18, 16) - 1; + + for (i = 0; i < (full ? 16 : 8); i++) + { + uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); + aarch64_set_vec_u8 (cpu, vd, i, val << shift); + } +} + +static void +do_vec_SSHR_USHR (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29] = signed(0)/unsigned(1) + instr [28,23] = 01 1110 + instr [22,16] = size and shift amount + instr [15,10] = 0000 01 + instr [9, 5] = Vs + instr [4, 0] = Vd. */ + + int shift; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + int sign = uimm (aarch64_get_instr (cpu), 29, 29); + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (28, 23, 0x1E); + NYI_assert (15, 10, 0x01); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + shift = uimm (aarch64_get_instr (cpu), 21, 16); + + if (full == 0) + HALT_UNALLOC; + + if (sign) + for (i = 0; i < 2; i++) + { + int64_t val = aarch64_get_vec_s64 (cpu, vs, i); + aarch64_set_vec_s64 (cpu, vd, i, val >> shift); + } + else + for (i = 0; i < 2; i++) + { + uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); + aarch64_set_vec_u64 (cpu, vd, i, val >> shift); + } + + return; + } + + if (uimm (aarch64_get_instr (cpu), 21, 21)) + { + shift = uimm (aarch64_get_instr (cpu), 20, 16); + + if (sign) + for (i = 0; i < (full ? 4 : 2); i++) + { + int32_t val = aarch64_get_vec_s32 (cpu, vs, i); + aarch64_set_vec_s32 (cpu, vd, i, val >> shift); + } + else + for (i = 0; i < (full ? 4 : 2); i++) + { + uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); + aarch64_set_vec_u32 (cpu, vd, i, val >> shift); + } + + return; + } + + if (uimm (aarch64_get_instr (cpu), 20, 20)) + { + shift = uimm (aarch64_get_instr (cpu), 19, 16); + + if (sign) + for (i = 0; i < (full ? 8 : 4); i++) + { + int16_t val = aarch64_get_vec_s16 (cpu, vs, i); + aarch64_set_vec_s16 (cpu, vd, i, val >> shift); + } + else + for (i = 0; i < (full ? 8 : 4); i++) + { + uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); + aarch64_set_vec_u16 (cpu, vd, i, val >> shift); + } + + return; + } + + if (uimm (aarch64_get_instr (cpu), 19, 19) == 0) + HALT_UNALLOC; + + shift = uimm (aarch64_get_instr (cpu), 18, 16); + + if (sign) + for (i = 0; i < (full ? 
16 : 8); i++) + { + int8_t val = aarch64_get_vec_s8 (cpu, vs, i); + aarch64_set_vec_s8 (cpu, vd, i, val >> shift); + } + else + for (i = 0; i < (full ? 16 : 8); i++) + { + uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); + aarch64_set_vec_u8 (cpu, vd, i, val >> shift); + } +} + +static void +do_vec_op2 (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half/full + instr[29,24] = 00 1111 + instr[23] = ? + instr[22,16] = element size & index + instr[15,10] = sub-opcode + instr[9,5] = Vm + instr[4.0] = Vd */ + + NYI_assert (29, 24, 0x0F); + + if (uimm (aarch64_get_instr (cpu), 23, 23) != 0) + HALT_NYI; + + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x01: do_vec_SSHR_USHR (cpu); return; + case 0x15: do_vec_SHL (cpu); return; + case 0x29: do_vec_xtl (cpu); return; + default: HALT_NYI; + } +} + +static void +do_vec_neg (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full(1)/half(0) + instr[29,24] = 10 1110 + instr[23,22] = size: byte(00), half (01), word (10), long (11) + instr[21,10] = 1000 0010 1110 + instr[9,5] = Vs + instr[4,0] = Vd */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 24, 0x2E); + NYI_assert (21, 10, 0x82E); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i)); + return; + + case 3: + if (! full) + HALT_NYI; + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i)); + return; + + default: + HALT_UNREACHABLE; + } +} + +static void +do_vec_sqrt (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full(1)/half(0) + instr[29,23] = 101 1101 + instr[22] = single(0)/double(1) + instr[21,10] = 1000 0111 1110 + instr[9,5] = Vs + instr[4,0] = Vd. */ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 23, 0x5B); + NYI_assert (21, 10, 0x87E); + + if (uimm (aarch64_get_instr (cpu), 22, 22) == 0) + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + sqrtf (aarch64_get_vec_float (cpu, vs, i))); + else + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + sqrt (aarch64_get_vec_double (cpu, vs, i))); +} + +static void +do_vec_mls_indexed (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,24] = 10 1111 + instr[23,22] = 16-bit(01)/32-bit(10) + instr[21,20+11] = index (if 16-bit) + instr[21+11] = index (if 32-bit) + instr[20,16] = Vm + instr[15,12] = 0100 + instr[11] = part of index + instr[10] = 0 + instr[9,5] = Vs + instr[4,0] = Vd. 
*/ + + int full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned i; + + NYI_assert (15, 12, 4); + NYI_assert (10, 10, 0); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 1: + { + unsigned elem; + uint32_t val; + + if (vm > 15) + HALT_NYI; + + elem = (uimm (aarch64_get_instr (cpu), 21, 20) << 1) + | uimm (aarch64_get_instr (cpu), 11, 11); + val = aarch64_get_vec_u16 (cpu, vm, elem); + + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vd, i) - + (aarch64_get_vec_u32 (cpu, vs, i) * val)); + return; + } + + case 2: + { + unsigned elem = (uimm (aarch64_get_instr (cpu), 21, 21) << 1) + | uimm (aarch64_get_instr (cpu), 11, 11); + uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem); + + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u64 (cpu, vd, i, + aarch64_get_vec_u64 (cpu, vd, i) - + (aarch64_get_vec_u64 (cpu, vs, i) * val)); + return; + } + + case 0: + case 3: + default: + HALT_NYI; + } +} + +static void +do_vec_SUB (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29,24] = 10 1110 + instr [23,22] = size: byte(00, half(01), word (10), long (11) + instr [21] = 1 + instr [20,16] = Vm + instr [15,10] = 10 0001 + instr [9, 5] = Vn + instr [4, 0] = Vd. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 24, 0x2E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x21); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_s8 (cpu, vd, i, + aarch64_get_vec_s8 (cpu, vn, i) + - aarch64_get_vec_s8 (cpu, vm, i)); + return; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_s16 (cpu, vd, i, + aarch64_get_vec_s16 (cpu, vn, i) + - aarch64_get_vec_s16 (cpu, vm, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_s32 (cpu, vd, i, + aarch64_get_vec_s32 (cpu, vn, i) + - aarch64_get_vec_s32 (cpu, vm, i)); + return; + + case 3: + if (full == 0) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + aarch64_set_vec_s64 (cpu, vd, i, + aarch64_get_vec_s64 (cpu, vn, i) + - aarch64_get_vec_s64 (cpu, vm, i)); + return; + + default: + HALT_UNREACHABLE; + } +} + +static void +do_vec_MLS (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29,24] = 10 1110 + instr [23,22] = size: byte(00, half(01), word (10) + instr [21] = 1 + instr [20,16] = Vm + instr [15,10] = 10 0101 + instr [9, 5] = Vn + instr [4, 0] = Vd. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 24, 0x2E); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x25); + + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + (aarch64_get_vec_u8 (cpu, vn, i) + * aarch64_get_vec_u8 (cpu, vm, i)) + - aarch64_get_vec_u8 (cpu, vd, i)); + return; + + case 1: + for (i = 0; i < (full ? 
8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, + (aarch64_get_vec_u16 (cpu, vn, i) + * aarch64_get_vec_u16 (cpu, vm, i)) + - aarch64_get_vec_u16 (cpu, vd, i)); + return; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + (aarch64_get_vec_u32 (cpu, vn, i) + * aarch64_get_vec_u32 (cpu, vm, i)) + - aarch64_get_vec_u32 (cpu, vd, i)); + return; + + default: + HALT_UNALLOC; + } +} + +static void +do_vec_FDIV (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29,23] = 10 1110 0 + instr [22] = float()/double(1) + instr [21] = 1 + instr [20,16] = Vm + instr [15,10] = 1111 11 + instr [9, 5] = Vn + instr [4, 0] = Vd. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 23, 0x5C); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x3F); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + aarch64_get_vec_double (cpu, vn, i) + / aarch64_get_vec_double (cpu, vm, i)); + } + else + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + aarch64_get_vec_float (cpu, vn, i) + / aarch64_get_vec_float (cpu, vm, i)); +} + +static void +do_vec_FMUL (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29,23] = 10 1110 0 + instr [22] = float(0)/double(1) + instr [21] = 1 + instr [20,16] = Vm + instr [15,10] = 1101 11 + instr [9, 5] = Vn + instr [4, 0] = Vd. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + NYI_assert (29, 23, 0x5C); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x37); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + aarch64_get_vec_double (cpu, vn, i) + * aarch64_get_vec_double (cpu, vm, i)); + } + else + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + aarch64_get_vec_float (cpu, vn, i) + * aarch64_get_vec_float (cpu, vm, i)); +} + +static void +do_vec_FADDP (sim_cpu *cpu) +{ + /* instr [31] = 0 + instr [30] = half(0)/full(1) + instr [29,23] = 10 1110 0 + instr [22] = float(0)/double(1) + instr [21] = 1 + instr [20,16] = Vm + instr [15,10] = 1101 01 + instr [9, 5] = Vn + instr [4, 0] = Vd. */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (29, 23, 0x5C); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x35); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! 
full) + HALT_UNALLOC; + + aarch64_set_vec_double (cpu, vd, 0, aarch64_get_vec_double (cpu, vn, 0) + + aarch64_get_vec_double (cpu, vn, 1)); + aarch64_set_vec_double (cpu, vd, 1, aarch64_get_vec_double (cpu, vm, 0) + + aarch64_get_vec_double (cpu, vm, 1)); + } + else + { + aarch64_set_vec_float (cpu, vd, 0, aarch64_get_vec_float (cpu, vn, 0) + + aarch64_get_vec_float (cpu, vn, 1)); + if (full) + aarch64_set_vec_float (cpu, vd, 1, aarch64_get_vec_float (cpu, vn, 2) + + aarch64_get_vec_float (cpu, vn, 3)); + aarch64_set_vec_float (cpu, vd, full ? 2 : 1, + aarch64_get_vec_float (cpu, vm, 0) + + aarch64_get_vec_float (cpu, vm, 1)); + if (full) + aarch64_set_vec_float (cpu, vd, 3, + aarch64_get_vec_float (cpu, vm, 2) + + aarch64_get_vec_float (cpu, vm, 3)); + } +} + +static void +do_vec_FSQRT (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half(0)/full(1) + instr[29,23] = 10 1110 1 + instr[22] = single(0)/double(1) + instr[21,10] = 10 0001 1111 10 + instr[9,5] = Vsrc + instr[4,0] = Vdest. */ + + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + int i; + + NYI_assert (29, 23, 0x5D); + NYI_assert (21, 10, 0x87E); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + sqrt (aarch64_get_vec_double (cpu, vn, i))); + } + else + { + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + sqrtf (aarch64_get_vec_float (cpu, vn, i))); + } +} + +static void +do_vec_FNEG (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,23] = 10 1110 1 + instr[22] = single (0)/double (1) + instr[21,10] = 10 0000 1111 10 + instr[9,5] = Vsrc + instr[4,0] = Vdest. */ + + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + int i; + + NYI_assert (29, 23, 0x5D); + NYI_assert (21, 10, 0x83E); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + if (! full) + HALT_UNALLOC; + + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, vd, i, + - aarch64_get_vec_double (cpu, vn, i)); + } + else + { + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_float (cpu, vd, i, + - aarch64_get_vec_float (cpu, vn, i)); + } +} + +static void +do_vec_NOT (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,21] = 10 1110 001 + instr[20,16] = 0 0000 + instr[15,10] = 0101 10 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + int full = uimm (aarch64_get_instr (cpu), 30, 30); + + NYI_assert (29, 10, 0xB8816); + + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i)); +} + +static void +do_vec_MOV_element (sim_cpu *cpu) +{ + /* instr[31,21] = 0110 1110 000 + instr[20,16] = size & dest index + instr[15] = 0 + instr[14,11] = source index + instr[10] = 1 + instr[9,5] = Vs + instr[4.0] = Vd. */ + + unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned src_index; + unsigned dst_index; + + NYI_assert (31, 21, 0x370); + NYI_assert (15, 15, 0); + NYI_assert (10, 10, 1); + + if (uimm (aarch64_get_instr (cpu), 16, 16)) + { + /* Move a byte. 
*/ + src_index = uimm (aarch64_get_instr (cpu), 14, 11); + dst_index = uimm (aarch64_get_instr (cpu), 20, 17); + aarch64_set_vec_u8 (cpu, vd, dst_index, + aarch64_get_vec_u8 (cpu, vs, src_index)); + } + else if (uimm (aarch64_get_instr (cpu), 17, 17)) + { + /* Move 16-bits. */ + NYI_assert (11, 11, 0); + src_index = uimm (aarch64_get_instr (cpu), 14, 12); + dst_index = uimm (aarch64_get_instr (cpu), 20, 18); + aarch64_set_vec_u16 (cpu, vd, dst_index, + aarch64_get_vec_u16 (cpu, vs, src_index)); + } + else if (uimm (aarch64_get_instr (cpu), 18, 18)) + { + /* Move 32-bits. */ + NYI_assert (12, 11, 0); + src_index = uimm (aarch64_get_instr (cpu), 14, 13); + dst_index = uimm (aarch64_get_instr (cpu), 20, 19); + aarch64_set_vec_u32 (cpu, vd, dst_index, + aarch64_get_vec_u32 (cpu, vs, src_index)); + } + else + { + NYI_assert (19, 19, 1); + NYI_assert (13, 11, 0); + src_index = uimm (aarch64_get_instr (cpu), 14, 14); + dst_index = uimm (aarch64_get_instr (cpu), 20, 20); + aarch64_set_vec_u64 (cpu, vd, dst_index, + aarch64_get_vec_u64 (cpu, vs, src_index)); + } +} + +static void +dexAdvSIMD0 (sim_cpu *cpu) +{ + /* instr [28,25] = 0 111. */ + if ( uimm (aarch64_get_instr (cpu), 15, 10) == 0x07 + && (uimm (aarch64_get_instr (cpu), 9, 5) == + uimm (aarch64_get_instr (cpu), 20, 16))) + { + if (uimm (aarch64_get_instr (cpu), 31, 21) == 0x075 + || uimm (aarch64_get_instr (cpu), 31, 21) == 0x275) + { + do_vec_MOV_whole_vector (cpu); + return; + } + } + + if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x1E0) + { + do_vec_MOV_immediate (cpu); + return; + } + + if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x5E0) + { + do_vec_MVNI (cpu); + return; + } + + if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x1C0 + || uimm (aarch64_get_instr (cpu), 29, 19) == 0x1C1) + { + if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x03) + { + do_vec_DUP_scalar_into_vector (cpu); + return; + } + } + + switch (uimm (aarch64_get_instr (cpu), 29, 24)) + { + case 0x0E: do_vec_op1 (cpu); return; + case 0x0F: do_vec_op2 (cpu); return; + + case 0x2f: + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x01: do_vec_SSHR_USHR (cpu); return; + case 0x10: + case 0x12: do_vec_mls_indexed (cpu); return; + case 0x29: do_vec_xtl (cpu); return; + default: + HALT_NYI; + } + + case 0x2E: + if (uimm (aarch64_get_instr (cpu), 21, 21) == 1) + { + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x07: + switch (uimm (aarch64_get_instr (cpu), 23, 22)) + { + case 0: do_vec_EOR (cpu); return; + case 1: do_vec_BSL (cpu); return; + case 2: + case 3: do_vec_bit (cpu); return; + } + break; + + case 0x08: do_vec_sub_long (cpu); return; + case 0x11: do_vec_USHL (cpu); return; + case 0x16: do_vec_NOT (cpu); return; + case 0x19: do_vec_max (cpu); return; + case 0x1B: do_vec_min (cpu); return; + case 0x21: do_vec_SUB (cpu); return; + case 0x25: do_vec_MLS (cpu); return; + case 0x31: do_vec_FminmaxNMP (cpu); return; + case 0x35: do_vec_FADDP (cpu); return; + case 0x37: do_vec_FMUL (cpu); return; + case 0x3F: do_vec_FDIV (cpu); return; + + case 0x3E: + switch (uimm (aarch64_get_instr (cpu), 20, 16)) + { + case 0x00: do_vec_FNEG (cpu); return; + case 0x01: do_vec_FSQRT (cpu); return; + default: HALT_NYI; + } + + case 0x0D: + case 0x0F: + case 0x22: + case 0x23: + case 0x26: + case 0x2A: + case 0x32: + case 0x36: + case 0x39: + case 0x3A: + do_vec_compare (cpu); return; + + default: break; + } + } + + if (uimm (aarch64_get_instr (cpu), 31, 21) == 0x370) + { + do_vec_MOV_element (cpu); + return; + } + + switch (uimm (aarch64_get_instr (cpu), 21, 
10)) + { + case 0x82E: do_vec_neg (cpu); return; + case 0x87E: do_vec_sqrt (cpu); return; + default: + if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x30) + { + do_vec_mull (cpu); + return; + } + break; + } + break; + + default: + break; + } + + HALT_NYI; +} + +/* 3 sources. */ + +/* Float multiply add. */ +static void +fmadds (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) + + aarch64_get_FP_float (cpu, sn) + * aarch64_get_FP_float (cpu, sm)); +} + +/* Double multiply add. */ +static void +fmaddd (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) + + aarch64_get_FP_double (cpu, sn) + * aarch64_get_FP_double (cpu, sm)); +} + +/* Float multiply subtract. */ +static void +fmsubs (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) + - aarch64_get_FP_float (cpu, sn) + * aarch64_get_FP_float (cpu, sm)); +} + +/* Double multiply subtract. */ +static void +fmsubd (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) + - aarch64_get_FP_double (cpu, sn) + * aarch64_get_FP_double (cpu, sm)); +} + +/* Float negative multiply add. */ +static void +fnmadds (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) + + (- aarch64_get_FP_float (cpu, sn)) + * aarch64_get_FP_float (cpu, sm)); +} + +/* Double negative multiply add. */ +static void +fnmaddd (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) + + (- aarch64_get_FP_double (cpu, sn)) + * aarch64_get_FP_double (cpu, sm)); +} + +/* Float negative multiply subtract. */ +static void +fnmsubs (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) + + aarch64_get_FP_float (cpu, sn) + * aarch64_get_FP_float (cpu, sm)); +} + +/* Double negative multiply subtract. 
*/ +static void +fnmsubd (sim_cpu *cpu) +{ + unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) + + aarch64_get_FP_double (cpu, sn) + * aarch64_get_FP_double (cpu, sm)); +} + +static void +dexSimpleFPDataProc3Source (sim_cpu *cpu) +{ + /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC + instr[30] = 0 + instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC + instr[28,25] = 1111 + instr[24] = 1 + instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC + instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated + instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */ + + uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) + | uimm (aarch64_get_instr (cpu), 29, 29); + /* dispatch on combined type:o1:o2. */ + uint32_t dispatch = (uimm (aarch64_get_instr (cpu), 23, 21) << 1) + | uimm (aarch64_get_instr (cpu), 15, 15); + + if (M_S != 0) + HALT_UNALLOC; + + switch (dispatch) + { + case 0: fmadds (cpu); return; + case 1: fmsubs (cpu); return; + case 2: fnmadds (cpu); return; + case 3: fnmsubs (cpu); return; + case 4: fmaddd (cpu); return; + case 5: fmsubd (cpu); return; + case 6: fnmaddd (cpu); return; + case 7: fnmsubd (cpu); return; + default: + /* type > 1 is currently unallocated. */ + HALT_UNALLOC; + } +} + +static void +dexSimpleFPFixedConvert (sim_cpu *cpu) +{ + HALT_NYI; +} + +static void +dexSimpleFPCondCompare (sim_cpu *cpu) +{ + HALT_NYI; +} + +/* 2 sources. */ + +/* Float add. */ +static void +fadds (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) + + aarch64_get_FP_float (cpu, sm)); +} + +/* Double add. */ +static void +faddd (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) + + aarch64_get_FP_double (cpu, sm)); +} + +/* Float divide. */ +static void +fdivs (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) + / aarch64_get_FP_float (cpu, sm)); +} + +/* Double divide. */ +static void +fdivd (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) + / aarch64_get_FP_double (cpu, sm)); +} + +/* Float multiply. */ +static void +fmuls (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) + * aarch64_get_FP_float (cpu, sm)); +} + +/* Double multiply. 
*/ +static void +fmuld (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) + * aarch64_get_FP_double (cpu, sm)); +} + +/* Float negate and multiply. */ +static void +fnmuls (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn) + * aarch64_get_FP_float (cpu, sm))); +} + +/* Double negate and multiply. */ +static void +fnmuld (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn) + * aarch64_get_FP_double (cpu, sm))); +} + +/* Float subtract. */ +static void +fsubs (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) + - aarch64_get_FP_float (cpu, sm)); +} + +/* Double subtract. */ +static void +fsubd (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) + - aarch64_get_FP_double (cpu, sm)); +} + +static void +do_FMINNM (sim_cpu *cpu) +{ + /* instr[31,23] = 0 0011 1100 + instr[22] = float(0)/double(1) + instr[21] = 1 + instr[20,16] = Sm + instr[15,10] = 01 1110 + instr[9,5] = Sn + instr[4,0] = Cpu */ + + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 23, 0x03C); + NYI_assert (15, 10, 0x1E); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + aarch64_set_FP_double (cpu, sd, + dminnm (aarch64_get_FP_double (cpu, sn), + aarch64_get_FP_double (cpu, sm))); + else + aarch64_set_FP_float (cpu, sd, + fminnm (aarch64_get_FP_float (cpu, sn), + aarch64_get_FP_float (cpu, sm))); +} + +static void +do_FMAXNM (sim_cpu *cpu) +{ + /* instr[31,23] = 0 0011 1100 + instr[22] = float(0)/double(1) + instr[21] = 1 + instr[20,16] = Sm + instr[15,10] = 01 1010 + instr[9,5] = Sn + instr[4,0] = Cpu */ + + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 23, 0x03C); + NYI_assert (15, 10, 0x1A); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + aarch64_set_FP_double (cpu, sd, + dmaxnm (aarch64_get_FP_double (cpu, sn), + aarch64_get_FP_double (cpu, sm))); + else + aarch64_set_FP_float (cpu, sd, + fmaxnm (aarch64_get_FP_float (cpu, sn), + aarch64_get_FP_float (cpu, sm))); +} + +static void +dexSimpleFPDataProc2Source (sim_cpu *cpu) +{ + /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC + instr[30] = 0 + instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC + instr[28,25] = 1111 + instr[24] = 0 + instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC + instr[21] = 1 + instr[20,16] = Vm + instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV + 0010 ==> FADD, 0011 ==> FSUB, + 0100 ==> FMAX, 0101 ==> 
FMIN + 0110 ==> FMAXNM, 0111 ==> FMINNM + 1000 ==> FNMUL, ow ==> UNALLOC + instr[11,10] = 10 + instr[9,5] = Vn + instr[4,0] = Vd */ + + uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) + | uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22); + /* Dispatch on opcode. */ + uint32_t dispatch = uimm (aarch64_get_instr (cpu), 15, 12); + + if (type > 1) + HALT_UNALLOC; + + if (M_S != 0) + HALT_UNALLOC; + + if (type) + switch (dispatch) + { + case 0: fmuld (cpu); return; + case 1: fdivd (cpu); return; + case 2: faddd (cpu); return; + case 3: fsubd (cpu); return; + case 6: do_FMAXNM (cpu); return; + case 7: do_FMINNM (cpu); return; + case 8: fnmuld (cpu); return; + + /* Have not yet implemented fmax and fmin. */ + case 4: + case 5: + HALT_NYI; + + default: + HALT_UNALLOC; + } + else /* type == 0 => floats. */ + switch (dispatch) + { + case 0: fmuls (cpu); return; + case 1: fdivs (cpu); return; + case 2: fadds (cpu); return; + case 3: fsubs (cpu); return; + case 6: do_FMAXNM (cpu); return; + case 7: do_FMINNM (cpu); return; + case 8: fnmuls (cpu); return; + + case 4: + case 5: + HALT_NYI; + + default: + HALT_UNALLOC; + } +} + +static void +dexSimpleFPCondSelect (sim_cpu *cpu) +{ + /* FCSEL + instr[31,23] = 0 0011 1100 + instr[22] = 0=>single 1=>double + instr[21] = 1 + instr[20,16] = Sm + instr[15,12] = cond + instr[11,10] = 11 + instr[9,5] = Sn + instr[4,0] = Cpu */ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t set = testConditionCode (cpu, uimm (aarch64_get_instr (cpu), 15, 12)); + + NYI_assert (31, 23, 0x03C); + NYI_assert (11, 10, 0x3); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + aarch64_set_FP_double (cpu, sd, set ? sn : sm); + else + aarch64_set_FP_float (cpu, sd, set ? sn : sm); +} + +/* Store 32 bit unscaled signed 9 bit. */ +static void +fsturs (sim_cpu *cpu, int32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_float (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset, + aarch64_get_FP_float (cpu, rn)); +} + +/* Store 64 bit unscaled signed 9 bit. */ +static void +fsturd (sim_cpu *cpu, int32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_mem_double (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset, + aarch64_get_FP_double (cpu, rn)); +} + +/* Store 128 bit unscaled signed 9 bit. */ +static void +fsturq (sim_cpu *cpu, int32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + FRegister a; + + aarch64_get_FP_long_double (cpu, rn, & a); + aarch64_set_mem_long_double (cpu, + aarch64_get_reg_u64 (cpu, st, 1) + + offset, a); +} + +/* TODO FP move register. */ + +/* 32 bit fp to fp move register. */ +static void +ffmovs (sim_cpu *cpu) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn)); +} + +/* 64 bit fp to fp move register. */ +static void +ffmovd (sim_cpu *cpu) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn)); +} + +/* 32 bit GReg to Vec move register. 
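+   i.e. FMOV Sd, Wn -- the 32-bit general register value is copied into
+   element 0 of the destination vector register.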
*/ +static void +fgmovs (sim_cpu *cpu) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP)); +} + +/* 64 bit g to fp move register. */ +static void +fgmovd (sim_cpu *cpu) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP)); +} + +/* 32 bit fp to g move register. */ +static void +gfmovs (sim_cpu *cpu) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0)); +} + +/* 64 bit fp to g move register. */ +static void +gfmovd (sim_cpu *cpu) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0)); +} + +/* FP move immediate + + These install an immediate 8 bit value in the target register + where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3 + bit exponent. */ + +static void +fmovs (sim_cpu *cpu) +{ + unsigned int sd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t imm = uimm (aarch64_get_instr (cpu), 20, 13); + float f = fp_immediate_for_encoding_32 (imm); + + aarch64_set_FP_float (cpu, sd, f); +} + +static void +fmovd (sim_cpu *cpu) +{ + unsigned int sd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t imm = uimm (aarch64_get_instr (cpu), 20, 13); + double d = fp_immediate_for_encoding_64 (imm); + + aarch64_set_FP_double (cpu, sd, d); +} + +static void +dexSimpleFPImmediate (sim_cpu *cpu) +{ + /* instr[31,23] == 00111100 + instr[22] == type : single(0)/double(1) + instr[21] == 1 + instr[20,13] == imm8 + instr[12,10] == 100 + instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC + instr[4,0] == Rd */ + uint32_t imm5 = uimm (aarch64_get_instr (cpu), 9, 5); + + NYI_assert (31, 23, 0x3C); + + if (imm5 != 0) + HALT_UNALLOC; + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + fmovd (cpu); + else + fmovs (cpu); +} + +/* TODO specific decode and execute for group Load Store. */ + +/* TODO FP load/store single register (unscaled offset). */ + +/* TODO load 8 bit unscaled signed 9 bit. */ +/* TODO load 16 bit unscaled signed 9 bit. */ + +/* Load 32 bit unscaled signed 9 bit. */ +static void +fldurs (sim_cpu *cpu, int32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, st, aarch64_get_mem_float + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); +} + +/* Load 64 bit unscaled signed 9 bit. */ +static void +fldurd (sim_cpu *cpu, int32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, st, aarch64_get_mem_double + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); +} + +/* Load 128 bit unscaled signed 9 bit. */ +static void +fldurq (sim_cpu *cpu, int32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + FRegister a; + uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; + + aarch64_get_mem_long_double (cpu, addr, & a); + aarch64_set_FP_long_double (cpu, st, a); +} + +/* TODO store 8 bit unscaled signed 9 bit. 
*/ +/* TODO store 16 bit unscaled signed 9 bit. */ + + +/* 1 source. */ + +/* Float absolute value. */ +static void +fabss (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + float value = aarch64_get_FP_float (cpu, sn); + + aarch64_set_FP_float (cpu, sd, fabsf (value)); +} + +/* Double absolute value. */ +static void +fabcpu (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + double value = aarch64_get_FP_double (cpu, sn); + + aarch64_set_FP_double (cpu, sd, fabs (value)); +} + +/* Float negative value. */ +static void +fnegs (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn)); +} + +/* Double negative value. */ +static void +fnegd (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn)); +} + +/* Float square root. */ +static void +fsqrts (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, sqrt (aarch64_get_FP_float (cpu, sn))); +} + +/* Double square root. */ +static void +fsqrtd (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, + sqrt (aarch64_get_FP_double (cpu, sn))); +} + +/* Convert double to float. */ +static void +fcvtds (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn)); +} + +/* Convert float to double. */ +static void +fcvtcpu (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn)); +} + +static void +do_FRINT (sim_cpu *cpu) +{ + /* instr[31,23] = 0001 1110 0 + instr[22] = single(0)/double(1) + instr[21,18] = 1001 + instr[17,15] = rounding mode + instr[14,10] = 10000 + instr[9,5] = source + instr[4,0] = dest */ + + float val; + unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rmode = uimm (aarch64_get_instr (cpu), 17, 15); + + NYI_assert (31, 23, 0x03C); + NYI_assert (21, 18, 0x9); + NYI_assert (14, 10, 0x10); + + if (rmode == 6 || rmode == 7) + /* FIXME: Add support for rmode == 6 exactness check. */ + rmode = uimm (aarch64_get_FPSR (cpu), 23, 22); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double val = aarch64_get_FP_double (cpu, rs); + + switch (rmode) + { + case 0: /* mode N: nearest or even. */ + { + double rval = round (val); + + if (val - rval == 0.5) + { + if (((rval / 2.0) * 2.0) != rval) + rval += 1.0; + } + + aarch64_set_FP_double (cpu, rd, round (val)); + return; + } + + case 1: /* mode P: towards +inf. */ + if (val < 0.0) + aarch64_set_FP_double (cpu, rd, trunc (val)); + else + aarch64_set_FP_double (cpu, rd, round (val)); + return; + + case 2: /* mode M: towards -inf. */ + if (val < 0.0) + aarch64_set_FP_double (cpu, rd, round (val)); + else + aarch64_set_FP_double (cpu, rd, trunc (val)); + return; + + case 3: /* mode Z: towards 0. 
*/ + aarch64_set_FP_double (cpu, rd, trunc (val)); + return; + + case 4: /* mode A: away from 0. */ + aarch64_set_FP_double (cpu, rd, round (val)); + return; + + case 6: /* mode X: use FPCR with exactness check. */ + case 7: /* mode I: use FPCR mode. */ + HALT_NYI; + + default: + HALT_UNALLOC; + } + } + + val = aarch64_get_FP_float (cpu, rs); + + switch (rmode) + { + case 0: /* mode N: nearest or even. */ + { + float rval = roundf (val); + + if (val - rval == 0.5) + { + if (((rval / 2.0) * 2.0) != rval) + rval += 1.0; + } + + aarch64_set_FP_float (cpu, rd, rval); + return; + } + + case 1: /* mode P: towards +inf. */ + if (val < 0.0) + aarch64_set_FP_float (cpu, rd, truncf (val)); + else + aarch64_set_FP_float (cpu, rd, roundf (val)); + return; + + case 2: /* mode M: towards -inf. */ + if (val < 0.0) + aarch64_set_FP_float (cpu, rd, truncf (val)); + else + aarch64_set_FP_float (cpu, rd, roundf (val)); + return; + + case 3: /* mode Z: towards 0. */ + aarch64_set_FP_float (cpu, rd, truncf (val)); + return; + + case 4: /* mode A: away from 0. */ + aarch64_set_FP_float (cpu, rd, roundf (val)); + return; + + case 6: /* mode X: use FPCR with exactness check. */ + case 7: /* mode I: use FPCR mode. */ + HALT_NYI; + + default: + HALT_UNALLOC; + } +} + +static void +dexSimpleFPDataProc1Source (sim_cpu *cpu) +{ + /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC + instr[30] = 0 + instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC + instr[28,25] = 1111 + instr[24] = 0 + instr[23,22] ==> type : 00 ==> source is single, + 01 ==> source is double + 10 ==> UNALLOC + 11 ==> UNALLOC or source is half + instr[21] = 1 + instr[20,15] ==> opcode : with type 00 or 01 + 000000 ==> FMOV, 000001 ==> FABS, + 000010 ==> FNEG, 000011 ==> FSQRT, + 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double) + 000110 ==> UNALLOC, 000111 ==> FCVT (to half) + 001000 ==> FRINTN, 001001 ==> FRINTP, + 001010 ==> FRINTM, 001011 ==> FRINTZ, + 001100 ==> FRINTA, 001101 ==> UNALLOC + 001110 ==> FRINTX, 001111 ==> FRINTI + with type 11 + 000100 ==> FCVT (half-to-single) + 000101 ==> FCVT (half-to-double) + instr[14,10] = 10000. */ + + uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) + | uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22); + uint32_t opcode = uimm (aarch64_get_instr (cpu), 20, 15); + + if (M_S != 0) + HALT_UNALLOC; + + if (type == 3) + { + if (opcode == 4 || opcode == 5) + HALT_NYI; + else + HALT_UNALLOC; + } + + if (type == 2) + HALT_UNALLOC; + + switch (opcode) + { + case 0: + if (type) + ffmovd (cpu); + else + ffmovs (cpu); + return; + + case 1: + if (type) + fabcpu (cpu); + else + fabss (cpu); + return; + + case 2: + if (type) + fnegd (cpu); + else + fnegs (cpu); + return; + + case 3: + if (type) + fsqrtd (cpu); + else + fsqrts (cpu); + return; + + case 4: + if (type) + fcvtds (cpu); + else + HALT_UNALLOC; + return; + + case 5: + if (type) + HALT_UNALLOC; + fcvtcpu (cpu); + return; + + case 8: /* FRINTN etc. */ + case 9: + case 10: + case 11: + case 12: + case 14: + case 15: + do_FRINT (cpu); + return; + + case 7: /* FCVT double/single to half precision. */ + case 13: + HALT_NYI; + + default: + HALT_UNALLOC; + } +} + +/* 32 bit signed int to float. */ +static void +scvtf32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float + (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP)); +} + +/* signed int to float. 
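+   64-bit source variant, i.e. SCVTF Sd, Xn.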
*/ +static void +scvtf (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_float + (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP)); +} + +/* 32 bit signed int to double. */ +static void +scvtd32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double + (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP)); +} + +/* signed int to double. */ +static void +scvtd (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_FP_double + (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP)); +} + +static const float FLOAT_INT_MAX = (float) INT_MAX; +static const float FLOAT_INT_MIN = (float) INT_MIN; +static const double DOUBLE_INT_MAX = (double) INT_MAX; +static const double DOUBLE_INT_MIN = (double) INT_MIN; +static const float FLOAT_LONG_MAX = (float) LONG_MAX; +static const float FLOAT_LONG_MIN = (float) LONG_MIN; +static const double DOUBLE_LONG_MAX = (double) LONG_MAX; +static const double DOUBLE_LONG_MIN = (double) LONG_MIN; + +/* Check for FP exception conditions: + NaN raises IO + Infinity raises IO + Out of Range raises IO and IX and saturates value + Denormal raises ID and IX and sets to zero. */ +#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \ + do \ + { \ + switch (fpclassify (F)) \ + { \ + case FP_INFINITE: \ + case FP_NAN: \ + aarch64_set_FPSR (cpu, IO); \ + if (signbit (F)) \ + VALUE = ITYPE##_MAX; \ + else \ + VALUE = ITYPE##_MIN; \ + break; \ + \ + case FP_NORMAL: \ + if (F >= FTYPE##_##ITYPE##_MAX) \ + { \ + aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \ + VALUE = ITYPE##_MAX; \ + } \ + else if (F <= FTYPE##_##ITYPE##_MIN) \ + { \ + aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \ + VALUE = ITYPE##_MIN; \ + } \ + break; \ + \ + case FP_SUBNORMAL: \ + aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \ + VALUE = 0; \ + break; \ + \ + default: \ + case FP_ZERO: \ + VALUE = 0; \ + break; \ + } \ + } \ + while (0) + +/* 32 bit convert float to signed int truncate towards zero. */ +static void +fcvtszs32 (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + /* TODO : check that this rounds toward zero. */ + float f = aarch64_get_FP_float (cpu, sn); + int32_t value = (int32_t) f; + + RAISE_EXCEPTIONS (f, value, FLOAT, INT); + + /* Avoid sign extension to 64 bit. */ + aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); +} + +/* 64 bit convert float to signed int truncate towards zero. */ +static void +fcvtszs (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + float f = aarch64_get_FP_float (cpu, sn); + int64_t value = (int64_t) f; + + RAISE_EXCEPTIONS (f, value, FLOAT, LONG); + + aarch64_set_reg_s64 (cpu, rd, NO_SP, value); +} + +/* 32 bit convert double to signed int truncate towards zero. */ +static void +fcvtszd32 (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + /* TODO : check that this rounds toward zero. */ + double d = aarch64_get_FP_double (cpu, sn); + int32_t value = (int32_t) d; + + RAISE_EXCEPTIONS (d, value, DOUBLE, INT); + + /* Avoid sign extension to 64 bit. 
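+     The (uint32_t) cast below zero-extends the 32-bit result into the
+     64-bit X register.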
*/ + aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); +} + +/* 64 bit convert double to signed int truncate towards zero. */ +static void +fcvtszd (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + /* TODO : check that this rounds toward zero. */ + double d = aarch64_get_FP_double (cpu, sn); + int64_t value; + + value = (int64_t) d; + + RAISE_EXCEPTIONS (d, value, DOUBLE, LONG); + + aarch64_set_reg_s64 (cpu, rd, NO_SP, value); +} + +static void +do_fcvtzu (sim_cpu *cpu) +{ + /* instr[31] = size: 32-bit (0), 64-bit (1) + instr[30,23] = 00111100 + instr[22] = type: single (0)/ double (1) + instr[21] = enable (0)/disable(1) precision + instr[20,16] = 11001 + instr[15,10] = precision + instr[9,5] = Rs + instr[4,0] = Rd. */ + + unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (30, 23, 0x3C); + NYI_assert (20, 16, 0x19); + + if (uimm (aarch64_get_instr (cpu), 21, 21) != 1) + /* Convert to fixed point. */ + HALT_NYI; + + if (uimm (aarch64_get_instr (cpu), 31, 31)) + { + /* Convert to unsigned 64-bit integer. */ + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double d = aarch64_get_FP_double (cpu, rs); + uint64_t value = (uint64_t) d; + + /* Do not raise an exception if we have reached ULONG_MAX. */ + if (value != (1UL << 63)) + RAISE_EXCEPTIONS (d, value, DOUBLE, LONG); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value); + } + else + { + float f = aarch64_get_FP_float (cpu, rs); + uint64_t value = (uint64_t) f; + + /* Do not raise an exception if we have reached ULONG_MAX. */ + if (value != (1UL << 63)) + RAISE_EXCEPTIONS (f, value, FLOAT, LONG); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value); + } + } + else + { + uint32_t value; + + /* Convert to unsigned 32-bit integer. */ + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double d = aarch64_get_FP_double (cpu, rs); + + value = (uint32_t) d; + /* Do not raise an exception if we have reached UINT_MAX. */ + if (value != (1UL << 31)) + RAISE_EXCEPTIONS (d, value, DOUBLE, INT); + } + else + { + float f = aarch64_get_FP_float (cpu, rs); + + value = (uint32_t) f; + /* Do not raise an exception if we have reached UINT_MAX. */ + if (value != (1UL << 31)) + RAISE_EXCEPTIONS (f, value, FLOAT, INT); + } + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value); + } +} + +static void +do_UCVTF (sim_cpu *cpu) +{ + /* instr[31] = size: 32-bit (0), 64-bit (1) + instr[30,23] = 001 1110 0 + instr[22] = type: single (0)/ double (1) + instr[21] = enable (0)/disable(1) precision + instr[20,16] = 0 0011 + instr[15,10] = precision + instr[9,5] = Rs + instr[4,0] = Rd. */ + + unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (30, 23, 0x3C); + NYI_assert (20, 16, 0x03); + + if (uimm (aarch64_get_instr (cpu), 21, 21) != 1) + HALT_NYI; + + /* FIXME: Add exception raising. 
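+     The conversions below use plain host casts, so IO/IX are never set.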
*/ + if (uimm (aarch64_get_instr (cpu), 31, 31)) + { + uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + aarch64_set_FP_double (cpu, rd, (double) value); + else + aarch64_set_FP_float (cpu, rd, (float) value); + } + else + { + uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + aarch64_set_FP_double (cpu, rd, (double) value); + else + aarch64_set_FP_float (cpu, rd, (float) value); + } +} + +static void +float_vector_move (sim_cpu *cpu) +{ + /* instr[31,17] == 100 1111 0101 0111 + instr[16] ==> direction 0=> to GR, 1=> from GR + instr[15,10] => ??? + instr[9,5] ==> source + instr[4,0] ==> dest. */ + + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 17, 0x4F57); + + if (uimm (aarch64_get_instr (cpu), 15, 10) != 0) + HALT_UNALLOC; + + if (uimm (aarch64_get_instr (cpu), 16, 16)) + aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP)); + else + aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1)); +} + +static void +dexSimpleFPIntegerConvert (sim_cpu *cpu) +{ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30 = 0 + instr[29] = S : 0 ==> OK, 1 ==> UNALLOC + instr[28,25] = 1111 + instr[24] = 0 + instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC + instr[21] = 1 + instr[20,19] = rmode + instr[18,16] = opcode + instr[15,10] = 10 0000 */ + + uint32_t rmode_opcode; + uint32_t size_type; + uint32_t type; + uint32_t size; + uint32_t S; + + if (uimm (aarch64_get_instr (cpu), 31, 17) == 0x4F57) + { + float_vector_move (cpu); + return; + } + + size = uimm (aarch64_get_instr (cpu), 31, 31); + S = uimm (aarch64_get_instr (cpu), 29, 29); + if (S != 0) + HALT_UNALLOC; + + type = uimm (aarch64_get_instr (cpu), 23, 22); + if (type > 1) + HALT_UNALLOC; + + rmode_opcode = uimm (aarch64_get_instr (cpu), 20, 16); + size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */ + + switch (rmode_opcode) + { + case 2: /* SCVTF. */ + switch (size_type) + { + case 0: scvtf32 (cpu); return; + case 1: scvtd32 (cpu); return; + case 2: scvtf (cpu); return; + case 3: scvtd (cpu); return; + default: + HALT_UNREACHABLE; + } + + case 6: /* FMOV GR, Vec. */ + switch (size_type) + { + case 0: gfmovs (cpu); return; + case 3: gfmovd (cpu); return; + default: HALT_UNALLOC; + } + + case 7: /* FMOV vec, GR. */ + switch (size_type) + { + case 0: fgmovs (cpu); return; + case 3: fgmovd (cpu); return; + default: HALT_UNALLOC; + } + + case 24: /* FCVTZS. */ + switch (size_type) + { + case 0: fcvtszs32 (cpu); return; + case 1: fcvtszd32 (cpu); return; + case 2: fcvtszs (cpu); return; + case 3: fcvtszd (cpu); return; + default: HALT_UNREACHABLE; + } + + case 25: do_fcvtzu (cpu); return; + case 3: do_UCVTF (cpu); return; + + case 0: /* FCVTNS. */ + case 1: /* FCVTNU. */ + case 4: /* FCVTAS. */ + case 5: /* FCVTAU. */ + case 8: /* FCVPTS. */ + case 9: /* FCVTPU. */ + case 16: /* FCVTMS. */ + case 17: /* FCVTMU. */ + default: + HALT_NYI; + } +} + +static void +set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2) +{ + uint32_t flags; + + if (isnan (fvalue1) || isnan (fvalue2)) + flags = C|V; + else + { + float result = fvalue1 - fvalue2; + + if (result == 0.0) + flags = Z|C; + else if (result < 0) + flags = N; + else /* (result > 0). 
*/ + flags = C; + } + + aarch64_set_CPSR (cpu, flags); +} + +static void +fcmps (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + + float fvalue1 = aarch64_get_FP_float (cpu, sn); + float fvalue2 = aarch64_get_FP_float (cpu, sm); + + set_flags_for_float_compare (cpu, fvalue1, fvalue2); +} + +/* Float compare to zero -- Invalid Operation exception + only on signaling NaNs. */ +static void +fcmpzs (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + float fvalue1 = aarch64_get_FP_float (cpu, sn); + + set_flags_for_float_compare (cpu, fvalue1, 0.0f); +} + +/* Float compare -- Invalid Operation exception on all NaNs. */ +static void +fcmpes (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + + float fvalue1 = aarch64_get_FP_float (cpu, sn); + float fvalue2 = aarch64_get_FP_float (cpu, sm); + + set_flags_for_float_compare (cpu, fvalue1, fvalue2); +} + +/* Float compare to zero -- Invalid Operation exception on all NaNs. */ +static void +fcmpzes (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + float fvalue1 = aarch64_get_FP_float (cpu, sn); + + set_flags_for_float_compare (cpu, fvalue1, 0.0f); +} + +static void +set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2) +{ + uint32_t flags; + + if (isnan (dval1) || isnan (dval2)) + flags = C|V; + else + { + double result = dval1 - dval2; + + if (result == 0.0) + flags = Z|C; + else if (result < 0) + flags = N; + else /* (result > 0). */ + flags = C; + } + + aarch64_set_CPSR (cpu, flags); +} + +/* Double compare -- Invalid Operation exception only on signaling NaNs. */ +static void +fcmpd (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + + double dvalue1 = aarch64_get_FP_double (cpu, sn); + double dvalue2 = aarch64_get_FP_double (cpu, sm); + + set_flags_for_double_compare (cpu, dvalue1, dvalue2); +} + +/* Double compare to zero -- Invalid Operation exception + only on signaling NaNs. */ +static void +fcmpzd (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + double dvalue1 = aarch64_get_FP_double (cpu, sn); + + set_flags_for_double_compare (cpu, dvalue1, 0.0); +} + +/* Double compare -- Invalid Operation exception on all NaNs. */ +static void +fcmped (sim_cpu *cpu) +{ + unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + + double dvalue1 = aarch64_get_FP_double (cpu, sn); + double dvalue2 = aarch64_get_FP_double (cpu, sm); + + set_flags_for_double_compare (cpu, dvalue1, dvalue2); +} + +/* Double compare to zero -- Invalid Operation exception on all NaNs. 
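+   i.e. FCMPE Dn, #0.0.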
*/ +static void +fcmpzed (sim_cpu *cpu) +{ + unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + double dvalue1 = aarch64_get_FP_double (cpu, sn); + + set_flags_for_double_compare (cpu, dvalue1, 0.0); +} + +static void +dexSimpleFPCompare (sim_cpu *cpu) +{ + /* assert instr[28,25] == 1111 + instr[30:24:21:13,10] = 0011000 + instr[31] = M : 0 ==> OK, 1 ==> UNALLOC + instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC + instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC + instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC + instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE, + 01000 ==> FCMPZ, 11000 ==> FCMPEZ, + ow ==> UNALLOC */ + uint32_t dispatch; + uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) + | uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22); + uint32_t op = uimm (aarch64_get_instr (cpu), 15, 14); + uint32_t op2_2_0 = uimm (aarch64_get_instr (cpu), 2, 0); + + if (op2_2_0 != 0) + HALT_UNALLOC; + + if (M_S != 0) + HALT_UNALLOC; + + if (type > 1) + HALT_UNALLOC; + + if (op != 0) + HALT_UNALLOC; + + /* dispatch on type and top 2 bits of opcode. */ + dispatch = (type << 2) | uimm (aarch64_get_instr (cpu), 4, 3); + + switch (dispatch) + { + case 0: fcmps (cpu); return; + case 1: fcmpzs (cpu); return; + case 2: fcmpes (cpu); return; + case 3: fcmpzes (cpu); return; + case 4: fcmpd (cpu); return; + case 5: fcmpzd (cpu); return; + case 6: fcmped (cpu); return; + case 7: fcmpzed (cpu); return; + default: HALT_UNREACHABLE; + } +} + +static void +do_scalar_FADDP (sim_cpu *cpu) +{ + /* instr [31,23] = 011111100 + instr [22] = single(0)/double(1) + instr [21,10] = 1100 0011 0110 + instr [9,5] = Fn + instr [4,0] = Fd. */ + + unsigned Fn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned Fd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 23, 0x0FC); + NYI_assert (21, 10, 0xC36); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double val1 = aarch64_get_vec_double (cpu, Fn, 0); + double val2 = aarch64_get_vec_double (cpu, Fn, 1); + + aarch64_set_FP_double (cpu, Fd, val1 + val2); + } + else + { + float val1 = aarch64_get_vec_float (cpu, Fn, 0); + float val2 = aarch64_get_vec_float (cpu, Fn, 1); + + aarch64_set_FP_float (cpu, Fd, val1 + val2); + } +} + +/* Floating point absolute difference. */ + +static void +do_scalar_FABD (sim_cpu *cpu) +{ + /* instr [31,23] = 0111 1110 1 + instr [22] = float(0)/double(1) + instr [21] = 1 + instr [20,16] = Rm + instr [15,10] = 1101 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 23, 0x0FD); + NYI_assert (21, 21, 1); + NYI_assert (15, 10, 0x35); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + aarch64_set_FP_double (cpu, rd, + fabs (aarch64_get_FP_double (cpu, rn) + - aarch64_get_FP_double (cpu, rm))); + else + aarch64_set_FP_float (cpu, rd, + fabsf (aarch64_get_FP_float (cpu, rn) + - aarch64_get_FP_float (cpu, rm))); +} + +static void +do_scalar_CMGT (sim_cpu *cpu) +{ + /* instr [31,21] = 0101 1110 111 + instr [20,16] = Rm + instr [15,10] = 00 1101 + instr [9, 5] = Rn + instr [4, 0] = Rd. 
*/ + + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 21, 0x2F7); + NYI_assert (15, 10, 0x0D); + + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_u64 (cpu, rn, 0) > + aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L); +} + +static void +do_scalar_USHR (sim_cpu *cpu) +{ + /* instr [31,23] = 0111 1111 0 + instr [22,16] = shift amount + instr [15,10] = 0000 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned amount = 128 - uimm (aarch64_get_instr (cpu), 22, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 23, 0x0FE); + NYI_assert (15, 10, 0x01); + + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_u64 (cpu, rn, 0) >> amount); +} + +static void +do_scalar_SHL (sim_cpu *cpu) +{ + /* instr [31,23] = 0111 1101 0 + instr [22,16] = shift amount + instr [15,10] = 0101 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned amount = uimm (aarch64_get_instr (cpu), 22, 16) - 64; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 23, 0x0BE); + NYI_assert (15, 10, 0x15); + + if (uimm (aarch64_get_instr (cpu), 22, 22) == 0) + HALT_UNALLOC; + + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_u64 (cpu, rn, 0) << amount); +} + +/* FCMEQ FCMGT FCMGE. */ +static void +do_scalar_FCM (sim_cpu *cpu) +{ + /* instr [31,30] = 01 + instr [29] = U + instr [28,24] = 1 1110 + instr [23] = E + instr [22] = size + instr [21] = 1 + instr [20,16] = Rm + instr [15,12] = 1110 + instr [11] = AC + instr [10] = 1 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned EUac = (uimm (aarch64_get_instr (cpu), 23, 23) << 2) + | (uimm (aarch64_get_instr (cpu), 29, 29) << 1) + | uimm (aarch64_get_instr (cpu), 11, 11); + unsigned result; + float val1; + float val2; + + NYI_assert (31, 30, 1); + NYI_assert (28, 24, 0x1E); + NYI_assert (21, 21, 1); + NYI_assert (15, 12, 0xE); + NYI_assert (10, 10, 1); + + if (uimm (aarch64_get_instr (cpu), 22, 22)) + { + double val1 = aarch64_get_FP_double (cpu, rn); + double val2 = aarch64_get_FP_double (cpu, rm); + + switch (EUac) + { + case 0: /* 000 */ + result = val1 == val2; + break; + + case 3: /* 011 */ + val1 = fabs (val1); + val2 = fabs (val2); + /* Fall through. */ + case 2: /* 010 */ + result = val1 >= val2; + break; + + case 7: /* 111 */ + val1 = fabs (val1); + val2 = fabs (val2); + /* Fall through. */ + case 6: /* 110 */ + result = val1 > val2; + break; + + default: + HALT_UNALLOC; + } + + aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); + return; + } + + val1 = aarch64_get_FP_float (cpu, rn); + val2 = aarch64_get_FP_float (cpu, rm); + + switch (EUac) + { + case 0: /* 000 */ + result = val1 == val2; + break; + + case 3: /* 011 */ + val1 = fabsf (val1); + val2 = fabsf (val2); + /* Fall through. */ + case 2: /* 010 */ + result = val1 >= val2; + break; + + case 7: /* 111 */ + val1 = fabsf (val1); + val2 = fabsf (val2); + /* Fall through. */ + case 6: /* 110 */ + result = val1 > val2; + break; + + default: + HALT_UNALLOC; + } + + aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); +} + +/* An alias of DUP. 
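+   Scalar form: MOV <V>d, Vn.<T>[index] copies the selected element of Vn
+   into the lowest element of Vd.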
*/ +static void +do_scalar_MOV (sim_cpu *cpu) +{ + /* instr [31,21] = 0101 1110 000 + instr [20,16] = imm5 + instr [15,10] = 0000 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned index; + + NYI_assert (31, 21, 0x2F0); + NYI_assert (15, 10, 0x01); + + if (uimm (aarch64_get_instr (cpu), 16, 16)) + { + /* 8-bit. */ + index = uimm (aarch64_get_instr (cpu), 20, 17); + aarch64_set_vec_u8 + (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index)); + } + else if (uimm (aarch64_get_instr (cpu), 17, 17)) + { + /* 16-bit. */ + index = uimm (aarch64_get_instr (cpu), 20, 18); + aarch64_set_vec_u16 + (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index)); + } + else if (uimm (aarch64_get_instr (cpu), 18, 18)) + { + /* 32-bit. */ + index = uimm (aarch64_get_instr (cpu), 20, 19); + aarch64_set_vec_u32 + (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index)); + } + else if (uimm (aarch64_get_instr (cpu), 19, 19)) + { + /* 64-bit. */ + index = uimm (aarch64_get_instr (cpu), 20, 20); + aarch64_set_vec_u64 + (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index)); + } + else + HALT_UNALLOC; +} + +static void +do_double_add (sim_cpu *cpu) +{ + /* instr [28,25] = 1111. */ + unsigned Fd; + unsigned Fm; + unsigned Fn; + double val1; + double val2; + + switch (uimm (aarch64_get_instr (cpu), 31, 23)) + { + case 0xBC: + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x01: do_scalar_MOV (cpu); return; + case 0x39: do_scalar_FCM (cpu); return; + case 0x3B: do_scalar_FCM (cpu); return; + } + break; + + case 0xBE: do_scalar_SHL (cpu); return; + + case 0xFC: + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x36: do_scalar_FADDP (cpu); return; + case 0x39: do_scalar_FCM (cpu); return; + case 0x3B: do_scalar_FCM (cpu); return; + } + break; + + case 0xFD: + switch (uimm (aarch64_get_instr (cpu), 15, 10)) + { + case 0x0D: do_scalar_CMGT (cpu); return; + case 0x35: do_scalar_FABD (cpu); return; + case 0x39: do_scalar_FCM (cpu); return; + case 0x3B: do_scalar_FCM (cpu); return; + default: + HALT_NYI; + } + + case 0xFE: do_scalar_USHR (cpu); return; + default: + break; + } + + /* instr [31,21] = 0101 1110 111 + instr [20,16] = Fn + instr [15,10] = 1000 01 + instr [9,5] = Fm + instr [4,0] = Fd. */ + if (uimm (aarch64_get_instr (cpu), 31, 21) != 0x2F7 + || uimm (aarch64_get_instr (cpu), 15, 10) != 0x21) + HALT_NYI; + + Fd = uimm (aarch64_get_instr (cpu), 4, 0); + Fm = uimm (aarch64_get_instr (cpu), 9, 5); + Fn = uimm (aarch64_get_instr (cpu), 20, 16); + + val1 = aarch64_get_FP_double (cpu, Fm); + val2 = aarch64_get_FP_double (cpu, Fn); + + aarch64_set_FP_double (cpu, Fd, val1 + val2); +} + +static void +dexAdvSIMD1 (sim_cpu *cpu) +{ + /* instr [28,25] = 1 111. */ + + /* we are currently only interested in the basic + scalar fp routines which all have bit 30 = 0. */ + if (uimm (aarch64_get_instr (cpu), 30, 30)) + do_double_add (cpu); + + /* instr[24] is set for FP data processing 3-source and clear for + all other basic scalar fp instruction groups. */ + else if (uimm (aarch64_get_instr (cpu), 24, 24)) + dexSimpleFPDataProc3Source (cpu); + + /* instr[21] is clear for floating <-> fixed conversions and set for + all other basic scalar fp instruction groups. */ + else if (!uimm (aarch64_get_instr (cpu), 21, 21)) + dexSimpleFPFixedConvert (cpu); + + /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source + 11 ==> cond select, 00 ==> other. 
*/ + else + switch (uimm (aarch64_get_instr (cpu), 11, 10)) + { + case 1: dexSimpleFPCondCompare (cpu); return; + case 2: dexSimpleFPDataProc2Source (cpu); return; + case 3: dexSimpleFPCondSelect (cpu); return; + + default: + /* Now an ordered cascade of tests. + FP immediate has aarch64_get_instr (cpu)[12] == 1. + FP compare has aarch64_get_instr (cpu)[13] == 1. + FP Data Proc 1 Source has aarch64_get_instr (cpu)[14] == 1. + FP floating <--> integer conversions has aarch64_get_instr (cpu)[15] == 0. */ + if (uimm (aarch64_get_instr (cpu), 12, 12)) + dexSimpleFPImmediate (cpu); + + else if (uimm (aarch64_get_instr (cpu), 13, 13)) + dexSimpleFPCompare (cpu); + + else if (uimm (aarch64_get_instr (cpu), 14, 14)) + dexSimpleFPDataProc1Source (cpu); + + else if (!uimm (aarch64_get_instr (cpu), 15, 15)) + dexSimpleFPIntegerConvert (cpu); + + else + /* If we get here then instr[15] == 1 which means UNALLOC. */ + HALT_UNALLOC; + } +} + +/* PC relative addressing. */ + +static void +pcadr (sim_cpu *cpu) +{ + /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP + instr[30,29] = immlo + instr[23,5] = immhi. */ + uint64_t address; + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t isPage = uimm (aarch64_get_instr (cpu), 31, 31); + union { int64_t u64; uint64_t s64; } imm; + uint64_t offset; + + imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5); + offset = imm.u64; + offset = (offset << 2) | uimm (aarch64_get_instr (cpu), 30, 29); + + address = aarch64_get_PC (cpu); + + if (isPage) + { + offset <<= 12; + address &= ~0xfff; + } + + aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset); +} + +/* Specific decode and execute for group Data Processing Immediate. */ + +static void +dexPCRelAddressing (sim_cpu *cpu) +{ + /* assert instr[28,24] = 10000. */ + pcadr (cpu); +} + +/* Immediate logical. + The bimm32/64 argument is constructed by replicating a 2, 4, 8, + 16, 32 or 64 bit sequence pulled out at decode and possibly + inverting it.. + + N.B. the output register (dest) can normally be Xn or SP + the exception occurs for flag setting instructions which may + only use Xn for the output (dest). The input register can + never be SP. */ + +/* 32 bit and immediate. */ +static void +and32 (sim_cpu *cpu, uint32_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm); +} + +/* 64 bit and immediate. */ +static void +and64 (sim_cpu *cpu, uint64_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm); +} + +/* 32 bit and immediate set flags. */ +static void +ands32 (sim_cpu *cpu, uint32_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = bimm; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); + set_flags_for_binop32 (cpu, value1 & value2); +} + +/* 64 bit and immediate set flags. 
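+   i.e. ANDS Xd, Xn, #bimm (the TST alias when Rd is XZR).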
*/ +static void +ands64 (sim_cpu *cpu, uint64_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = bimm; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); + set_flags_for_binop64 (cpu, value1 & value2); +} + +/* 32 bit exclusive or immediate. */ +static void +eor32 (sim_cpu *cpu, uint32_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm); +} + +/* 64 bit exclusive or immediate. */ +static void +eor64 (sim_cpu *cpu, uint64_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm); +} + +/* 32 bit or immediate. */ +static void +orr32 (sim_cpu *cpu, uint32_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm); +} + +/* 64 bit or immediate. */ +static void +orr64 (sim_cpu *cpu, uint64_t bimm) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, SP_OK, + aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm); +} + +/* Logical shifted register. + These allow an optional LSL, ASR, LSR or ROR to the second source + register with a count up to the register bit count. + N.B register args may not be SP. */ + +/* 32 bit AND shifted register. */ +static void +and32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); +} + +/* 64 bit AND shifted register. */ +static void +and64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); +} + +/* 32 bit AND shifted register setting flags. */ +static void +ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); + set_flags_for_binop32 (cpu, value1 & value2); +} + +/* 64 bit AND shifted register setting flags. 
*/ +static void +ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); + set_flags_for_binop64 (cpu, value1 & value2); +} + +/* 32 bit BIC shifted register. */ +static void +bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); +} + +/* 64 bit BIC shifted register. */ +static void +bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); +} + +/* 32 bit BIC shifted register setting flags. */ +static void +bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); + set_flags_for_binop32 (cpu, value1 & value2); +} + +/* 64 bit BIC shifted register setting flags. */ +static void +bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), + shift, count); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); + set_flags_for_binop64 (cpu, value1 & value2); +} + +/* 32 bit EON shifted register. */ +static void +eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); +} + +/* 64 bit EON shifted register. */ +static void +eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); +} + +/* 32 bit EOR shifted register. 
*/ +static void +eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); +} + +/* 64 bit EOR shifted register. */ +static void +eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); +} + +/* 32 bit ORR shifted register. */ +static void +orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); +} + +/* 64 bit ORR shifted register. */ +static void +orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); +} + +/* 32 bit ORN shifted register. */ +static void +orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); +} + +/* 64 bit ORN shifted register. */ +static void +orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); +} + +static void +dexLogicalImmediate (sim_cpu *cpu) +{ + /* assert instr[28,23] = 1001000 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS + instr[22] = N : used to construct immediate mask + instr[21,16] = immr + instr[15,10] = imms + instr[9,5] = Rn + instr[4,0] = Rd */ + + /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22); + /* uint32_t immr = uimm (aarch64_get_instr (cpu), 21, 16);. */ + /* uint32_t imms = uimm (aarch64_get_instr (cpu), 15, 10);. 
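+     Bits [22,10] (N:immr:imms) index the precomputed LITable; a zero
+     entry marks an encoding with no valid expansion.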
*/ + uint32_t index = uimm (aarch64_get_instr (cpu), 22, 10); + uint64_t bimm64 = LITable [index]; + uint32_t dispatch = uimm (aarch64_get_instr (cpu), 30, 29); + + if (~size & N) + HALT_UNALLOC; + + if (!bimm64) + HALT_UNALLOC; + + if (size == 0) + { + uint32_t bimm = (uint32_t) bimm64; + + switch (dispatch) + { + case 0: and32 (cpu, bimm); return; + case 1: orr32 (cpu, bimm); return; + case 2: eor32 (cpu, bimm); return; + case 3: ands32 (cpu, bimm); return; + } + } + else + { + switch (dispatch) + { + case 0: and64 (cpu, bimm64); return; + case 1: orr64 (cpu, bimm64); return; + case 2: eor64 (cpu, bimm64); return; + case 3: ands64 (cpu, bimm64); return; + } + } + HALT_UNALLOC; +} + +/* Immediate move. + The uimm argument is a 16 bit value to be inserted into the + target register the pos argument locates the 16 bit word in the + dest register i.e. it is in {0, 1} for 32 bit and {0, 1, 2, + 3} for 64 bit. + N.B register arg may not be SP so it should be. + accessed using the setGZRegisterXXX accessors. */ + +/* 32 bit move 16 bit immediate zero remaining shorts. */ +static void +movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16)); +} + +/* 64 bit move 16 bit immediate zero remaining shorts. */ +static void +movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16)); +} + +/* 32 bit move 16 bit immediate negated. */ +static void +movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU)); +} + +/* 64 bit move 16 bit immediate negated. */ +static void +movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16)) + ^ 0xffffffffffffffffULL)); +} + +/* 32 bit move 16 bit immediate keep remaining shorts. */ +static void +movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP); + uint32_t value = val << (pos * 16); + uint32_t mask = ~(0xffffU << (pos * 16)); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); +} + +/* 64 bit move 16 it immediate keep remaining shorts. */ +static void +movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos) +{ + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP); + uint64_t value = (uint64_t) val << (pos * 16); + uint64_t mask = ~(0xffffULL << (pos * 16)); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); +} + +static void +dexMoveWideImmediate (sim_cpu *cpu) +{ + /* assert instr[28:23] = 100101 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK + instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48 + instr[20,5] = uimm16 + instr[4,0] = Rd */ + + /* N.B. the (multiple of 16) shift is applied by the called routine, + we just pass the multiplier. */ + + uint32_t imm; + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + uint32_t op = uimm (aarch64_get_instr (cpu), 30, 29); + uint32_t shift = uimm (aarch64_get_instr (cpu), 22, 21); + + /* 32 bit can only shift 0 or 1 lot of 16. 
+ anything else is an unallocated instruction. */ + if (size == 0 && (shift > 1)) + HALT_UNALLOC; + + if (op == 1) + HALT_UNALLOC; + + imm = uimm (aarch64_get_instr (cpu), 20, 5); + + if (size == 0) + { + if (op == 0) + movn32 (cpu, imm, shift); + else if (op == 2) + movz32 (cpu, imm, shift); + else + movk32 (cpu, imm, shift); + } + else + { + if (op == 0) + movn64 (cpu, imm, shift); + else if (op == 2) + movz64 (cpu, imm, shift); + else + movk64 (cpu, imm, shift); + } +} + +/* Bitfield operations. + These take a pair of bit positions r and s which are in {0..31} + or {0..63} depending on the instruction word size. + N.B register args may not be SP. */ + +/* OK, we start with ubfm which just needs to pick + some bits out of source zero the rest and write + the result to dest. Just need two logical shifts. */ + +/* 32 bit bitfield move, left and right of affected zeroed + if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ +static void +ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) +{ + unsigned rd; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); + + /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ + if (r <= s) + { + /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0. + We want only bits s:xxx:r at the bottom of the word + so we LSL bit s up to bit 31 i.e. by 31 - s + and then we LSR to bring bit 31 down to bit s - r + i.e. by 31 + r - s. */ + value <<= 31 - s; + value >>= 31 + r - s; + } + else + { + /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0 + We want only bits s:xxx:0 starting at it 31-(r-1) + so we LSL bit s up to bit 31 i.e. by 31 - s + and then we LSL to bring bit 31 down to 31-(r-1)+s + i.e. by r - (s + 1). */ + value <<= 31 - s; + value >>= r - (s + 1); + } + + rd = uimm (aarch64_get_instr (cpu), 4, 0); + aarch64_set_reg_u64 (cpu, rd, NO_SP, value); +} + +/* 64 bit bitfield move, left and right of affected zeroed + if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ +static void +ubfm (sim_cpu *cpu, uint32_t r, uint32_t s) +{ + unsigned rd; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); + + if (r <= s) + { + /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0. + We want only bits s:xxx:r at the bottom of the word. + So we LSL bit s up to bit 63 i.e. by 63 - s + and then we LSR to bring bit 63 down to bit s - r + i.e. by 63 + r - s. */ + value <<= 63 - s; + value >>= 63 + r - s; + } + else + { + /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0. + We want only bits s:xxx:0 starting at it 63-(r-1). + So we LSL bit s up to bit 63 i.e. by 63 - s + and then we LSL to bring bit 63 down to 63-(r-1)+s + i.e. by r - (s + 1). */ + value <<= 63 - s; + value >>= r - (s + 1); + } + + rd = uimm (aarch64_get_instr (cpu), 4, 0); + aarch64_set_reg_u64 (cpu, rd, NO_SP, value); +} + +/* The signed versions need to insert sign bits + on the left of the inserted bit field. so we do + much the same as the unsigned version except we + use an arithmetic shift right -- this just means + we need to operate on signed values. */ + +/* 32 bit bitfield move, left of affected sign-extended, right zeroed. */ +/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ +static void +sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) +{ + unsigned rd; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + /* as per ubfm32 but use an ASR instead of an LSR. 
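+     Using a signed type means the right shifts below sign-extend
+     (assuming the host performs arithmetic shifts on signed values).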
*/ + int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP); + + if (r <= s) + { + value <<= 31 - s; + value >>= 31 + r - s; + } + else + { + value <<= 31 - s; + value >>= r - (s + 1); + } + + rd = uimm (aarch64_get_instr (cpu), 4, 0); + aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); +} + +/* 64 bit bitfield move, left of affected sign-extended, right zeroed. */ +/* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ +static void +sbfm (sim_cpu *cpu, uint32_t r, uint32_t s) +{ + unsigned rd; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + /* acpu per ubfm but use an ASR instead of an LSR. */ + int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP); + + if (r <= s) + { + value <<= 63 - s; + value >>= 63 + r - s; + } + else + { + value <<= 63 - s; + value >>= r - (s + 1); + } + + rd = uimm (aarch64_get_instr (cpu), 4, 0); + aarch64_set_reg_s64 (cpu, rd, NO_SP, value); +} + +/* Finally, these versions leave non-affected bits + as is. so we need to generate the bits as per + ubfm and also generate a mask to pick the + bits from the original and computed values. */ + +/* 32 bit bitfield move, non-affected bits left as is. + If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ +static void +bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t mask = -1; + unsigned rd; + uint32_t value2; + + /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ + if (r <= s) + { + /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0. + We want only bits s:xxx:r at the bottom of the word + so we LSL bit s up to bit 31 i.e. by 31 - s + and then we LSR to bring bit 31 down to bit s - r + i.e. by 31 + r - s. */ + value <<= 31 - s; + value >>= 31 + r - s; + /* the mask must include the same bits. */ + mask <<= 31 - s; + mask >>= 31 + r - s; + } + else + { + /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0. + We want only bits s:xxx:0 starting at it 31-(r-1) + so we LSL bit s up to bit 31 i.e. by 31 - s + and then we LSL to bring bit 31 down to 31-(r-1)+s + i.e. by r - (s + 1). */ + value <<= 31 - s; + value >>= r - (s + 1); + /* The mask must include the same bits. */ + mask <<= 31 - s; + mask >>= r - (s + 1); + } + + rd = uimm (aarch64_get_instr (cpu), 4, 0); + value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP); + + value2 &= ~mask; + value2 |= value; + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value); +} + +/* 64 bit bitfield move, non-affected bits left as is. + If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ +static void +bfm (sim_cpu *cpu, uint32_t r, uint32_t s) +{ + unsigned rd; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t mask = 0xffffffffffffffffULL; + + if (r <= s) + { + /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0. + We want only bits s:xxx:r at the bottom of the word + so we LSL bit s up to bit 63 i.e. by 63 - s + and then we LSR to bring bit 63 down to bit s - r + i.e. by 63 + r - s. */ + value <<= 63 - s; + value >>= 63 + r - s; + /* The mask must include the same bits. */ + mask <<= 63 - s; + mask >>= 63 + r - s; + } + else + { + /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0 + We want only bits s:xxx:0 starting at it 63-(r-1) + so we LSL bit s up to bit 63 i.e. by 63 - s + and then we LSL to bring bit 63 down to 63-(r-1)+s + i.e. by r - (s + 1). 
*/ + value <<= 63 - s; + value >>= r - (s + 1); + /* The mask must include the same bits. */ + mask <<= 63 - s; + mask >>= r - (s + 1); + } + + rd = uimm (aarch64_get_instr (cpu), 4, 0); + aarch64_set_reg_u64 + (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value); +} + +static void +dexBitfieldImmediate (sim_cpu *cpu) +{ + /* assert instr[28:23] = 100110 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC + instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC + instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit + instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit + instr[9,5] = Rn + instr[4,0] = Rd */ + + /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ + uint32_t dispatch; + uint32_t imms; + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22); + /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */ + /* or else we have an UNALLOC. */ + uint32_t immr = uimm (aarch64_get_instr (cpu), 21, 16); + + if (~size & N) + HALT_UNALLOC; + + if (!size && uimm (immr, 5, 5)) + HALT_UNALLOC; + + imms = uimm (aarch64_get_instr (cpu), 15, 10); + if (!size && uimm (imms, 5, 5)) + HALT_UNALLOC; + + /* Switch on combined size and op. */ + dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + switch (dispatch) + { + case 0: sbfm32 (cpu, immr, imms); return; + case 1: bfm32 (cpu, immr, imms); return; + case 2: ubfm32 (cpu, immr, imms); return; + case 4: sbfm (cpu, immr, imms); return; + case 5: bfm (cpu, immr, imms); return; + case 6: ubfm (cpu, immr, imms); return; + default: HALT_UNALLOC; + } +} + +static void +do_EXTR_32 (sim_cpu *cpu) +{ + /* instr[31:21] = 00010011100 + instr[20,16] = Rm + instr[15,10] = imms : 0xxxxx for 32 bit + instr[9,5] = Rn + instr[4,0] = Rd */ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned imms = uimm (aarch64_get_instr (cpu), 15, 10) & 31; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t val1; + uint64_t val2; + + val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP); + val1 >>= imms; + val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP); + val2 <<= (32 - imms); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2); +} + +static void +do_EXTR_64 (sim_cpu *cpu) +{ + /* instr[31:21] = 10010011100 + instr[20,16] = Rm + instr[15,10] = imms + instr[9,5] = Rn + instr[4,0] = Rd */ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned imms = uimm (aarch64_get_instr (cpu), 15, 10) & 63; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t val; + + val = aarch64_get_reg_u64 (cpu, rm, NO_SP); + val >>= imms; + val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, val); +} + +static void +dexExtractImmediate (sim_cpu *cpu) +{ + /* assert instr[28:23] = 100111 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC + instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC + instr[21] = op0 : must be 0 or UNALLOC + instr[20,16] = Rm + instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit + instr[9,5] = Rn + instr[4,0] = Rd */ + + /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ + /* 64 bit operations must have N = 1 or else we have an UNALLOC. 
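
/* [Editorial note -- illustrative sketch, not part of the original patch.]
   EXTR treats Rn:Rm as a 128-bit quantity and takes 64 bits starting at bit
   <imms> of Rm, which is what do_EXTR_64 above computes with a shift pair.
   The helper name extr64_model is invented; the sketch special-cases
   lsb == 0 so that it never shifts a 64-bit value by 64 (undefined in C).  */

#include <stdint.h>
#include <assert.h>

static uint64_t
extr64_model (uint64_t rn, uint64_t rm, unsigned lsb)  /* lsb in 0..63 */
{
  if (lsb == 0)
    return rm;
  return (rm >> lsb) | (rn << (64 - lsb));
}

int
main (void)
{
  uint64_t x = 0x1122334455667788ULL;

  /* The low 16 bits of Rn land on top of Rm >> 16.  */
  assert (extr64_model (0x000000000000ABCDULL, x, 16)
          == 0xABCD112233445566ULL);

  /* With Rn == Rm, EXTR is a rotate right (the ROR alias).  */
  assert (extr64_model (x, x, 8) == 0x8811223344556677ULL);
  return 0;
}
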
*/ + uint32_t dispatch; + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22); + /* 32 bit operations must have imms[5] = 0 + or else we have an UNALLOC. */ + uint32_t imms = uimm (aarch64_get_instr (cpu), 15, 10); + + if (size ^ N) + HALT_UNALLOC; + + if (!size && uimm (imms, 5, 5)) + HALT_UNALLOC; + + /* Switch on combined size and op. */ + dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + + if (dispatch == 0) + do_EXTR_32 (cpu); + + else if (dispatch == 4) + do_EXTR_64 (cpu); + + else if (dispatch == 1) + HALT_NYI; + else + HALT_UNALLOC; +} + +static void +dexDPImm (sim_cpu *cpu) +{ + /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); + assert group == GROUP_DPIMM_1000 || grpoup == GROUP_DPIMM_1001 + bits [25,23] of a DPImm are the secondary dispatch vector. */ + uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu)); + + switch (group2) + { + case DPIMM_PCADR_000: + case DPIMM_PCADR_001: + dexPCRelAddressing (cpu); + return; + + case DPIMM_ADDSUB_010: + case DPIMM_ADDSUB_011: + dexAddSubtractImmediate (cpu); + return; + + case DPIMM_LOG_100: + dexLogicalImmediate (cpu); + return; + + case DPIMM_MOV_101: + dexMoveWideImmediate (cpu); + return; + + case DPIMM_BITF_110: + dexBitfieldImmediate (cpu); + return; + + case DPIMM_EXTR_111: + dexExtractImmediate (cpu); + return; + + default: + /* Should never reach here. */ + HALT_NYI; + } +} + +static void +dexLoadUnscaledImmediate (sim_cpu *cpu) +{ + /* instr[29,24] == 111_00 + instr[21] == 0 + instr[11,10] == 00 + instr[31,30] = size + instr[26] = V + instr[23,22] = opc + instr[20,12] = simm9 + instr[9,5] = rn may be SP. */ + /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ + uint32_t V = uimm (aarch64_get_instr (cpu), 26, 26); + uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) + | uimm (aarch64_get_instr (cpu), 23, 22)); + int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); + + if (!V) + { + /* GReg operations. */ + switch (dispatch) + { + case 0: sturb (cpu, imm); return; + case 1: ldurb32 (cpu, imm); return; + case 2: ldursb64 (cpu, imm); return; + case 3: ldursb32 (cpu, imm); return; + case 4: sturh (cpu, imm); return; + case 5: ldurh32 (cpu, imm); return; + case 6: ldursh64 (cpu, imm); return; + case 7: ldursh32 (cpu, imm); return; + case 8: stur32 (cpu, imm); return; + case 9: ldur32 (cpu, imm); return; + case 10: ldursw (cpu, imm); return; + case 12: stur64 (cpu, imm); return; + case 13: ldur64 (cpu, imm); return; + + case 14: + /* PRFUM NYI. */ + HALT_NYI; + + default: + case 11: + case 15: + HALT_UNALLOC; + } + } + + /* FReg operations. */ + switch (dispatch) + { + case 2: fsturq (cpu, imm); return; + case 3: fldurq (cpu, imm); return; + case 8: fsturs (cpu, imm); return; + case 9: fldurs (cpu, imm); return; + case 12: fsturd (cpu, imm); return; + case 13: fldurd (cpu, imm); return; + + case 0: /* STUR 8 bit FP. */ + case 1: /* LDUR 8 bit FP. */ + case 4: /* STUR 16 bit FP. */ + case 5: /* LDUR 8 bit FP. */ + HALT_NYI; + + default: + case 6: + case 7: + case 10: + case 11: + case 14: + case 15: + HALT_UNALLOC; + } +} + +/* N.B. A preliminary note regarding all the ldrs<x>32 + instructions + + The signed value loaded by these instructions is cast to unsigned + before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the + 64 bit element of the GReg union. this performs a 32 bit sign extension + (as required) but avoids 64 bit sign extension, thus ensuring that the + top half of the register word is zero. 
this is what the spec demands + when a 32 bit load occurs. */ + +/* 32 bit load sign-extended byte scaled unsigned 12 bit. */ +static void +ldrsb32_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* The target register may not be SP but the source may be + there is no scaling required for a byte load. */ + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; + aarch64_set_reg_u64 (cpu, rt, NO_SP, + (int64_t) aarch64_get_mem_s8 (cpu, address)); +} + +/* 32 bit load sign-extended byte scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned int rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0); + + /* rn may reference SP, rm and rt must reference ZR. */ + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + + /* There is no scaling required for a byte load. */ + aarch64_set_reg_u64 + (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address + + displacement)); +} + +/* 32 bit load sign-extended byte unscaled signed 9 bit with + pre- or post-writeback. */ +static void +ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + uint64_t address; + unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0); + + if (rn == rt && wb != NoWriteBack) + HALT_UNALLOC; + + address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb == Pre) + address += offset; + + aarch64_set_reg_u64 (cpu, rt, NO_SP, + (int64_t) aarch64_get_mem_s8 (cpu, address)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, NO_SP, address); +} + +/* 8 bit store scaled. */ +static void +fstrb_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + + aarch64_set_mem_u8 (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, + aarch64_get_vec_u8 (cpu, st, 0)); +} + +/* 8 bit store scaled or unscaled zero- or + sign-extended 8-bit register offset. */ +static void +fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_mem_u8 + (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0)); +} + +/* 16 bit store scaled. */ +static void +fstrh_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + + aarch64_set_mem_u16 + (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16), + aarch64_get_vec_u16 (cpu, st, 0)); +} + +/* 16 bit store scaled or unscaled zero- + or sign-extended 16-bit register offset. 
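
/* [Editorial note -- illustrative sketch, not part of the original patch.]
   This restates the point of the ldrs<x>32 note above: widening a negative
   byte through int64_t sign-extends into the top half of a 64-bit register,
   whereas widening through uint32_t keeps the top 32 bits zero, which is
   what a 32-bit destination must read back as.  */

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  int8_t byte = -1;

  uint64_t x_extended = (uint64_t) (int64_t) byte;    /* 64-bit sign extend.  */
  uint64_t w_view     = (uint32_t) (int32_t) byte;    /* 32-bit sign extend,
                                                         top half zero.  */

  assert (x_extended == 0xffffffffffffffffULL);
  assert (w_view     == 0x00000000ffffffffULL);
  return 0;
}
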
*/ +static void +fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_mem_u16 + (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0)); +} + +/* 32 bit store scaled unsigned 12 bit. */ +static void +fstrs_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + + aarch64_set_mem_float + (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32), + aarch64_get_FP_float (cpu, st)); +} + +/* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ +static void +fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_mem_float (cpu, address, aarch64_get_FP_float (cpu, st)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 32 bit store scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 32, scaling); + + aarch64_set_mem_float + (cpu, address + displacement, aarch64_get_FP_float (cpu, st)); +} + +/* 64 bit store scaled unsigned 12 bit. */ +static void +fstrd_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + + aarch64_set_mem_double + (cpu, + aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64), + aarch64_get_FP_double (cpu, st)); +} + +/* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ +static void +fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_set_mem_double (cpu, address, aarch64_get_FP_double (cpu, st)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 64 bit store scaled or unscaled zero- + or sign-extended 32-bit register offset. 
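
/* [Editorial note -- illustrative sketch, not part of the original patch.]
   The *_wb load/store helpers above all share one addressing pattern:
   pre-index and plain-offset forms access base+offset, post-index accesses
   the old base, and only the indexed forms write the stepped address back.
   The names wb_mode and wb_address are invented for this standalone model.  */

#include <stdint.h>
#include <assert.h>

typedef enum { NO_WRITEBACK, PRE_INDEX, POST_INDEX } wb_mode;

/* Return the address used for the access; update *base only when the
   addressing mode writes back.  */
static uint64_t
wb_address (uint64_t *base, int64_t offset, wb_mode wb)
{
  uint64_t address = *base;

  if (wb != POST_INDEX)
    address += offset;

  if (wb == POST_INDEX)
    *base += offset;          /* Access old base, then step it.  */
  else if (wb == PRE_INDEX)
    *base = address;          /* Base keeps the new address.  */

  return address;
}

int
main (void)
{
  uint64_t sp = 0x1000;
  assert (wb_address (&sp, 16, PRE_INDEX)    == 0x1010 && sp == 0x1010);
  sp = 0x1000;
  assert (wb_address (&sp, 16, POST_INDEX)   == 0x1000 && sp == 0x1010);
  sp = 0x1000;
  assert (wb_address (&sp, 16, NO_WRITEBACK) == 0x1010 && sp == 0x1000);
  return 0;
}
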
*/ +static void +fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 64, scaling); + + aarch64_set_mem_double + (cpu, address + displacement, aarch64_get_FP_double (cpu, st)); +} + +/* 128 bit store scaled unsigned 12 bit. */ +static void +fstrq_abs (sim_cpu *cpu, uint32_t offset) +{ + FRegister a; + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + uint64_t addr; + + aarch64_get_FP_long_double (cpu, st, & a); + + addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); + aarch64_set_mem_long_double (cpu, addr, a); +} + +/* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */ +static void +fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + FRegister a; + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + + if (wb != Post) + address += offset; + + aarch64_get_FP_long_double (cpu, st, & a); + aarch64_set_mem_long_double (cpu, address, a); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rn, SP_OK, address); +} + +/* 128 bit store scaled or unscaled zero- + or sign-extended 32-bit register offset. */ +static void +fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + + uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); + int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), + extension); + uint64_t displacement = OPT_SCALE (extended, 128, scaling); + + FRegister a; + + aarch64_get_FP_long_double (cpu, st, & a); + aarch64_set_mem_long_double (cpu, address + displacement, a); +} + +static void +dexLoadImmediatePrePost (sim_cpu *cpu) +{ + /* instr[29,24] == 111_00 + instr[21] == 0 + instr[11,10] == 00 + instr[31,30] = size + instr[26] = V + instr[23,22] = opc + instr[20,12] = simm9 + instr[11] = wb : 0 ==> Post, 1 ==> Pre + instr[9,5] = rn may be SP. */ + /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ + uint32_t V = uimm (aarch64_get_instr (cpu), 26, 26); + uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) + | uimm (aarch64_get_instr (cpu), 23, 22)); + int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); + WriteBack wb = writeback (aarch64_get_instr (cpu), 11); + + if (!V) + { + /* GReg operations. 
*/ + switch (dispatch) + { + case 0: strb_wb (cpu, imm, wb); return; + case 1: ldrb32_wb (cpu, imm, wb); return; + case 2: ldrsb_wb (cpu, imm, wb); return; + case 3: ldrsb32_wb (cpu, imm, wb); return; + case 4: strh_wb (cpu, imm, wb); return; + case 5: ldrh32_wb (cpu, imm, wb); return; + case 6: ldrsh64_wb (cpu, imm, wb); return; + case 7: ldrsh32_wb (cpu, imm, wb); return; + case 8: str32_wb (cpu, imm, wb); return; + case 9: ldr32_wb (cpu, imm, wb); return; + case 10: ldrsw_wb (cpu, imm, wb); return; + case 12: str_wb (cpu, imm, wb); return; + case 13: ldr_wb (cpu, imm, wb); return; + + default: + case 11: + case 14: + case 15: + HALT_UNALLOC; + } + } + + /* FReg operations. */ + switch (dispatch) + { + case 2: fstrq_wb (cpu, imm, wb); return; + case 3: fldrq_wb (cpu, imm, wb); return; + case 8: fstrs_wb (cpu, imm, wb); return; + case 9: fldrs_wb (cpu, imm, wb); return; + case 12: fstrd_wb (cpu, imm, wb); return; + case 13: fldrd_wb (cpu, imm, wb); return; + + case 0: /* STUR 8 bit FP. */ + case 1: /* LDUR 8 bit FP. */ + case 4: /* STUR 16 bit FP. */ + case 5: /* LDUR 8 bit FP. */ + HALT_NYI; + + default: + case 6: + case 7: + case 10: + case 11: + case 14: + case 15: + HALT_UNALLOC; + } +} + +static void +dexLoadRegisterOffset (sim_cpu *cpu) +{ + /* instr[31,30] = size + instr[29,27] = 111 + instr[26] = V + instr[25,24] = 00 + instr[23,22] = opc + instr[21] = 1 + instr[20,16] = rm + instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL, + 110 ==> SXTW, 111 ==> SXTX, + ow ==> RESERVED + instr[12] = scaled + instr[11,10] = 10 + instr[9,5] = rn + instr[4,0] = rt. */ + + uint32_t V = uimm (aarch64_get_instr (cpu), 26,26); + uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) + | uimm (aarch64_get_instr (cpu), 23, 22)); + Scaling scale = scaling (aarch64_get_instr (cpu), 12); + Extension extensionType = extension (aarch64_get_instr (cpu), 13); + + /* Check for illegal extension types. */ + if (uimm (extensionType, 1, 1) == 0) + HALT_UNALLOC; + + if (extensionType == UXTX || extensionType == SXTX) + extensionType = NoExtension; + + if (!V) + { + /* GReg operations. */ + switch (dispatch) + { + case 0: strb_scale_ext (cpu, scale, extensionType); return; + case 1: ldrb32_scale_ext (cpu, scale, extensionType); return; + case 2: ldrsb_scale_ext (cpu, scale, extensionType); return; + case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return; + case 4: strh_scale_ext (cpu, scale, extensionType); return; + case 5: ldrh32_scale_ext (cpu, scale, extensionType); return; + case 6: ldrsh_scale_ext (cpu, scale, extensionType); return; + case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return; + case 8: str32_scale_ext (cpu, scale, extensionType); return; + case 9: ldr32_scale_ext (cpu, scale, extensionType); return; + case 10: ldrsw_scale_ext (cpu, scale, extensionType); return; + case 12: str_scale_ext (cpu, scale, extensionType); return; + case 13: ldr_scale_ext (cpu, scale, extensionType); return; + case 14: prfm_scale_ext (cpu, scale, extensionType); return; + + default: + case 11: + case 15: + HALT_UNALLOC; + } + } + + /* FReg operations. */ + switch (dispatch) + { + case 1: /* LDUR 8 bit FP. */ + HALT_NYI; + case 3: fldrq_scale_ext (cpu, scale, extensionType); return; + case 5: /* LDUR 8 bit FP. 
*/ + HALT_NYI; + case 9: fldrs_scale_ext (cpu, scale, extensionType); return; + case 13: fldrd_scale_ext (cpu, scale, extensionType); return; + + case 0: fstrb_scale_ext (cpu, scale, extensionType); return; + case 2: fstrq_scale_ext (cpu, scale, extensionType); return; + case 4: fstrh_scale_ext (cpu, scale, extensionType); return; + case 8: fstrs_scale_ext (cpu, scale, extensionType); return; + case 12: fstrd_scale_ext (cpu, scale, extensionType); return; + + default: + case 6: + case 7: + case 10: + case 11: + case 14: + case 15: + HALT_UNALLOC; + } +} + +static void +dexLoadUnsignedImmediate (sim_cpu *cpu) +{ + /* assert instr[29,24] == 111_01 + instr[31,30] = size + instr[26] = V + instr[23,22] = opc + instr[21,10] = uimm12 : unsigned immediate offset + instr[9,5] = rn may be SP. */ + /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ + uint32_t V = uimm (aarch64_get_instr (cpu), 26,26); + uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) + | uimm (aarch64_get_instr (cpu), 23, 22)); + uint32_t imm = uimm (aarch64_get_instr (cpu), 21, 10); + + if (!V) + { + /* GReg operations. */ + switch (dispatch) + { + case 0: strb_abs (cpu, imm); return; + case 1: ldrb32_abs (cpu, imm); return; + case 2: ldrsb_abs (cpu, imm); return; + case 3: ldrsb32_abs (cpu, imm); return; + case 4: strh_abs (cpu, imm); return; + case 5: ldrh32_abs (cpu, imm); return; + case 6: ldrsh_abs (cpu, imm); return; + case 7: ldrsh32_abs (cpu, imm); return; + case 8: str32_abs (cpu, imm); return; + case 9: ldr32_abs (cpu, imm); return; + case 10: ldrsw_abs (cpu, imm); return; + case 12: str_abs (cpu, imm); return; + case 13: ldr_abs (cpu, imm); return; + case 14: prfm_abs (cpu, imm); return; + + default: + case 11: + case 15: + HALT_UNALLOC; + } + } + + /* FReg operations. */ + switch (dispatch) + { + case 3: fldrq_abs (cpu, imm); return; + case 9: fldrs_abs (cpu, imm); return; + case 13: fldrd_abs (cpu, imm); return; + + case 0: fstrb_abs (cpu, imm); return; + case 2: fstrq_abs (cpu, imm); return; + case 4: fstrh_abs (cpu, imm); return; + case 8: fstrs_abs (cpu, imm); return; + case 12: fstrd_abs (cpu, imm); return; + + case 1: /* LDR 8 bit FP. */ + case 5: /* LDR 8 bit FP. */ + HALT_NYI; + + default: + case 6: + case 7: + case 10: + case 11: + case 14: + case 15: + HALT_UNALLOC; + } +} + +static void +dexLoadExclusive (sim_cpu *cpu) +{ + /* assert instr[29:24] = 001000; + instr[31,30] = size + instr[23] = 0 if exclusive + instr[22] = L : 1 if load, 0 if store + instr[21] = 1 if pair + instr[20,16] = Rs + instr[15] = o0 : 1 if ordered + instr[14,10] = Rt2 + instr[9,5] = Rn + instr[4.0] = Rt. */ + + switch (uimm (aarch64_get_instr (cpu), 22, 21)) + { + case 2: ldxr (cpu); return; + case 0: stxr (cpu); return; + default: HALT_NYI; + } +} + +static void +dexLoadOther (sim_cpu *cpu) +{ + uint32_t dispatch; + + /* instr[29,25] = 111_0 + instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate + instr[21:11,10] is the secondary dispatch. 
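
/* [Editorial note -- illustrative sketch, not part of the original patch.]
   The load/store decoders above pack the two size bits and the two opc bits
   into one index, dispatch = (size << 2) | opc, and switch on it.  The same
   packing is shown here on a synthetic word (only those two fields are set;
   the helper name bits () is invented and handles fields narrower than 32
   bits).  */

#include <stdint.h>
#include <assert.h>

/* Extract bits hi..lo of a 32-bit word.  */
static uint32_t
bits (uint32_t insn, unsigned hi, unsigned lo)
{
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1u);
}

int
main (void)
{
  /* size = 3 (bits 31,30) and opc = 1 (bits 23,22); other bits left zero.  */
  uint32_t insn = (3u << 30) | (1u << 22);
  uint32_t dispatch = (bits (insn, 31, 30) << 2) | bits (insn, 23, 22);

  assert (dispatch == 13);   /* 13 is the 64-bit load row in the GReg tables.  */
  return 0;
}
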
*/ + if (uimm (aarch64_get_instr (cpu), 24, 24)) + { + dexLoadUnsignedImmediate (cpu); + return; + } + + dispatch = ( (uimm (aarch64_get_instr (cpu), 21, 21) << 2) + | uimm (aarch64_get_instr (cpu), 11, 10)); + switch (dispatch) + { + case 0: dexLoadUnscaledImmediate (cpu); return; + case 1: dexLoadImmediatePrePost (cpu); return; + case 3: dexLoadImmediatePrePost (cpu); return; + case 6: dexLoadRegisterOffset (cpu); return; + + default: + case 2: + case 4: + case 5: + case 7: + HALT_NYI; + } +} + +static void +store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + if ((rn == rd || rm == rd) && wb != NoWriteBack) + HALT_UNALLOC; /* ??? */ + + offset <<= 2; + + if (wb != Post) + address += offset; + + aarch64_set_mem_u32 (cpu, address, + aarch64_get_reg_u32 (cpu, rm, NO_SP)); + aarch64_set_mem_u32 (cpu, address + 4, + aarch64_get_reg_u32 (cpu, rn, NO_SP)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + if ((rn == rd || rm == rd) && wb != NoWriteBack) + HALT_UNALLOC; /* ??? */ + + offset <<= 3; + + if (wb != Post) + address += offset; + + aarch64_set_mem_u64 (cpu, address, + aarch64_get_reg_u64 (cpu, rm, SP_OK)); + aarch64_set_mem_u64 (cpu, address + 8, + aarch64_get_reg_u64 (cpu, rn, SP_OK)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + /* treat this as unalloc to make sure we don't do it. */ + if (rn == rm) + HALT_UNALLOC; + + offset <<= 2; + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address)); + aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + /* Treat this as unalloc to make sure we don't do it. 
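
/* [Editorial note -- illustrative sketch, not part of the original patch.]
   The pair helpers above scale the pair offset (a signed 7-bit immediate in
   the encoding) by the access size: offset <<= 2 for W-register pairs and
   offset <<= 3 for X-register pairs.  The helper simm7 () is invented; it
   sign-extends the raw field portably before scaling.  */

#include <stdint.h>
#include <assert.h>

/* Sign-extend a 7-bit field without relying on shifting negative values.  */
static int32_t
simm7 (uint32_t field)
{
  return ((int32_t) (field & 0x7f) ^ 0x40) - 0x40;
}

int
main (void)
{
  assert (simm7 (0x7f) == -1);
  assert (simm7 (0x3f) == 63);
  assert (simm7 (0x7f) * 8 == -8);   /* 64-bit pair: byte offset is imm7 * 8.  */
  assert (simm7 (0x01) * 4 ==  4);   /* 32-bit pair: byte offset is imm7 * 4.  */
  return 0;
}
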
*/ + if (rn == rm) + HALT_UNALLOC; + + offset <<= 2; + + if (wb != Post) + address += offset; + + aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address)); + aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + /* Treat this as unalloc to make sure we don't do it. */ + if (rn == rm) + HALT_UNALLOC; + + offset <<= 3; + + if (wb != Post) + address += offset; + + aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address)); + aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +dex_load_store_pair_gr (sim_cpu *cpu) +{ + /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit) + instr[29,25] = instruction encoding: 101_0 + instr[26] = V : 1 if fp 0 if gp + instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre) + instr[22] = load/store (1=> load) + instr[21,15] = signed, scaled, offset + instr[14,10] = Rn + instr[ 9, 5] = Rd + instr[ 4, 0] = Rm. */ + + uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 3) + | uimm (aarch64_get_instr (cpu), 24, 22)); + int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); + + switch (dispatch) + { + case 2: store_pair_u32 (cpu, offset, Post); return; + case 3: load_pair_u32 (cpu, offset, Post); return; + case 4: store_pair_u32 (cpu, offset, NoWriteBack); return; + case 5: load_pair_u32 (cpu, offset, NoWriteBack); return; + case 6: store_pair_u32 (cpu, offset, Pre); return; + case 7: load_pair_u32 (cpu, offset, Pre); return; + + case 11: load_pair_s32 (cpu, offset, Post); return; + case 13: load_pair_s32 (cpu, offset, NoWriteBack); return; + case 15: load_pair_s32 (cpu, offset, Pre); return; + + case 18: store_pair_u64 (cpu, offset, Post); return; + case 19: load_pair_u64 (cpu, offset, Post); return; + case 20: store_pair_u64 (cpu, offset, NoWriteBack); return; + case 21: load_pair_u64 (cpu, offset, NoWriteBack); return; + case 22: store_pair_u64 (cpu, offset, Pre); return; + case 23: load_pair_u64 (cpu, offset, Pre); return; + + default: + HALT_UNALLOC; + } +} + +static void +store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + offset <<= 2; + + if (wb != Post) + address += offset; + + aarch64_set_mem_float (cpu, address, aarch64_get_FP_float (cpu, rm)); + aarch64_set_mem_float (cpu, address + 4, aarch64_get_FP_float (cpu, rn)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + offset <<= 
3; + + if (wb != Post) + address += offset; + + aarch64_set_mem_double (cpu, address, aarch64_get_FP_double (cpu, rm)); + aarch64_set_mem_double (cpu, address + 8, aarch64_get_FP_double (cpu, rn)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + FRegister a; + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + offset <<= 4; + + if (wb != Post) + address += offset; + + aarch64_get_FP_long_double (cpu, rm, & a); + aarch64_set_mem_long_double (cpu, address, a); + aarch64_get_FP_long_double (cpu, rn, & a); + aarch64_set_mem_long_double (cpu, address + 16, a); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + if (rm == rn) + HALT_UNALLOC; + + offset <<= 2; + + if (wb != Post) + address += offset; + + aarch64_set_FP_float (cpu, rm, aarch64_get_mem_float (cpu, address)); + aarch64_set_FP_float (cpu, rn, aarch64_get_mem_float (cpu, address + 4)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + if (rm == rn) + HALT_UNALLOC; + + offset <<= 3; + + if (wb != Post) + address += offset; + + aarch64_set_FP_double (cpu, rm, aarch64_get_mem_double (cpu, address)); + aarch64_set_FP_double (cpu, rn, aarch64_get_mem_double (cpu, address + 8)); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) +{ + FRegister a; + unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); + + if (rm == rn) + HALT_UNALLOC; + + offset <<= 4; + + if (wb != Post) + address += offset; + + aarch64_get_mem_long_double (cpu, address, & a); + aarch64_set_FP_long_double (cpu, rm, a); + aarch64_get_mem_long_double (cpu, address + 16, & a); + aarch64_set_FP_long_double (cpu, rn, a); + + if (wb == Post) + address += offset; + + if (wb != NoWriteBack) + aarch64_set_reg_u64 (cpu, rd, SP_OK, address); +} + +static void +dex_load_store_pair_fp (sim_cpu *cpu) +{ + /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit) + instr[29,25] = instruction encoding + instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre) + instr[22] = load/store (1=> load) + instr[21,15] = signed, scaled, offset + instr[14,10] = Rn + instr[ 9, 5] = Rd + instr[ 4, 0] = Rm */ + + uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 3) + | uimm (aarch64_get_instr 
(cpu), 24, 22)); + int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); + + switch (dispatch) + { + case 2: store_pair_float (cpu, offset, Post); return; + case 3: load_pair_float (cpu, offset, Post); return; + case 4: store_pair_float (cpu, offset, NoWriteBack); return; + case 5: load_pair_float (cpu, offset, NoWriteBack); return; + case 6: store_pair_float (cpu, offset, Pre); return; + case 7: load_pair_float (cpu, offset, Pre); return; + + case 10: store_pair_double (cpu, offset, Post); return; + case 11: load_pair_double (cpu, offset, Post); return; + case 12: store_pair_double (cpu, offset, NoWriteBack); return; + case 13: load_pair_double (cpu, offset, NoWriteBack); return; + case 14: store_pair_double (cpu, offset, Pre); return; + case 15: load_pair_double (cpu, offset, Pre); return; + + case 18: store_pair_long_double (cpu, offset, Post); return; + case 19: load_pair_long_double (cpu, offset, Post); return; + case 20: store_pair_long_double (cpu, offset, NoWriteBack); return; + case 21: load_pair_long_double (cpu, offset, NoWriteBack); return; + case 22: store_pair_long_double (cpu, offset, Pre); return; + case 23: load_pair_long_double (cpu, offset, Pre); return; + + default: + HALT_UNALLOC; + } +} + +static inline unsigned +vec_reg (unsigned v, unsigned o) +{ + return (v + o) & 0x3F; +} + +/* Load multiple N-element structures to N consecutive registers. */ +static void +vec_load (sim_cpu *cpu, uint64_t address, unsigned N) +{ + int all = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + switch (size) + { + case 0: /* 8-bit operations. */ + if (all) + for (i = 0; i < (16 * N); i++) + aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15, + aarch64_get_mem_u8 (cpu, address + i)); + else + for (i = 0; i < (8 * N); i++) + aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7, + aarch64_get_mem_u8 (cpu, address + i)); + return; + + case 1: /* 16-bit operations. */ + if (all) + for (i = 0; i < (8 * N); i++) + aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7, + aarch64_get_mem_u16 (cpu, address + i * 2)); + else + for (i = 0; i < (4 * N); i++) + aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3, + aarch64_get_mem_u16 (cpu, address + i * 2)); + return; + + case 2: /* 32-bit operations. */ + if (all) + for (i = 0; i < (4 * N); i++) + aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3, + aarch64_get_mem_u32 (cpu, address + i * 4)); + else + for (i = 0; i < (2 * N); i++) + aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1, + aarch64_get_mem_u32 (cpu, address + i * 4)); + return; + + case 3: /* 64-bit operations. */ + if (all) + for (i = 0; i < (2 * N); i++) + aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1, + aarch64_get_mem_u64 (cpu, address + i * 8)); + else + for (i = 0; i < N; i++) + aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0, + aarch64_get_mem_u64 (cpu, address + i * 8)); + return; + + default: + HALT_UNREACHABLE; + } +} + +/* LD4: load multiple 4-element to four consecutive registers. */ +static void +LD4 (sim_cpu *cpu, uint64_t address) +{ + vec_load (cpu, address, 4); +} + +/* LD3: load multiple 3-element structures to three consecutive registers. */ +static void +LD3 (sim_cpu *cpu, uint64_t address) +{ + vec_load (cpu, address, 3); +} + +/* LD2: load multiple 2-element structures to two consecutive registers. 
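
/* [Editorial note -- illustrative sketch, not part of the original patch.]
   vec_load/vec_store above walk a flat element index i and split it into a
   register offset and a lane; for 8-bit elements with the full-width form
   that is reg = i >> 4 and lane = i & 15, with vec_reg () wrapping the
   register number.  The helpers below are invented restatements of that
   mapping.  */

#include <assert.h>

static unsigned reg_of  (unsigned i, unsigned lanes_per_reg) { return i / lanes_per_reg; }
static unsigned lane_of (unsigned i, unsigned lanes_per_reg) { return i % lanes_per_reg; }

int
main (void)
{
  /* In the loops above, byte element 23 of a two-register 16-byte transfer
     is written to register Vd+1, lane 7, from memory byte address + 23.  */
  assert (reg_of  (23, 16) == 1);
  assert (lane_of (23, 16) == 7);
  return 0;
}
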
*/ +static void +LD2 (sim_cpu *cpu, uint64_t address) +{ + vec_load (cpu, address, 2); +} + +/* Load multiple 1-element structures into one register. */ +static void +LD1_1 (sim_cpu *cpu, uint64_t address) +{ + int all = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + switch (size) + { + case 0: + /* LD1 {Vd.16b}, addr, #16 */ + /* LD1 {Vd.8b}, addr, #8 */ + for (i = 0; i < (all ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_mem_u8 (cpu, address + i)); + return; + + case 1: + /* LD1 {Vd.8h}, addr, #16 */ + /* LD1 {Vd.4h}, addr, #8 */ + for (i = 0; i < (all ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_mem_u16 (cpu, address + i * 2)); + return; + + case 2: + /* LD1 {Vd.4s}, addr, #16 */ + /* LD1 {Vd.2s}, addr, #8 */ + for (i = 0; i < (all ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_mem_u32 (cpu, address + i * 4)); + return; + + case 3: + /* LD1 {Vd.2d}, addr, #16 */ + /* LD1 {Vd.1d}, addr, #8 */ + for (i = 0; i < (all ? 2 : 1); i++) + aarch64_set_vec_u64 (cpu, vd, i, + aarch64_get_mem_u64 (cpu, address + i * 8)); + return; + + default: + HALT_UNREACHABLE; + } +} + +/* Load multiple 1-element structures into two registers. */ +static void +LD1_2 (sim_cpu *cpu, uint64_t address) +{ + /* FIXME: This algorithm is *exactly* the same as the LD2 version. + So why have two different instructions ? There must be something + wrong somewhere. */ + vec_load (cpu, address, 2); +} + +/* Load multiple 1-element structures into three registers. */ +static void +LD1_3 (sim_cpu *cpu, uint64_t address) +{ + /* FIXME: This algorithm is *exactly* the same as the LD3 version. + So why have two different instructions ? There must be something + wrong somewhere. */ + vec_load (cpu, address, 3); +} + +/* Load multiple 1-element structures into four registers. */ +static void +LD1_4 (sim_cpu *cpu, uint64_t address) +{ + /* FIXME: This algorithm is *exactly* the same as the LD4 version. + So why have two different instructions ? There must be something + wrong somewhere. */ + vec_load (cpu, address, 4); +} + +/* Store multiple N-element structures to N consecutive registers. */ +static void +vec_store (sim_cpu *cpu, uint64_t address, unsigned N) +{ + int all = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + switch (size) + { + case 0: /* 8-bit operations. */ + if (all) + for (i = 0; i < (16 * N); i++) + aarch64_set_mem_u8 + (cpu, address + i, + aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15)); + else + for (i = 0; i < (8 * N); i++) + aarch64_set_mem_u8 + (cpu, address + i, + aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7)); + return; + + case 1: /* 16-bit operations. */ + if (all) + for (i = 0; i < (8 * N); i++) + aarch64_set_mem_u16 + (cpu, address + i * 2, + aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7)); + else + for (i = 0; i < (4 * N); i++) + aarch64_set_mem_u16 + (cpu, address + i * 2, + aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3)); + return; + + case 2: /* 32-bit operations. 
*/ + if (all) + for (i = 0; i < (4 * N); i++) + aarch64_set_mem_u32 + (cpu, address + i * 4, + aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3)); + else + for (i = 0; i < (2 * N); i++) + aarch64_set_mem_u32 + (cpu, address + i * 4, + aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1)); + return; + + case 3: /* 64-bit operations. */ + if (all) + for (i = 0; i < (2 * N); i++) + aarch64_set_mem_u64 + (cpu, address + i * 8, + aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1)); + else + for (i = 0; i < N; i++) + aarch64_set_mem_u64 + (cpu, address + i * 8, + aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0)); + return; + + default: + HALT_UNREACHABLE; + } +} + +/* Store multiple 4-element structure to four consecutive registers. */ +static void +ST4 (sim_cpu *cpu, uint64_t address) +{ + vec_store (cpu, address, 4); +} + +/* Store multiple 3-element structures to three consecutive registers. */ +static void +ST3 (sim_cpu *cpu, uint64_t address) +{ + vec_store (cpu, address, 3); +} + +/* Store multiple 2-element structures to two consecutive registers. */ +static void +ST2 (sim_cpu *cpu, uint64_t address) +{ + vec_store (cpu, address, 2); +} + +/* Store multiple 1-element structures into one register. */ +static void +ST1_1 (sim_cpu *cpu, uint64_t address) +{ + int all = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned i; + + switch (size) + { + case 0: + for (i = 0; i < (all ? 16 : 8); i++) + aarch64_set_mem_u8 (cpu, address + i, + aarch64_get_vec_u8 (cpu, vd, i)); + return; + + case 1: + for (i = 0; i < (all ? 8 : 4); i++) + aarch64_set_mem_u16 (cpu, address + i * 2, + aarch64_get_vec_u16 (cpu, vd, i)); + return; + + case 2: + for (i = 0; i < (all ? 4 : 2); i++) + aarch64_set_mem_u32 (cpu, address + i * 4, + aarch64_get_vec_u32 (cpu, vd, i)); + return; + + case 3: + for (i = 0; i < (all ? 2 : 1); i++) + aarch64_set_mem_u64 (cpu, address + i * 8, + aarch64_get_vec_u64 (cpu, vd, i)); + return; + + default: + HALT_UNREACHABLE; + } +} + +/* Store multiple 1-element structures into two registers. */ +static void +ST1_2 (sim_cpu *cpu, uint64_t address) +{ + /* FIXME: This algorithm is *exactly* the same as the ST2 version. + So why have two different instructions ? There must be + something wrong somewhere. */ + vec_store (cpu, address, 2); +} + +/* Store multiple 1-element structures into three registers. */ +static void +ST1_3 (sim_cpu *cpu, uint64_t address) +{ + /* FIXME: This algorithm is *exactly* the same as the ST3 version. + So why have two different instructions ? There must be + something wrong somewhere. */ + vec_store (cpu, address, 3); +} + +/* Store multiple 1-element structures into four registers. */ +static void +ST1_4 (sim_cpu *cpu, uint64_t address) +{ + /* FIXME: This algorithm is *exactly* the same as the ST4 version. + So why have two different instructions ? There must be + something wrong somewhere. 
*/ + vec_store (cpu, address, 4); +} + +static void +do_vec_LDnR (sim_cpu *cpu, uint64_t address) +{ + /* instr[31] = 0 + instr[30] = element selector 0=>half, 1=>all elements + instr[29,24] = 00 1101 + instr[23] = 0=>simple, 1=>post + instr[22] = 1 + instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1) + instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), + 11111 (immediate post inc) + instr[15,14] = 11 + instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1) + instr[12] = 0 + instr[11,10] = element size 00=> byte(b), 01=> half(h), + 10=> word(s), 11=> double(d) + instr[9,5] = address + instr[4,0] = Vd */ + + unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); + int i; + + NYI_assert (29, 24, 0x0D); + NYI_assert (22, 22, 1); + NYI_assert (15, 14, 3); + NYI_assert (12, 12, 0); + + switch ((uimm (aarch64_get_instr (cpu), 13, 13) << 1) + | uimm (aarch64_get_instr (cpu), 21, 21)) + { + case 0: /* LD1R. */ + switch (size) + { + case 0: + { + uint8_t val = aarch64_get_mem_u8 (cpu, address); + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, val); + break; + } + + case 1: + { + uint16_t val = aarch64_get_mem_u16 (cpu, address); + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, val); + break; + } + + case 2: + { + uint32_t val = aarch64_get_mem_u32 (cpu, address); + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, val); + break; + } + + case 3: + { + uint64_t val = aarch64_get_mem_u64 (cpu, address); + for (i = 0; i < (full ? 2 : 1); i++) + aarch64_set_vec_u64 (cpu, vd, i, val); + break; + } + + default: + HALT_UNALLOC; + } + break; + + case 1: /* LD2R. */ + switch (size) + { + case 0: + { + uint8_t val1 = aarch64_get_mem_u8 (cpu, address); + uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1); + + for (i = 0; i < (full ? 16 : 8); i++) + { + aarch64_set_vec_u8 (cpu, vd, 0, val1); + aarch64_set_vec_u8 (cpu, vd + 1, 0, val2); + } + break; + } + + case 1: + { + uint16_t val1 = aarch64_get_mem_u16 (cpu, address); + uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2); + + for (i = 0; i < (full ? 8 : 4); i++) + { + aarch64_set_vec_u16 (cpu, vd, 0, val1); + aarch64_set_vec_u16 (cpu, vd + 1, 0, val2); + } + break; + } + + case 2: + { + uint32_t val1 = aarch64_get_mem_u32 (cpu, address); + uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4); + + for (i = 0; i < (full ? 4 : 2); i++) + { + aarch64_set_vec_u32 (cpu, vd, 0, val1); + aarch64_set_vec_u32 (cpu, vd + 1, 0, val2); + } + break; + } + + case 3: + { + uint64_t val1 = aarch64_get_mem_u64 (cpu, address); + uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8); + + for (i = 0; i < (full ? 2 : 1); i++) + { + aarch64_set_vec_u64 (cpu, vd, 0, val1); + aarch64_set_vec_u64 (cpu, vd + 1, 0, val2); + } + break; + } + + default: + HALT_UNALLOC; + } + break; + + case 2: /* LD3R. */ + switch (size) + { + case 0: + { + uint8_t val1 = aarch64_get_mem_u8 (cpu, address); + uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1); + uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2); + + for (i = 0; i < (full ? 
16 : 8); i++) + { + aarch64_set_vec_u8 (cpu, vd, 0, val1); + aarch64_set_vec_u8 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u8 (cpu, vd + 2, 0, val3); + } + } + break; + + case 1: + { + uint32_t val1 = aarch64_get_mem_u16 (cpu, address); + uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2); + uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4); + + for (i = 0; i < (full ? 8 : 4); i++) + { + aarch64_set_vec_u16 (cpu, vd, 0, val1); + aarch64_set_vec_u16 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u16 (cpu, vd + 2, 0, val3); + } + } + break; + + case 2: + { + uint32_t val1 = aarch64_get_mem_u32 (cpu, address); + uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4); + uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8); + + for (i = 0; i < (full ? 4 : 2); i++) + { + aarch64_set_vec_u32 (cpu, vd, 0, val1); + aarch64_set_vec_u32 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u32 (cpu, vd + 2, 0, val3); + } + } + break; + + case 3: + { + uint64_t val1 = aarch64_get_mem_u64 (cpu, address); + uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8); + uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16); + + for (i = 0; i < (full ? 2 : 1); i++) + { + aarch64_set_vec_u64 (cpu, vd, 0, val1); + aarch64_set_vec_u64 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u64 (cpu, vd + 2, 0, val3); + } + } + break; + + default: + HALT_UNALLOC; + } + break; + + case 3: /* LD4R. */ + switch (size) + { + case 0: + { + uint8_t val1 = aarch64_get_mem_u8 (cpu, address); + uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1); + uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2); + uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3); + + for (i = 0; i < (full ? 16 : 8); i++) + { + aarch64_set_vec_u8 (cpu, vd, 0, val1); + aarch64_set_vec_u8 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u8 (cpu, vd + 2, 0, val3); + aarch64_set_vec_u8 (cpu, vd + 3, 0, val4); + } + } + break; + + case 1: + { + uint32_t val1 = aarch64_get_mem_u16 (cpu, address); + uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2); + uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4); + uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6); + + for (i = 0; i < (full ? 8 : 4); i++) + { + aarch64_set_vec_u16 (cpu, vd, 0, val1); + aarch64_set_vec_u16 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u16 (cpu, vd + 2, 0, val3); + aarch64_set_vec_u16 (cpu, vd + 3, 0, val4); + } + } + break; + + case 2: + { + uint32_t val1 = aarch64_get_mem_u32 (cpu, address); + uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4); + uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8); + uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12); + + for (i = 0; i < (full ? 4 : 2); i++) + { + aarch64_set_vec_u32 (cpu, vd, 0, val1); + aarch64_set_vec_u32 (cpu, vd + 1, 0, val2); + aarch64_set_vec_u32 (cpu, vd + 2, 0, val3); + aarch64_set_vec_u32 (cpu, vd + 3, 0, val4); + } + } + break; + + case 3: + { + uint64_t val1 = aarch64_get_mem_u64 (cpu, address); + uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8); + uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16); + uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24); + + for (i = 0; i < (full ? 
2 : 1); i++)
+	    {
+	      aarch64_set_vec_u64 (cpu, vd, 0, val1);
+	      aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
+	      aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
+	      aarch64_set_vec_u64 (cpu, vd + 3, 0, val4);
+	    }
+	}
+	break;
+
+      default:
+	HALT_UNALLOC;
+      }
+      break;
+
+    default:
+      HALT_UNALLOC;
+    }
+}
+
+static void
+do_vec_load_store (sim_cpu *cpu)
+{
+  /* {LD|ST}<N>   {Vd..Vd+N}, vaddr
+
+     instr[31]    = 0
+     instr[30]    = element selector 0=>half, 1=>all elements
+     instr[29,25] = 00110
+     instr[24]    = ?
+     instr[23]    = 0=>simple, 1=>post
+     instr[22]    = 0=>store, 1=>load
+     instr[21]    = 0 (LDn) / small(0)-large(1) selector (LDnR)
+     instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
+                    11111 (immediate post inc)
+     instr[15,12] = elements and destinations.  e.g. for load:
+                     0000=>LD4 => load multiple 4-element to
+                     four consecutive registers
+                     0100=>LD3 => load multiple 3-element to
+                     three consecutive registers
+                     1000=>LD2 => load multiple 2-element to
+                     two consecutive registers
+                     0010=>LD1 => load multiple 1-element to
+                     four consecutive registers
+                     0110=>LD1 => load multiple 1-element to
+                     three consecutive registers
+                     1010=>LD1 => load multiple 1-element to
+                     two consecutive registers
+                     0111=>LD1 => load multiple 1-element to
+                     one register
+                     1100=>LD1R,LD2R
+                     1110=>LD3R,LD4R
+     instr[11,10] = element size 00=> byte(b), 01=> half(h),
+                                 10=> word(s), 11=> double(d)
+     instr[9,5]   = Vn, can be SP
+     instr[4,0]   = Vd  */
+
+  int post;
+  int load;
+  unsigned vn;
+  uint64_t address;
+  int type;
+
+  if (uimm (aarch64_get_instr (cpu), 31, 31) != 0
+      || uimm (aarch64_get_instr (cpu), 29, 25) != 0x06)
+    HALT_NYI;
+
+  type = uimm (aarch64_get_instr (cpu), 15, 12);
+  if (type != 0xE && type != 0xC && uimm (aarch64_get_instr (cpu), 21, 21) != 0)
+    HALT_NYI;
+
+  post = uimm (aarch64_get_instr (cpu), 23, 23);
+  load = uimm (aarch64_get_instr (cpu), 22, 22);
+  vn = uimm (aarch64_get_instr (cpu), 9, 5);
+  address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
+
+  if (post)
+    {
+      unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+
+      if (vm == R31)
+	{
+	  unsigned sizeof_operation;
+
+	  switch (type)
+	    {
+	    case 0:  sizeof_operation = 32; break;
+	    case 4:  sizeof_operation = 24; break;
+	    case 8:  sizeof_operation = 16; break;
+
+	    case 0xC:
+	      sizeof_operation = uimm (aarch64_get_instr (cpu), 21, 21) ? 2 : 1;
+	      sizeof_operation <<= uimm (aarch64_get_instr (cpu), 11, 10);
+	      break;
+
+	    case 0xE:
+	      sizeof_operation = uimm (aarch64_get_instr (cpu), 21, 21) ? 8 : 4;
+	      sizeof_operation <<= uimm (aarch64_get_instr (cpu), 11, 10);
+	      break;
+
+	    case 2:
+	    case 6:
+	    case 10:
+	    case 7:
+	      sizeof_operation = 2 << uimm (aarch64_get_instr (cpu), 11, 10);
+	      break;
+
+	    default:
+	      HALT_UNALLOC;
+	    }
+
+	  if (uimm (aarch64_get_instr (cpu), 30, 30))
+	    sizeof_operation *= 2;
+
+	  aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
+	}
+      else
+	aarch64_set_reg_u64 (cpu, vn, SP_OK,
+			     address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
+    }
+  else
+    {
+      NYI_assert (20, 16, 0);
+    }
+
+  if (load)
+    {
+      switch (type)
+	{
+	case 0:  LD4 (cpu, address); return;
+	case 4:  LD3 (cpu, address); return;
+	case 8:  LD2 (cpu, address); return;
+	case 2:  LD1_4 (cpu, address); return;
+	case 6:  LD1_3 (cpu, address); return;
+	case 10: LD1_2 (cpu, address); return;
+	case 7:  LD1_1 (cpu, address); return;
+
+	case 0xE:
+	case 0xC: do_vec_LDnR (cpu, address); return;
+
+	default:
+	  HALT_NYI;
+	}
+    }
+
+  /* Stores.
*/ + switch (type) + { + case 0: ST4 (cpu, address); return; + case 4: ST3 (cpu, address); return; + case 8: ST2 (cpu, address); return; + case 2: ST1_4 (cpu, address); return; + case 6: ST1_3 (cpu, address); return; + case 10: ST1_2 (cpu, address); return; + case 7: ST1_1 (cpu, address); return; + default: + HALT_NYI; + } +} + +static void +dexLdSt (sim_cpu *cpu) +{ + /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); + assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 || + group == GROUP_LDST_1100 || group == GROUP_LDST_1110 + bits [29,28:26] of a LS are the secondary dispatch vector. */ + uint32_t group2 = dispatchLS (aarch64_get_instr (cpu)); + + switch (group2) + { + case LS_EXCL_000: + dexLoadExclusive (cpu); return; + + case LS_LIT_010: + case LS_LIT_011: + dexLoadLiteral (cpu); return; + + case LS_OTHER_110: + case LS_OTHER_111: + dexLoadOther (cpu); return; + + case LS_ADVSIMD_001: + do_vec_load_store (cpu); return; + + case LS_PAIR_100: + dex_load_store_pair_gr (cpu); return; + + case LS_PAIR_101: + dex_load_store_pair_fp (cpu); return; + + default: + /* Should never reach here. */ + HALT_NYI; + } +} + +/* Specific decode and execute for group Data Processing Register. */ + +static void +dexLogicalShiftedRegister (sim_cpu *cpu) +{ + /* assert instr[28:24] = 01010 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30,29:21] = op,N : 000 ==> AND, 001 ==> BIC, + 010 ==> ORR, 011 ==> ORN + 100 ==> EOR, 101 ==> EON, + 110 ==> ANDS, 111 ==> BICS + instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR + instr[15,10] = count : must be 0xxxxx for 32 bit + instr[9,5] = Rn + instr[4,0] = Rd */ + + /* unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); */ + uint32_t dispatch; + Shift shiftType; + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + + /* 32 bit operations must have count[5] = 0. */ + /* or else we have an UNALLOC. */ + uint32_t count = uimm (aarch64_get_instr (cpu), 15, 10); + + if (!size && uimm (count, 5, 5)) + HALT_UNALLOC; + + shiftType = shift (aarch64_get_instr (cpu), 22); + + /* dispatch on size:op:N i.e aarch64_get_instr (cpu)[31,29:21]. */ + dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 29) << 1) + | uimm (aarch64_get_instr (cpu), 21, 21)); + + switch (dispatch) + { + case 0: and32_shift (cpu, shiftType, count); return; + case 1: bic32_shift (cpu, shiftType, count); return; + case 2: orr32_shift (cpu, shiftType, count); return; + case 3: orn32_shift (cpu, shiftType, count); return; + case 4: eor32_shift (cpu, shiftType, count); return; + case 5: eon32_shift (cpu, shiftType, count); return; + case 6: ands32_shift (cpu, shiftType, count); return; + case 7: bics32_shift (cpu, shiftType, count); return; + case 8: and64_shift (cpu, shiftType, count); return; + case 9: bic64_shift (cpu, shiftType, count); return; + case 10:orr64_shift (cpu, shiftType, count); return; + case 11:orn64_shift (cpu, shiftType, count); return; + case 12:eor64_shift (cpu, shiftType, count); return; + case 13:eon64_shift (cpu, shiftType, count); return; + case 14:ands64_shift (cpu, shiftType, count); return; + case 15:bics64_shift (cpu, shiftType, count); return; + default: HALT_UNALLOC; + } +} + +/* 32 bit conditional select. */ +static void +csel32 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? 
aarch64_get_reg_u32 (cpu, rn, NO_SP) + : aarch64_get_reg_u32 (cpu, rm, NO_SP)); +} + +/* 64 bit conditional select. */ +static void +csel64 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? aarch64_get_reg_u64 (cpu, rn, NO_SP) + : aarch64_get_reg_u64 (cpu, rm, NO_SP)); +} + +/* 32 bit conditional increment. */ +static void +csinc32 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? aarch64_get_reg_u32 (cpu, rn, NO_SP) + : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1); +} + +/* 64 bit conditional increment. */ +static void +csinc64 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? aarch64_get_reg_u64 (cpu, rn, NO_SP) + : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1); +} + +/* 32 bit conditional invert. */ +static void +csinv32 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? aarch64_get_reg_u32 (cpu, rn, NO_SP) + : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP)); +} + +/* 64 bit conditional invert. */ +static void +csinv64 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? aarch64_get_reg_u64 (cpu, rn, NO_SP) + : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP)); +} + +/* 32 bit conditional negate. */ +static void +csneg32 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? aarch64_get_reg_u32 (cpu, rn, NO_SP) + : - aarch64_get_reg_u32 (cpu, rm, NO_SP)); +} + +/* 64 bit conditional negate. */ +static void +csneg64 (sim_cpu *cpu, CondCode cc) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + testConditionCode (cpu, cc) + ? 
aarch64_get_reg_u64 (cpu, rn, NO_SP) + : - aarch64_get_reg_u64 (cpu, rm, NO_SP)); +} + +static void +dexCondSelect (sim_cpu *cpu) +{ + /* assert instr[28,21] = 11011011 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC, + 100 ==> CSINV, 101 ==> CSNEG, + _1_ ==> UNALLOC + instr[29] = S : 0 ==> ok, 1 ==> UNALLOC + instr[15,12] = cond + instr[29] = S : 0 ==> ok, 1 ==> UNALLOC */ + + CondCode cc; + uint32_t dispatch; + uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t op2 = uimm (aarch64_get_instr (cpu), 11, 10); + + if (S == 1) + HALT_UNALLOC; + + if (op2 & 0x2) + HALT_UNALLOC; + + cc = condcode (aarch64_get_instr (cpu), 12); + dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 1) | op2); + + switch (dispatch) + { + case 0: csel32 (cpu, cc); return; + case 1: csinc32 (cpu, cc); return; + case 2: csinv32 (cpu, cc); return; + case 3: csneg32 (cpu, cc); return; + case 4: csel64 (cpu, cc); return; + case 5: csinc64 (cpu, cc); return; + case 6: csinv64 (cpu, cc); return; + case 7: csneg64 (cpu, cc); return; + default: HALT_UNALLOC; + } +} + +/* Some helpers for counting leading 1 or 0 bits. */ + +/* Counts the number of leading bits which are the same + in a 32 bit value in the range 1 to 32. */ +static uint32_t +leading32 (uint32_t value) +{ + int32_t mask= 0xffff0000; + uint32_t count= 16; /* Counts number of bits set in mask. */ + uint32_t lo = 1; /* Lower bound for number of sign bits. */ + uint32_t hi = 32; /* Upper bound for number of sign bits. */ + + while (lo + 1 < hi) + { + int32_t test = (value & mask); + + if (test == 0 || test == mask) + { + lo = count; + count = (lo + hi) / 2; + mask >>= (count - lo); + } + else + { + hi = count; + count = (lo + hi) / 2; + mask <<= hi - count; + } + } + + if (lo != hi) + { + int32_t test; + + mask >>= 1; + test = (value & mask); + + if (test == 0 || test == mask) + count = hi; + else + count = lo; + } + + return count; +} + +/* Counts the number of leading bits which are the same + in a 64 bit value in the range 1 to 64. */ +static uint64_t +leading64 (uint64_t value) +{ + int64_t mask= 0xffffffff00000000LL; + uint64_t count = 32; /* Counts number of bits set in mask. */ + uint64_t lo = 1; /* Lower bound for number of sign bits. */ + uint64_t hi = 64; /* Upper bound for number of sign bits. */ + + while (lo + 1 < hi) + { + int64_t test = (value & mask); + + if (test == 0 || test == mask) + { + lo = count; + count = (lo + hi) / 2; + mask >>= (count - lo); + } + else + { + hi = count; + count = (lo + hi) / 2; + mask <<= hi - count; + } + } + + if (lo != hi) + { + int64_t test; + + mask >>= 1; + test = (value & mask); + + if (test == 0 || test == mask) + count = hi; + else + count = lo; + } + + return count; +} + +/* Bit operations. */ +/* N.B register args may not be SP. */ + +/* 32 bit count leading sign bits. */ +static void +cls32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. the result needs to exclude the leading bit. */ + aarch64_set_reg_u64 + (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1); +} + +/* 64 bit count leading sign bits. */ +static void +cls64 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. the result needs to exclude the leading bit. 
*/ + aarch64_set_reg_u64 + (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1); +} + +/* 32 bit count leading zero bits. */ +static void +clz32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); + + /* if the sign (top) bit is set then the count is 0. */ + if (pick32 (value, 31, 31)) + aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L); + else + aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value)); +} + +/* 64 bit count leading zero bits. */ +static void +clz64 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); + + /* if the sign (top) bit is set then the count is 0. */ + if (pick64 (value, 63, 63)) + aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L); + else + aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value)); +} + +/* 32 bit reverse bits. */ +static void +rbit32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t result = 0; + int i; + + for (i = 0; i < 32; i++) + { + result <<= 1; + result |= (value & 1); + value >>= 1; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); +} + +/* 64 bit reverse bits. */ +static void +rbit64 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t result = 0; + int i; + + for (i = 0; i < 64; i++) + { + result <<= 1; + result |= (value & 1L); + value >>= 1; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); +} + +/* 32 bit reverse bytes. */ +static void +rev32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t result = 0; + int i; + + for (i = 0; i < 4; i++) + { + result <<= 8; + result |= (value & 0xff); + value >>= 8; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); +} + +/* 64 bit reverse bytes. */ +static void +rev64 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t result = 0; + int i; + + for (i = 0; i < 8; i++) + { + result <<= 8; + result |= (value & 0xffULL); + value >>= 8; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); +} + +/* 32 bit reverse shorts. */ +/* N.B.this reverses the order of the bytes in each half word. */ +static void +revh32 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint32_t result = 0; + int i; + + for (i = 0; i < 2; i++) + { + result <<= 8; + result |= (value & 0x00ff00ff); + value >>= 8; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); +} + +/* 64 bit reverse shorts. */ +/* N.B.this reverses the order of the bytes in each half word. 
*/ +static void +revh64 (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); + uint64_t result = 0; + int i; + + for (i = 0; i < 2; i++) + { + result <<= 8; + result |= (value & 0x00ff00ff00ff00ffULL); + value >>= 8; + } + aarch64_set_reg_u64 (cpu, rd, NO_SP, result); +} + +static void +dexDataProc1Source (sim_cpu *cpu) +{ + /* assert instr[30] == 1 + aarch64_get_instr (cpu)[28,21] == 111010110 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[29] = S : 0 ==> ok, 1 ==> UNALLOC + instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC + instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16, + 000010 ==> REV, 000011 ==> UNALLOC + 000100 ==> CLZ, 000101 ==> CLS + ow ==> UNALLOC + instr[9,5] = rn : may not be SP + instr[4,0] = rd : may not be SP. */ + + uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t opcode2 = uimm (aarch64_get_instr (cpu), 20, 16); + uint32_t opcode = uimm (aarch64_get_instr (cpu), 15, 10); + uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 31) << 3) | opcode); + + if (S == 1) + HALT_UNALLOC; + + if (opcode2 != 0) + HALT_UNALLOC; + + if (opcode & 0x38) + HALT_UNALLOC; + + switch (dispatch) + { + case 0: rbit32 (cpu); return; + case 1: revh32 (cpu); return; + case 2: rev32 (cpu); return; + case 4: clz32 (cpu); return; + case 5: cls32 (cpu); return; + case 8: rbit64 (cpu); return; + case 9: revh64 (cpu); return; + case 10:rev32 (cpu); return; + case 11:rev64 (cpu); return; + case 12:clz64 (cpu); return; + case 13:cls64 (cpu); return; + default: HALT_UNALLOC; + } +} + +/* Variable shift. + Shifts by count supplied in register. + N.B register args may not be SP. + These all use the shifted auxiliary function for + simplicity and clarity. Writing the actual shift + inline would avoid a branch and so be faster but + would also necessitate getting signs right. */ + +/* 32 bit arithmetic shift right. */ +static void +asrv32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR, + (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); +} + +/* 64 bit arithmetic shift right. */ +static void +asrv64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR, + (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); +} + +/* 32 bit logical shift left. */ +static void +lslv32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL, + (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); +} + +/* 64 bit arithmetic shift left. 
*/ +static void +lslv64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL, + (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); +} + +/* 32 bit logical shift right. */ +static void +lsrv32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR, + (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); +} + +/* 64 bit logical shift right. */ +static void +lsrv64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR, + (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); +} + +/* 32 bit rotate right. */ +static void +rorv32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR, + (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); +} + +/* 64 bit rotate right. */ +static void +rorv64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR, + (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); +} + + +/* divide. */ + +/* 32 bit signed divide. */ +static void +cpuiv32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + /* N.B. the pseudo-code does the divide using 64 bit data. */ + /* TODO : check that this rounds towards zero as required. */ + int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP); + int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP); + + aarch64_set_reg_s64 (cpu, rd, NO_SP, + divisor ? ((int32_t) (dividend / divisor)) : 0); +} + +/* 64 bit signed divide. */ +static void +cpuiv64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* TODO : check that this rounds towards zero as required. */ + int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP); + + aarch64_set_reg_s64 + (cpu, rd, NO_SP, + divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0); +} + +/* 32 bit unsigned divide. */ +static void +udiv32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. the pseudo-code does the divide using 64 bit data. */ + uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP); + uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + divisor ? (uint32_t) (dividend / divisor) : 0); +} + +/* 64 bit unsigned divide. 
*/ +static void +udiv64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* TODO : check that this rounds towards zero as required. */ + uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP); + + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0); +} + +static void +dexDataProc2Source (sim_cpu *cpu) +{ + /* assert instr[30] == 0 + instr[28,21] == 11010110 + instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit + instr[29] = S : 0 ==> ok, 1 ==> UNALLOC + instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV, + 001000 ==> LSLV, 001001 ==> LSRV + 001010 ==> ASRV, 001011 ==> RORV + ow ==> UNALLOC. */ + + uint32_t dispatch; + uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t opcode = uimm (aarch64_get_instr (cpu), 15, 10); + + if (S == 1) + HALT_UNALLOC; + + if (opcode & 0x34) + HALT_UNALLOC; + + dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 31) << 3) + | (uimm (opcode, 3, 3) << 2) + | uimm (opcode, 1, 0)); + switch (dispatch) + { + case 2: udiv32 (cpu); return; + case 3: cpuiv32 (cpu); return; + case 4: lslv32 (cpu); return; + case 5: lsrv32 (cpu); return; + case 6: asrv32 (cpu); return; + case 7: rorv32 (cpu); return; + case 10: udiv64 (cpu); return; + case 11: cpuiv64 (cpu); return; + case 12: lslv64 (cpu); return; + case 13: lsrv64 (cpu); return; + case 14: asrv64 (cpu); return; + case 15: rorv64 (cpu); return; + default: HALT_UNALLOC; + } +} + + +/* Multiply. */ + +/* 32 bit multiply and add. */ +static void +madd32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u32 (cpu, ra, NO_SP) + + aarch64_get_reg_u32 (cpu, rn, NO_SP) + * aarch64_get_reg_u32 (cpu, rm, NO_SP)); +} + +/* 64 bit multiply and add. */ +static void +madd64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, ra, NO_SP) + + aarch64_get_reg_u64 (cpu, rn, NO_SP) + * aarch64_get_reg_u64 (cpu, rm, NO_SP)); +} + +/* 32 bit multiply and sub. */ +static void +msub32 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u32 (cpu, ra, NO_SP) + - aarch64_get_reg_u32 (cpu, rn, NO_SP) + * aarch64_get_reg_u32 (cpu, rm, NO_SP)); +} + +/* 64 bit multiply and sub. */ +static void +msub64 (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, ra, NO_SP) + - aarch64_get_reg_u64 (cpu, rn, NO_SP) + * aarch64_get_reg_u64 (cpu, rm, NO_SP)); +} + +/* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. 
*/ +static void +smaddl (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. we need to multiply the signed 32 bit values in rn, rm to + obtain a 64 bit product. */ + aarch64_set_reg_s64 + (cpu, rd, NO_SP, + aarch64_get_reg_s64 (cpu, ra, NO_SP) + + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP)) + * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP))); +} + +/* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */ +static void +smsubl (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. we need to multiply the signed 32 bit values in rn, rm to + obtain a 64 bit product. */ + aarch64_set_reg_s64 + (cpu, rd, NO_SP, + aarch64_get_reg_s64 (cpu, ra, NO_SP) + - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP)) + * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP))); +} + +/* Integer Multiply/Divide. */ + +/* First some macros and a helper function. */ +/* Macros to test or access elements of 64 bit words. */ + +/* Mask used to access lo 32 bits of 64 bit unsigned int. */ +#define LOW_WORD_MASK ((1ULL << 32) - 1) +/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */ +#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK) +/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */ +#define highWordToU64(_value_u64) ((_value_u64) >> 32) + +/* Offset of sign bit in 64 bit signed integger. */ +#define SIGN_SHIFT_U64 63 +/* The sign bit itself -- also identifies the minimum negative int value. */ +#define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64) +/* Return true if a 64 bit signed int presented as an unsigned int is the + most negative value. */ +#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64) +/* Return true (non-zero) if a 64 bit signed int presented as an unsigned + int has its sign bit set to false. */ +#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64) +/* Return 1L or -1L according to whether a 64 bit signed int presented as + an unsigned int has its sign bit set or not. */ +#define signOfU64(_value_u64) (1L + (((value_u64) >> SIGN_SHIFT_U64) * -2L) +/* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */ +#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64) + +/* Multiply two 64 bit ints and return. + the hi 64 bits of the 128 bit product. */ + +static uint64_t +mul64hi (uint64_t value1, uint64_t value2) +{ + uint64_t resultmid1; + uint64_t result; + uint64_t value1_lo = lowWordToU64 (value1); + uint64_t value1_hi = highWordToU64 (value1) ; + uint64_t value2_lo = lowWordToU64 (value2); + uint64_t value2_hi = highWordToU64 (value2); + + /* Cross-multiply and collect results. */ + + uint64_t xproductlo = value1_lo * value2_lo; + uint64_t xproductmid1 = value1_lo * value2_hi; + uint64_t xproductmid2 = value1_hi * value2_lo; + uint64_t xproducthi = value1_hi * value2_hi; + uint64_t carry = 0; + /* Start accumulating 64 bit results. */ + /* Drop bottom half of lowest cross-product. */ + uint64_t resultmid = xproductlo >> 32; + /* Add in middle products. */ + resultmid = resultmid + xproductmid1; + + /* Check for overflow. 
*/ + if (resultmid < xproductmid1) + /* Carry over 1 into top cross-product. */ + carry++; + + resultmid1 = resultmid + xproductmid2; + + /* Check for overflow. */ + if (resultmid1 < xproductmid2) + /* Carry over 1 into top cross-product. */ + carry++; + + /* Drop lowest 32 bits of middle cross-product. */ + result = resultmid1 >> 32; + + /* Add top cross-product plus and any carry. */ + result += xproducthi + carry; + + return result; +} + +/* Signed multiply high, source, source2 : + 64 bit, dest <-- high 64-bit of result. */ +static void +smulh (sim_cpu *cpu) +{ + uint64_t uresult; + int64_t result; + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + GReg ra = greg (aarch64_get_instr (cpu), 10); + int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); + int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); + uint64_t uvalue1; + uint64_t uvalue2; + int64_t signum = 1; + + if (ra != R31) + HALT_UNALLOC; + + /* Convert to unsigned and use the unsigned mul64hi routine + the fix the sign up afterwards. */ + if (value1 < 0) + { + signum *= -1L; + uvalue1 = -value1; + } + else + { + uvalue1 = value1; + } + + if (value2 < 0) + { + signum *= -1L; + uvalue2 = -value2; + } + else + { + uvalue2 = value2; + } + + uresult = mul64hi (uvalue1, uvalue2); + result = uresult; + result *= signum; + + aarch64_set_reg_s64 (cpu, rd, NO_SP, result); +} + +/* Unsigned multiply add long -- source, source2 : + 32 bit, source3 : 64 bit. */ +static void +umaddl (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. we need to multiply the signed 32 bit values in rn, rm to + obtain a 64 bit product. */ + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, ra, NO_SP) + + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP)) + * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP))); +} + +/* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */ +static void +umsubl (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + + /* N.B. we need to multiply the signed 32 bit values in rn, rm to + obtain a 64 bit product. */ + aarch64_set_reg_u64 + (cpu, rd, NO_SP, + aarch64_get_reg_u64 (cpu, ra, NO_SP) + - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP)) + * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP))); +} + +/* Unsigned multiply high, source, source2 : + 64 bit, dest <-- high 64-bit of result. */ +static void +umulh (sim_cpu *cpu) +{ + unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + GReg ra = greg (aarch64_get_instr (cpu), 10); + + if (ra != R31) + HALT_UNALLOC; + + aarch64_set_reg_u64 (cpu, rd, NO_SP, + mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP), + aarch64_get_reg_u64 (cpu, rm, NO_SP))); +} + +static void +dexDataProc3Source (sim_cpu *cpu) +{ + /* assert instr[28,24] == 11011. 
*/ + /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least) + instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC + instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok + instr[15] = o0 : 0/1 ==> ok + instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit) + 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only) + 0100 ==> SMULH, (64 bit only) + 1010 ==> UMADDL, 1011 ==> UNSUBL, (64 bit only) + 1100 ==> UMULH (64 bit only) + ow ==> UNALLOC. */ + + uint32_t dispatch; + uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); + uint32_t op54 = uimm (aarch64_get_instr (cpu), 30, 29); + uint32_t op31 = uimm (aarch64_get_instr (cpu), 23, 21); + uint32_t o0 = uimm (aarch64_get_instr (cpu), 15, 15); + + if (op54 != 0) + HALT_UNALLOC; + + if (size == 0) + { + if (op31 != 0) + HALT_UNALLOC; + + if (o0 == 0) + madd32 (cpu); + else + msub32 (cpu); + return; + } + + dispatch = (op31 << 1) | o0; + + switch (dispatch) + { + case 0: madd64 (cpu); return; + case 1: msub64 (cpu); return; + case 2: smaddl (cpu); return; + case 3: smsubl (cpu); return; + case 4: smulh (cpu); return; + case 10: umaddl (cpu); return; + case 11: umsubl (cpu); return; + case 12: umulh (cpu); return; + default: HALT_UNALLOC; + } +} + +static void +dexDPReg (sim_cpu *cpu) +{ + /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); + assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101 + bits [28:24:21] of a DPReg are the secondary dispatch vector. */ + uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu)); + + switch (group2) + { + case DPREG_LOG_000: + case DPREG_LOG_001: + dexLogicalShiftedRegister (cpu); return; + + case DPREG_ADDSHF_010: + dexAddSubtractShiftedRegister (cpu); return; + + case DPREG_ADDEXT_011: + dexAddSubtractExtendedRegister (cpu); return; + + case DPREG_ADDCOND_100: + { + /* This set bundles a variety of different operations. */ + /* Check for. */ + /* 1) add/sub w carry. */ + uint32_t mask1 = 0x1FE00000U; + uint32_t val1 = 0x1A000000U; + /* 2) cond compare register/immediate. */ + uint32_t mask2 = 0x1FE00000U; + uint32_t val2 = 0x1A400000U; + /* 3) cond select. */ + uint32_t mask3 = 0x1FE00000U; + uint32_t val3 = 0x1A800000U; + /* 4) data proc 1/2 source. */ + uint32_t mask4 = 0x1FE00000U; + uint32_t val4 = 0x1AC00000U; + + if ((aarch64_get_instr (cpu) & mask1) == val1) + dexAddSubtractWithCarry (cpu); + + else if ((aarch64_get_instr (cpu) & mask2) == val2) + CondCompare (cpu); + + else if ((aarch64_get_instr (cpu) & mask3) == val3) + dexCondSelect (cpu); + + else if ((aarch64_get_instr (cpu) & mask4) == val4) + { + /* Bit 30 is clear for data proc 2 source + and set for data proc 1 source. */ + if (aarch64_get_instr (cpu) & (1U << 30)) + dexDataProc1Source (cpu); + else + dexDataProc2Source (cpu); + } + + else + /* Should not reach here. */ + HALT_NYI; + + return; + } + + case DPREG_3SRC_110: + dexDataProc3Source (cpu); return; + + case DPREG_UNALLOC_101: + HALT_UNALLOC; + + case DPREG_3SRC_111: + dexDataProc3Source (cpu); return; + + default: + /* Should never reach here. */ + HALT_NYI; + } +} + +/* Unconditional Branch immediate. + Offset is a PC-relative byte offset in the range +/- 128MiB. + The offset is assumed to be raw from the decode i.e. the + simulator is expected to scale them from word offsets to byte. */ + +/* Unconditional branch. */ +static void +buc (sim_cpu *cpu, int32_t offset) +{ + aarch64_set_next_PC_by_offset (cpu, offset); +} + +static unsigned stack_depth = 0; + +/* Unconditional branch and link -- writes return PC to LR. 
*/ +static void +bl (sim_cpu *cpu, int32_t offset) +{ + aarch64_save_LR (cpu); + aarch64_set_next_PC_by_offset (cpu, offset); + + if (TRACE_BRANCH_P (cpu)) + { + ++ stack_depth; + TRACE_BRANCH (cpu, + " %*scall %" PRIx64 " [%s]" + " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]", + stack_depth, " ", aarch64_get_next_PC (cpu), + aarch64_get_func (aarch64_get_next_PC (cpu)), + aarch64_get_reg_u64 (cpu, 0, NO_SP), + aarch64_get_reg_u64 (cpu, 1, NO_SP), + aarch64_get_reg_u64 (cpu, 2, NO_SP) + ); + } +} + +/* Unconditional Branch register. + Branch/return address is in source register. */ + +/* Unconditional branch. */ +static void +br (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP)); +} + +/* Unconditional branch and link -- writes return PC to LR. */ +static void +blr (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + + /* The pseudo code in the spec says we update LR before fetching. + the value from the rn. */ + aarch64_save_LR (cpu); + aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP)); + + if (TRACE_BRANCH_P (cpu)) + { + ++ stack_depth; + TRACE_BRANCH (cpu, + " %*scall %" PRIx64 " [%s]" + " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]", + stack_depth, " ", aarch64_get_next_PC (cpu), + aarch64_get_func (aarch64_get_next_PC (cpu)), + aarch64_get_reg_u64 (cpu, 0, NO_SP), + aarch64_get_reg_u64 (cpu, 1, NO_SP), + aarch64_get_reg_u64 (cpu, 2, NO_SP) + ); + } +} + +/* Return -- assembler will default source to LR this is functionally + equivalent to br but, presumably, unlike br it side effects the + branch predictor. */ +static void +ret (sim_cpu *cpu) +{ + unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP)); + + if (TRACE_BRANCH_P (cpu)) + { + TRACE_BRANCH (cpu, + " %*sreturn [result: %" PRIx64 "]", + stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP)); + -- stack_depth; + } +} + +/* NOP -- we implement this and call it from the decode in case we + want to intercept it later. */ + +static void +nop (sim_cpu *cpu) +{ +} + +/* Data synchronization barrier. */ + +static void +dsb (sim_cpu *cpu) +{ +} + +/* Data memory barrier. */ + +static void +dmb (sim_cpu *cpu) +{ +} + +/* Instruction synchronization barrier. */ + +static void +isb (sim_cpu *cpu) +{ +} + +static void +dexBranchImmediate (sim_cpu *cpu) +{ + /* assert instr[30,26] == 00101 + instr[31] ==> 0 == B, 1 == BL + instr[25,0] == imm26 branch offset counted in words. */ + + uint32_t top = uimm (aarch64_get_instr (cpu), 31, 31); + /* We have a 26 byte signed word offset which we need to pass to the + execute routine as a signed byte offset. */ + int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2; + + if (top) + bl (cpu, offset); + else + buc (cpu, offset); +} + +/* Control Flow. */ + +/* Conditional branch + + Offset is a PC-relative byte offset in the range +/- 1MiB pos is + a bit position in the range 0 .. 63 + + cc is a CondCode enum value as pulled out of the decode + + N.B. any offset register (source) can only be Xn or Wn. */ + +static void +bcc (sim_cpu *cpu, int32_t offset, CondCode cc) +{ + /* the test returns TRUE if CC is met. */ + if (testConditionCode (cpu, cc)) + aarch64_set_next_PC_by_offset (cpu, offset); +} + +/* 32 bit branch on register non-zero. 
*/
+static void
+cbnz32 (sim_cpu *cpu, int32_t offset)
+{
+  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
+    aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 64 bit branch on register non-zero.  */
+static void
+cbnz (sim_cpu *cpu, int32_t offset)
+{
+  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
+    aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 32 bit branch on register zero.  */
+static void
+cbz32 (sim_cpu *cpu, int32_t offset)
+{
+  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
+    aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* 64 bit branch on register zero.  */
+static void
+cbz (sim_cpu *cpu, int32_t offset)
+{
+  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
+    aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* Branch on register bit test non-zero -- one size fits all.  */
+static void
+tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
+{
+  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
+    aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+/* Branch on register bit test zero -- one size fits all.  */
+static void
+tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
+{
+  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+
+  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
+    aarch64_set_next_PC_by_offset (cpu, offset);
+}
+
+static void
+dexCompareBranchImmediate (sim_cpu *cpu)
+{
+  /* instr[30,25] = 01 1010
+     instr[31]    = size : 0 ==> 32, 1 ==> 64
+     instr[24]    = op : 0 ==> CBZ, 1 ==> CBNZ
+     instr[23,5]  = simm19 branch offset counted in words
+     instr[4,0]   = rt  */
+
+  uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+  uint32_t op = uimm (aarch64_get_instr (cpu), 24, 24);
+  int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
+
+  if (size == 0)
+    {
+      if (op == 0)
+        cbz32 (cpu, offset);
+      else
+        cbnz32 (cpu, offset);
+    }
+  else
+    {
+      if (op == 0)
+        cbz (cpu, offset);
+      else
+        cbnz (cpu, offset);
+    }
+}
+
+static void
+dexTestBranchImmediate (sim_cpu *cpu)
+{
+  /* instr[31]    = b5 : bit 5 of test bit idx
+     instr[30,25] = 01 1011
+     instr[24]    = op : 0 ==> TBZ, 1 ==> TBNZ
+     instr[23,19] = b40 : bits 4 to 0 of test bit idx
+     instr[18,5]  = simm14 : signed offset counted in words
+     instr[4,0]   = uimm5  */
+
+  uint32_t pos = ((uimm (aarch64_get_instr (cpu), 31, 31) << 5)
+                  | uimm (aarch64_get_instr (cpu), 23, 19));
+  int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
+
+  NYI_assert (30, 25, 0x1b);
+
+  if (uimm (aarch64_get_instr (cpu), 24, 24) == 0)
+    tbz (cpu, pos, offset);
+  else
+    tbnz (cpu, pos, offset);
+}
+
+static void
+dexCondBranchImmediate (sim_cpu *cpu)
+{
+  /* instr[31,25] = 010 1010
+     instr[24]    = op1; op => 00 ==> B.cond
+     instr[23,5]  = simm19 : signed offset counted in words
+     instr[4]     = op0
+     instr[3,0]   = cond  */
+
+  int32_t offset;
+  CondCode cc;
+  uint32_t op = ((uimm (aarch64_get_instr (cpu), 24, 24) << 1)
+                 | uimm (aarch64_get_instr (cpu), 4, 4));
+
+  NYI_assert (31, 25, 0x2a);
+
+  if (op != 0)
+    HALT_UNALLOC;
+
+  offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
+  cc = condcode (aarch64_get_instr (cpu), 0);
+
+  bcc (cpu, offset, cc);
+}
+
+static void
+dexBranchRegister (sim_cpu *cpu)
+{
+  /* instr[31,25] = 110 1011
+     instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
+     instr[20,16] = op2 : must be 11111
+     instr[15,10] = op3
: must be 000000 + instr[4,0] = op2 : must be 11111. */ + + uint32_t op = uimm (aarch64_get_instr (cpu), 24, 21); + uint32_t op2 = uimm (aarch64_get_instr (cpu), 20, 16); + uint32_t op3 = uimm (aarch64_get_instr (cpu), 15, 10); + uint32_t op4 = uimm (aarch64_get_instr (cpu), 4, 0); + + NYI_assert (31, 25, 0x6b); + + if (op2 != 0x1F || op3 != 0 || op4 != 0) + HALT_UNALLOC; + + if (op == 0) + br (cpu); + + else if (op == 1) + blr (cpu); + + else if (op == 2) + ret (cpu); + + else + { + /* ERET and DRPS accept 0b11111 for rn = aarch64_get_instr (cpu)[4,0]. */ + /* anything else is unallocated. */ + uint32_t rn = greg (aarch64_get_instr (cpu), 0); + + if (rn != 0x1f) + HALT_UNALLOC; + + if (op == 4 || op == 5) + HALT_NYI; + + HALT_UNALLOC; + } +} + +/* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h + but this may not be available. So instead we define the values we need + here. */ +#define AngelSVC_Reason_Open 0x01 +#define AngelSVC_Reason_Close 0x02 +#define AngelSVC_Reason_Write 0x05 +#define AngelSVC_Reason_Read 0x06 +#define AngelSVC_Reason_IsTTY 0x09 +#define AngelSVC_Reason_Seek 0x0A +#define AngelSVC_Reason_FLen 0x0C +#define AngelSVC_Reason_Remove 0x0E +#define AngelSVC_Reason_Rename 0x0F +#define AngelSVC_Reason_Clock 0x10 +#define AngelSVC_Reason_Time 0x11 +#define AngelSVC_Reason_System 0x12 +#define AngelSVC_Reason_Errno 0x13 +#define AngelSVC_Reason_GetCmdLine 0x15 +#define AngelSVC_Reason_HeapInfo 0x16 +#define AngelSVC_Reason_ReportException 0x18 +#define AngelSVC_Reason_Elapsed 0x30 + + +static void +handle_halt (sim_cpu *cpu, uint32_t val) +{ + uint64_t result = 0; + + if (val != 0xf000) + { + TRACE_SYSCALL (cpu, " HLT [0x%x]", val); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGTRAP); + } + + /* We have encountered an Angel SVC call. See if we can process it. */ + switch (aarch64_get_reg_u32 (cpu, 0, NO_SP)) + { + case AngelSVC_Reason_HeapInfo: + { + /* Get the values. */ + uint64_t stack_top = aarch64_get_stack_start (cpu); + uint64_t heap_base = aarch64_get_heap_start (cpu); + + /* Get the pointer */ + uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); + ptr = aarch64_get_mem_u64 (cpu, ptr); + + /* Fill in the memory block. */ + /* Start addr of heap. */ + aarch64_set_mem_u64 (cpu, ptr + 0, heap_base); + /* End addr of heap. */ + aarch64_set_mem_u64 (cpu, ptr + 8, stack_top); + /* Lowest stack addr. */ + aarch64_set_mem_u64 (cpu, ptr + 16, heap_base); + /* Initial stack addr. */ + aarch64_set_mem_u64 (cpu, ptr + 24, stack_top); + + TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info"); + } + break; + + case AngelSVC_Reason_Open: + { + /* Get the pointer */ + /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */ + /* FIXME: For now we just assume that we will only be asked + to open the standard file descriptors. */ + static int fd = 0; + result = fd ++; + + TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1); + } + break; + + case AngelSVC_Reason_Close: + { + uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK); + TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh); + result = 0; + } + break; + + case AngelSVC_Reason_Errno: + result = 0; + TRACE_SYSCALL (cpu, " AngelSVC: Get Errno"); + break; + + case AngelSVC_Reason_Clock: + result = +#ifdef CLOCKS_PER_SEC + (CLOCKS_PER_SEC >= 100) + ? (clock () / (CLOCKS_PER_SEC / 100)) + : ((clock () * 100) / CLOCKS_PER_SEC) +#else + /* Presume unix... clock() returns microseconds. 
*/ + (clock () / 10000) +#endif + ; + TRACE_SYSCALL (cpu, " AngelSVC: Get Clock"); + break; + + case AngelSVC_Reason_GetCmdLine: + { + /* Get the pointer */ + uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); + ptr = aarch64_get_mem_u64 (cpu, ptr); + + /* FIXME: No command line for now. */ + aarch64_set_mem_u64 (cpu, ptr, 0); + TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line"); + } + break; + + case AngelSVC_Reason_IsTTY: + result = 1; + TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?"); + break; + + case AngelSVC_Reason_Write: + { + /* Get the pointer */ + uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); + /* Get the write control block. */ + uint64_t fd = aarch64_get_mem_u64 (cpu, ptr); + uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8); + uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16); + + TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %" + PRIx64 " on descriptor %" PRIx64, + len, buf, fd); + + if (len > 1280) + { + TRACE_SYSCALL (cpu, + " AngelSVC: Write: Suspiciously long write: %ld", + (long) len); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGBUS); + } + else if (fd == 1) + { + printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf)); + if (disas) + /* So that the output stays in sync with trace output. */ + fflush (stdout); + } + else if (fd == 2) + { + TRACE (cpu, 0, "\n"); + sim_io_eprintf (CPU_STATE (cpu), "%.*s", + (int) len, aarch64_get_mem_ptr (cpu, buf)); + TRACE (cpu, 0, "\n"); + } + else + { + TRACE_SYSCALL (cpu, + " AngelSVC: Write: Unexpected file handle: %d", + (int) fd); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGABRT); + } + } + break; + + case AngelSVC_Reason_ReportException: + { + /* Get the pointer */ + uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); + /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */ + uint64_t type = aarch64_get_mem_u64 (cpu, ptr); + uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8); + + TRACE_SYSCALL (cpu, + "Angel Exception: type 0x%" PRIx64 " state %" PRIx64, + type, state); + + if (type == 0x20026) + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_exited, state); + else + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGINT); + } + break; + + case AngelSVC_Reason_Read: + case AngelSVC_Reason_FLen: + case AngelSVC_Reason_Seek: + case AngelSVC_Reason_Remove: + case AngelSVC_Reason_Time: + case AngelSVC_Reason_System: + case AngelSVC_Reason_Rename: + case AngelSVC_Reason_Elapsed: + default: + TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]", + aarch64_get_reg_u32 (cpu, 0, NO_SP)); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_stopped, SIM_SIGTRAP); + } + + aarch64_set_reg_u64 (cpu, 0, NO_SP, result); +} + +static void +dexExcpnGen (sim_cpu *cpu) +{ + /* instr[31:24] = 11010100 + instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK + 010 ==> HLT, 101 ==> DBG GEN EXCPN + instr[20,5] = imm16 + instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC + instr[1,0] = LL : discriminates opc */ + + uint32_t opc = uimm (aarch64_get_instr (cpu), 23, 21); + uint32_t imm16 = uimm (aarch64_get_instr (cpu), 20, 5); + uint32_t opc2 = uimm (aarch64_get_instr (cpu), 4, 2); + uint32_t LL; + + NYI_assert (31, 24, 0xd4); + + if (opc2 != 0) + HALT_UNALLOC; + + LL = uimm (aarch64_get_instr (cpu), 1, 0); + + /* We only implement HLT and BRK for now. 
*/ + if (opc == 1 && LL == 0) + { + TRACE_EVENTS (cpu, " BRK [0x%x]", imm16); + sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), + sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK)); + } + + if (opc == 2 && LL == 0) + handle_halt (cpu, imm16); + + else if (opc == 0 || opc == 5) + HALT_NYI; + + else + HALT_UNALLOC; +} + +static void +dexSystem (sim_cpu *cpu) +{ + /* instr[31:22] = 1101 01010 0 + instr[21] = L + instr[20,19] = op0 + instr[18,16] = op1 + instr[15,12] = CRn + instr[11,8] = CRm + instr[7,5] = op2 + instr[4,0] = uimm5 */ + + /* We are interested in HINT, DSB, DMB and ISB + + Hint #0 encodes NOOP (this is the only hint we care about) + L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111, + CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101 + + DSB, DMB, ISB are data store barrier, data memory barrier and + instruction store barrier, respectively, where + + L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111, + op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110 + CRm<3:2> ==> domain, CRm<1:0> ==> types, + domain : 00 ==> OuterShareable, 01 ==> Nonshareable, + 10 ==> InerShareable, 11 ==> FullSystem + types : 01 ==> Reads, 10 ==> Writes, + 11 ==> All, 00 ==> All (domain == FullSystem). */ + + unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + uint32_t l_op0_op1_crn = uimm (aarch64_get_instr (cpu), 21, 12); + + NYI_assert (31, 22, 0x354); + + switch (l_op0_op1_crn) + { + case 0x032: + if (rt == 0x1F) + { + /* NOP has CRm != 0000 OR. */ + /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */ + uint32_t crm = uimm (aarch64_get_instr (cpu), 11, 8); + uint32_t op2 = uimm (aarch64_get_instr (cpu), 7, 5); + + if (crm != 0 || (op2 == 0 || op2 > 5)) + { + /* Actually call nop method so we can reimplement it later. */ + nop (cpu); + return; + } + } + HALT_NYI; + + case 0x033: + { + uint32_t op2 = uimm (aarch64_get_instr (cpu), 7, 5); + + switch (op2) + { + case 2: + HALT_NYI; + + case 4: dsb (cpu); return; + case 5: dmb (cpu); return; + case 6: isb (cpu); return; + case 7: + default: HALT_UNALLOC; + } + } + + case 0x3B0: + /* MRS Wt, sys-reg. */ + /* FIXME: Ignore for now. */ + return; + + case 0x3B4: + case 0x3BD: + /* MRS Xt, sys-reg. */ + /* FIXME: Ignore for now. */ + return; + + case 0x0B7: + /* DC <type>, x<n>. */ + /* FIXME: Ignore for now. */ + return; + + default: + if (uimm (aarch64_get_instr (cpu), 21, 20) == 0x1) + /* MSR <sys-reg>, <Xreg>. */ + return; + HALT_NYI; + } +} + +static void +dexBr (sim_cpu *cpu) +{ + /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); + assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011 + bits [31,29] of a BrExSys are the secondary dispatch vector. */ + uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu)); + + switch (group2) + { + case BR_IMM_000: + return dexBranchImmediate (cpu); + + case BR_IMMCMP_001: + /* Compare has bit 25 clear while test has it set. */ + if (!uimm (aarch64_get_instr (cpu), 25, 25)) + dexCompareBranchImmediate (cpu); + else + dexTestBranchImmediate (cpu); + return; + + case BR_IMMCOND_010: + /* This is a conditional branch if bit 25 is clear otherwise + unallocated. */ + if (!uimm (aarch64_get_instr (cpu), 25, 25)) + dexCondBranchImmediate (cpu); + else + HALT_UNALLOC; + return; + + case BR_UNALLOC_011: + HALT_UNALLOC; + + case BR_IMM_100: + dexBranchImmediate (cpu); + return; + + case BR_IMMCMP_101: + /* Compare has bit 25 clear while test has it set. 
*/ + if (!uimm (aarch64_get_instr (cpu), 25, 25)) + dexCompareBranchImmediate (cpu); + else + dexTestBranchImmediate (cpu); + return; + + case BR_REG_110: + /* Unconditional branch reg has bit 25 set. */ + if (uimm (aarch64_get_instr (cpu), 25, 25)) + dexBranchRegister (cpu); + + /* This includes both Excpn Gen, System and unalloc operations. + We need to decode the Excpn Gen operation BRK so we can plant + debugger entry points. + Excpn Gen operations have aarch64_get_instr (cpu)[24] = 0. + we need to decode at least one of the System operations NOP + which is an alias for HINT #0. + System operations have aarch64_get_instr (cpu)[24,22] = 100. */ + else if (uimm (aarch64_get_instr (cpu), 24, 24) == 0) + dexExcpnGen (cpu); + + else if (uimm (aarch64_get_instr (cpu), 24, 22) == 4) + dexSystem (cpu); + + else + HALT_UNALLOC; + + return; + + case BR_UNALLOC_111: + HALT_UNALLOC; + + default: + /* Should never reach here. */ + HALT_NYI; + } +} + +static void +aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc) +{ + /* We need to check if gdb wants an in here. */ + /* checkBreak (cpu);. */ + + uint64_t group = dispatchGroup (aarch64_get_instr (cpu)); + + switch (group) + { + case GROUP_PSEUDO_0000: dexPseudo (cpu); break; + case GROUP_LDST_0100: dexLdSt (cpu); break; + case GROUP_DPREG_0101: dexDPReg (cpu); break; + case GROUP_LDST_0110: dexLdSt (cpu); break; + case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break; + case GROUP_DPIMM_1000: dexDPImm (cpu); break; + case GROUP_DPIMM_1001: dexDPImm (cpu); break; + case GROUP_BREXSYS_1010: dexBr (cpu); break; + case GROUP_BREXSYS_1011: dexBr (cpu); break; + case GROUP_LDST_1100: dexLdSt (cpu); break; + case GROUP_DPREG_1101: dexDPReg (cpu); break; + case GROUP_LDST_1110: dexLdSt (cpu); break; + case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break; + + case GROUP_UNALLOC_0001: + case GROUP_UNALLOC_0010: + case GROUP_UNALLOC_0011: + HALT_UNALLOC; + + default: + /* Should never reach here. */ + HALT_NYI; + } +} + +static bfd_boolean +aarch64_step (sim_cpu *cpu) +{ + uint64_t pc = aarch64_get_PC (cpu); + + if (pc == TOP_LEVEL_RETURN_PC) + return FALSE; + + aarch64_set_next_PC (cpu, pc + 4); + aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc); + + if (TRACE_INSN_P (cpu)) + { + if (disas) + TRACE_INSN (cpu, " pc = %" PRIx64 " ", pc); + else + TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %x", pc, + aarch64_get_instr (cpu)); + } + else if (disas) + sim_io_eprintf (CPU_STATE (cpu), " %" PRIx64 " ", pc); + + if (disas) + aarch64_print_insn (CPU_STATE (cpu), pc); + + aarch64_decode_and_execute (cpu, pc); + + return TRUE; +} + +void +aarch64_run (SIM_DESC sd) +{ + sim_cpu *cpu = STATE_CPU (sd, 0); + + while (aarch64_step (cpu)) + aarch64_update_PC (cpu); + + sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu), + sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK)); +} + +void +aarch64_init (sim_cpu *cpu, uint64_t pc) +{ + uint64_t sp = aarch64_get_stack_start (cpu); + + /* Install SP, FP and PC and set LR to -20 + so we can detect a top-level return. */ + aarch64_set_reg_u64 (cpu, SP, SP_OK, sp); + aarch64_set_reg_u64 (cpu, FP, SP_OK, sp); + aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC); + aarch64_set_next_PC (cpu, pc); + aarch64_update_PC (cpu); + aarch64_init_LIT_table (); +} |
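The conditional select group decoded by dexCondSelect differs only in how the second operand is transformed when the condition fails. A minimal standalone sketch of those semantics, not part of simulator.c, using plain 32 bit values and an int in place of testConditionCode:

/* Illustrative sketch only: the four conditional select forms.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t csel_ref  (int cond, uint32_t rn, uint32_t rm) { return cond ? rn : rm; }
static uint32_t csinc_ref (int cond, uint32_t rn, uint32_t rm) { return cond ? rn : rm + 1; }
static uint32_t csinv_ref (int cond, uint32_t rn, uint32_t rm) { return cond ? rn : ~rm; }
static uint32_t csneg_ref (int cond, uint32_t rn, uint32_t rm) { return cond ? rn : (uint32_t) -rm; }

int
main (void)
{
  uint32_t rn = 7, rm = 3;

  /* With the condition false the Rm operand is modified before use.  */
  printf ("csel=%u csinc=%u csinv=0x%x csneg=0x%x\n",
          csel_ref (0, rn, rm), csinc_ref (0, rn, rm),
          csinv_ref (0, rn, rm), csneg_ref (0, rn, rm));
  return 0;
}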
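leading32 and leading64 locate the number of leading bits equal to the sign bit by interval halving; cls32 and cls64 then drop the sign bit itself from the count. A simple linear-scan reference for the same quantity, included here only as an illustration and assuming nothing beyond standard C:

/* Illustrative sketch only: a straightforward reference for what
   leading32/cls32 compute.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t
leading32_ref (uint32_t value)
{
  uint32_t top = value >> 31;   /* The sign (top) bit.  */
  uint32_t count = 0;
  int i;

  for (i = 31; i >= 0; i--)
    {
      if (((value >> i) & 1) != top)
        break;
      count++;
    }
  return count;                 /* In the range 1 .. 32.  */
}

int
main (void)
{
  uint32_t x = 0xFFFF0123;      /* 16 leading ones, so CLS is 15.  */

  printf ("leading = %u, cls = %u\n", leading32_ref (x), leading32_ref (x) - 1);
  return 0;
}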
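rbit32 and rev32 use the same shift-and-accumulate loop, once per bit and once per byte. A standalone sketch of those two loops, not part of simulator.c:

/* Illustrative sketch only: bit reversal and byte reversal of a 32 bit word.  */
#include <stdint.h>
#include <stdio.h>

static uint32_t
rbit32_ref (uint32_t value)
{
  uint32_t result = 0;
  int i;

  for (i = 0; i < 32; i++)
    {
      result <<= 1;
      result |= value & 1;
      value >>= 1;
    }
  return result;
}

static uint32_t
rev32_ref (uint32_t value)
{
  uint32_t result = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= value & 0xff;
      value >>= 8;
    }
  return result;
}

int
main (void)
{
  uint32_t x = 0x12345678;

  printf ("rbit: %08x -> %08x\n", x, rbit32_ref (x));
  printf ("rev:  %08x -> %08x (expect 78563412)\n", x, rev32_ref (x));
  return 0;
}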
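The divide helpers perform the 32 bit divisions in 64 bit arithmetic and return zero for a zero divisor. A hedged sketch of the two corner cases this covers, assuming a typical two's complement host:

/* Illustrative sketch only: divide by zero yields zero, and widening to
   64 bits before dividing avoids the overflow trap on INT32_MIN / -1.  */
#include <stdint.h>
#include <stdio.h>

static int32_t
sdiv32_ref (int32_t n, int32_t m)
{
  int64_t dividend = n;
  int64_t divisor = m;

  return divisor ? (int32_t) (dividend / divisor) : 0;
}

int
main (void)
{
  printf ("%d %d %d\n",
          (int) sdiv32_ref (7, -2),           /* -3: truncates toward zero.  */
          (int) sdiv32_ref (7, 0),            /* 0: divide by zero is defined as zero.  */
          (int) sdiv32_ref (INT32_MIN, -1));  /* Truncates back to INT32_MIN, no trap.  */
  return 0;
}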
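smaddl, smsubl, umaddl and umsubl all widen their 32 bit sources before multiplying, as their N.B. comments note. A small standalone example, not part of simulator.c, of why the cast has to happen before the multiply, shown for the unsigned case:

/* Illustrative sketch only: a 32 bit multiply wraps, a widened multiply
   keeps the full 64 bit product that umaddl accumulates.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t rn = 100000, rm = 100000;
  uint64_t ra = 5;

  uint64_t wrong = ra + (uint64_t) (rn * rm);           /* Product wraps to 1410065408.  */
  uint64_t right = ra + (uint64_t) rn * (uint64_t) rm;  /* Full product 10000000000.  */

  printf ("wrong = %llu, right = %llu\n",
          (unsigned long long) wrong, (unsigned long long) right);
  return 0;
}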
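mul64hi forms the high 64 bits of a 128 bit product from four 32 bit cross-products. A standalone reference for that scheme, cross-checked against the unsigned __int128 type that GCC and Clang provide on 64 bit hosts (an assumption about the host compiler, not something simulator.c relies on). Note that a carry out of the middle sum is worth 2^32 in the high half, so it is shifted up before being added:

/* Illustrative sketch only: high 64 bits of a 64 x 64 bit product.  */
#include <stdint.h>
#include <stdio.h>

static uint64_t
mul64hi_ref (uint64_t a, uint64_t b)
{
  uint64_t a_lo = a & 0xffffffffULL, a_hi = a >> 32;
  uint64_t b_lo = b & 0xffffffffULL, b_hi = b >> 32;

  uint64_t lo   = a_lo * b_lo;
  uint64_t mid1 = a_lo * b_hi;
  uint64_t mid2 = a_hi * b_lo;
  uint64_t hi   = a_hi * b_hi;
  uint64_t carry = 0;

  /* Sum the middle terms plus the carry out of the low word; the first
     addition cannot overflow, the second can.  */
  uint64_t mid = (lo >> 32) + mid1;
  mid += mid2;
  if (mid < mid2)
    carry = 1;

  return hi + (mid >> 32) + (carry << 32);
}

int
main (void)
{
  uint64_t a = 0xdeadbeefcafef00dULL, b = 0x123456789abcdef1ULL;
  uint64_t expect = (uint64_t) (((unsigned __int128) a * b) >> 64);

  printf ("mul64hi = %016llx, expect = %016llx\n",
          (unsigned long long) mul64hi_ref (a, b),
          (unsigned long long) expect);
  return 0;
}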
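dexCondBranchImmediate and the other branch decoders recover a byte offset by sign extending a word-scaled immediate field. A sketch of that extraction for the 19 bit field at bits [23,5]; sign_extend_ref is a stand-in for the simulator's simm32 helper, whose definition is not visible in this excerpt, and the example word is assumed to encode a B.EQ:

/* Illustrative sketch only: extract, sign extend and scale a branch offset.  */
#include <stdint.h>
#include <stdio.h>

/* Extract bits [hi,lo] of INSTR and sign extend from the field width
   (assumed to be less than 32 bits).  */
static int32_t
sign_extend_ref (uint32_t instr, int hi, int lo)
{
  int width = hi - lo + 1;
  uint32_t field = (instr >> lo) & ((1U << width) - 1);
  uint32_t sign = 1U << (width - 1);

  /* If the top bit of the field is set, subtract 2^width.  */
  return (int32_t) field - (int32_t) ((field & sign) << 1);
}

int
main (void)
{
  /* B.EQ with an imm19 of -2: branch 8 bytes backwards.  */
  uint32_t instr = 0x54000000 | (0x7FFFEU << 5);
  int32_t offset = sign_extend_ref (instr, 23, 5) * 4;

  printf ("byte offset = %d\n", offset);
  return 0;
}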
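The AngelSVC_Reason_HeapInfo handler stores four 64 bit words through the pointer passed in x1. A descriptive sketch of that block, not part of simulator.c; the field names simply restate the handler's own comments:

/* Illustrative sketch only: the heap-info parameter block filled in by
   the handler at offsets 0, 8, 16 and 24.  */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct angel_heapinfo
{
  uint64_t heap_start;     /* Offset 0:  start address of the heap.  */
  uint64_t heap_end;       /* Offset 8:  end address of the heap.  */
  uint64_t stack_lowest;   /* Offset 16: lowest stack address.  */
  uint64_t stack_initial;  /* Offset 24: initial stack address.  */
};

int
main (void)
{
  printf ("offsets: %zu %zu %zu %zu\n",
          offsetof (struct angel_heapinfo, heap_start),
          offsetof (struct angel_heapinfo, heap_end),
          offsetof (struct angel_heapinfo, stack_lowest),
          offsetof (struct angel_heapinfo, stack_initial));
  return 0;
}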