aboutsummaryrefslogtreecommitdiff
path: root/sim/arm/thumbemu.c
diff options
context:
space:
mode:
Diffstat (limited to 'sim/arm/thumbemu.c')
-rw-r--r--sim/arm/thumbemu.c455
1 files changed, 455 insertions, 0 deletions
diff --git a/sim/arm/thumbemu.c b/sim/arm/thumbemu.c
new file mode 100644
index 0000000..eaf6e0c
--- /dev/null
+++ b/sim/arm/thumbemu.c
@@ -0,0 +1,455 @@
+/* thumbemu.c -- Thumb instruction emulation.
+ Copyright (C) 1996, Cygnus Software Technologies Ltd.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* We can provide simple Thumb simulation by decoding the Thumb
+instruction into its corresponding ARM instruction, and using the
+existing ARM simulator. */
+
+#ifndef MODET /* required for the Thumb instruction support */
+#if 1
+#error "MODET needs to be defined for the Thumb world to work"
+#else
+#define MODET (1)
+#endif
+#endif
+
+#include "armdefs.h"
+#include "armemu.h"
+
+/* Decode a 16bit Thumb instruction. The instruction is in the low
+ 16-bits of the tinstr field, with the following Thumb instruction
+ held in the high 16-bits. Passing in two Thumb instructions allows
+ easier simulation of the special dual BL instruction. */
+
+tdstate
+ARMul_ThumbDecode (state,pc,tinstr,ainstr)
+ ARMul_State *state;
+ ARMword pc;
+ ARMword tinstr;
+ ARMword *ainstr;
+{
+ tdstate valid = t_decoded; /* default assumes a valid instruction */
+ ARMword next_instr;
+
+ if (state->bigendSig)
+ {
+ next_instr = tinstr & 0xFFFF;
+ tinstr >>= 16;
+ }
+ else
+ {
+ next_instr = tinstr >> 16;
+ tinstr &= 0xFFFF;
+ }
+
+#if 1 /* debugging to catch non updates */
+ *ainstr = 0xDEADC0DE;
+#endif
+
+ switch ((tinstr & 0xF800) >> 11)
+ {
+ case 0: /* LSL */
+ case 1: /* LSR */
+ case 2: /* ASR */
+ /* Format 1 */
+ *ainstr = 0xE1B00000 /* base opcode */
+ | ((tinstr & 0x1800) >> (11 - 5)) /* shift type */
+ | ((tinstr & 0x07C0) << (7 - 6)) /* imm5 */
+ | ((tinstr & 0x0038) >> 3) /* Rs */
+ | ((tinstr & 0x0007) << 12); /* Rd */
+ break;
+ case 3: /* ADD/SUB */
+ /* Format 2 */
+ {
+ ARMword subset[4] = {
+ 0xE0900000, /* ADDS Rd,Rs,Rn */
+ 0xE0500000, /* SUBS Rd,Rs,Rn */
+ 0xE2900000, /* ADDS Rd,Rs,#imm3 */
+ 0xE2500000 /* SUBS Rd,Rs,#imm3 */
+ };
+ /* It is quicker indexing into a table, than performing switch
+ or conditionals: */
+ *ainstr = subset[(tinstr & 0x0600) >> 9] /* base opcode */
+ | ((tinstr & 0x01C0) >> 6) /* Rn or imm3 */
+ | ((tinstr & 0x0038) << (16 - 3)) /* Rs */
+ | ((tinstr & 0x0007) << (12 - 0)); /* Rd */
+ }
+ break;
+ case 4: /* MOV */
+ case 5: /* CMP */
+ case 6: /* ADD */
+ case 7: /* SUB */
+ /* Format 3 */
+ {
+ ARMword subset[4] = {
+ 0xE3B00000, /* MOVS Rd,#imm8 */
+ 0xE3500000, /* CMP Rd,#imm8 */
+ 0xE2900000, /* ADDS Rd,Rd,#imm8 */
+ 0xE2500000, /* SUBS Rd,Rd,#imm8 */
+ };
+ *ainstr = subset[(tinstr & 0x1800) >> 11] /* base opcode */
+ | ((tinstr & 0x00FF) >> 0) /* imm8 */
+ | ((tinstr & 0x0700) << (16 - 8)) /* Rn */
+ | ((tinstr & 0x0700) << (12 - 8)); /* Rd */
+ }
+ break ;
+ case 8: /* Arithmetic and high register transfers */
+ /* TODO: Since the subsets for both Format 4 and Format 5
+ instructions are made up of different ARM encodings, we could
+ save the following conditional, and just have one large
+ subset. */
+ if ((tinstr & (1 << 10)) == 0)
+ {
+ /* Format 4 */
+ struct {
+ ARMword opcode;
+ enum {t_norm,t_shift,t_neg,t_mul} otype;
+ } subset[16] = {
+ {0xE0100000, t_norm}, /* ANDS Rd,Rd,Rs */
+ {0xE0300000, t_norm}, /* EORS Rd,Rd,Rs */
+ {0xE1B00010, t_shift}, /* MOVS Rd,Rd,LSL Rs */
+ {0xE1B00030, t_shift}, /* MOVS Rd,Rd,LSR Rs */
+ {0xE1B00050, t_shift}, /* MOVS Rd,Rd,ASR Rs */
+ {0xE0B00000, t_norm}, /* ADCS Rd,Rd,Rs */
+ {0xE0D00000, t_norm}, /* SBCS Rd,Rd,Rs */
+ {0xE1B00070, t_shift}, /* MOVS Rd,Rd,ROR Rs */
+ {0xE1100000, t_norm}, /* TST Rd,Rs */
+ {0xE2700000, t_neg}, /* RSBS Rd,Rs,#0 */
+ {0xE1500000, t_norm}, /* CMP Rd,Rs */
+ {0xE1700000, t_norm}, /* CMN Rd,Rs */
+ {0xE1900000, t_norm}, /* ORRS Rd,Rd,Rs */
+ {0xE0100090, t_mul}, /* MULS Rd,Rd,Rs */
+ {0xE1D00000, t_norm}, /* BICS Rd,Rd,Rs */
+ {0xE1F00000, t_norm} /* MVNS Rd,Rs */
+ };
+ *ainstr = subset[(tinstr & 0x03C0)>>6].opcode; /* base */
+ switch (subset[(tinstr & 0x03C0)>>6].otype)
+ {
+ case t_norm:
+ *ainstr |= ((tinstr & 0x0007) << 16) /* Rn */
+ | ((tinstr & 0x0007) << 12) /* Rd */
+ | ((tinstr & 0x0038) >> 3); /* Rs */
+ break;
+ case t_shift:
+ *ainstr |= ((tinstr & 0x0007) << 12) /* Rd */
+ | ((tinstr & 0x0007) >> 0) /* Rm */
+ | ((tinstr & 0x0038) << (8 - 3)); /* Rs */
+ break;
+ case t_neg:
+ *ainstr |= ((tinstr & 0x0007) << 12) /* Rd */
+ | ((tinstr & 0x0038) << (16 - 3)); /* Rn */
+ break;
+ case t_mul:
+ *ainstr |= ((tinstr & 0x0007) << 16) /* Rd */
+ | ((tinstr & 0x0007) << 8) /* Rs */
+ | ((tinstr & 0x0038) >> 3); /* Rm */
+ break;
+ }
+ }
+ else
+ {
+ /* Format 5 */
+ ARMword Rd = ((tinstr & 0x0007) >> 0);
+ ARMword Rs = ((tinstr & 0x0038) >> 3);
+ if (tinstr & (1 << 7))
+ Rd += 8;
+ if (tinstr & (1 << 6))
+ Rs += 8;
+ switch ((tinstr & 0x03C0) >> 6)
+ {
+ case 0x1: /* ADD Rd,Rd,Hs */
+ case 0x2: /* ADD Hd,Hd,Rs */
+ case 0x3: /* ADD Hd,Hd,Hs */
+ *ainstr = 0xE0800000 /* base */
+ | (Rd << 16) /* Rn */
+ | (Rd << 12) /* Rd */
+ | (Rs << 0); /* Rm */
+ break;
+ case 0x5: /* CMP Rd,Hs */
+ case 0x6: /* CMP Hd,Rs */
+ case 0x7: /* CMP Hd,Hs */
+ *ainstr = 0xE1500000 /* base */
+ | (Rd << 16) /* Rn */
+ | (Rd << 12) /* Rd */
+ | (Rs << 0); /* Rm */
+ break;
+ case 0x9: /* MOV Rd,Hs */
+ case 0xA: /* MOV Hd,Rs */
+ case 0xB: /* MOV Hd,Hs */
+ *ainstr = 0xE1A00000 /* base */
+ | (Rd << 16) /* Rn */
+ | (Rd << 12) /* Rd */
+ | (Rs << 0); /* Rm */
+ break;
+ case 0xC: /* BX Rs */
+ case 0xD: /* BX Hs */
+ *ainstr = 0xE12FFF10 /* base */
+ | ((tinstr & 0x0078) >> 3); /* Rd */
+ break;
+ case 0x0: /* UNDEFINED */
+ case 0x4: /* UNDEFINED */
+ case 0x8: /* UNDEFINED */
+ case 0xE: /* UNDEFINED */
+ case 0xF: /* UNDEFINED */
+ valid = t_undefined;
+ break;
+ }
+ }
+ break;
+ case 9: /* LDR Rd,[PC,#imm8] */
+ /* Format 6 */
+ *ainstr = 0xE59F0000 /* base */
+ | ((tinstr & 0x0700) << (12 - 8)) /* Rd */
+ | ((tinstr & 0x00FF) << (2 - 0)); /* off8 */
+ break;
+ case 10:
+ case 11:
+ /* TODO: Format 7 and Format 8 perform the same ARM encoding, so
+ the following could be merged into a single subset, saving on
+ the following boolean: */
+ if ((tinstr & (1 << 9)) == 0)
+ {
+ /* Format 7 */
+ ARMword subset[4] = {
+ 0xE7800000, /* STR Rd,[Rb,Ro] */
+ 0xE7C00000, /* STRB Rd,[Rb,Ro] */
+ 0xE7900000, /* LDR Rd,[Rb,Ro] */
+ 0xE7D00000 /* LDRB Rd,[Rb,Ro] */
+ };
+ *ainstr = subset[(tinstr & 0x0C00) >> 10] /* base */
+ | ((tinstr & 0x0007) << (12 - 0)) /* Rd */
+ | ((tinstr & 0x0038) << (16 - 3)) /* Rb */
+ | ((tinstr & 0x01C0) >> 6); /* Ro */
+ }
+ else
+ {
+ /* Format 8 */
+ ARMword subset[4] = {
+ 0xE18000B0, /* STRH Rd,[Rb,Ro] */
+ 0xE19000D0, /* LDRSB Rd,[Rb,Ro] */
+ 0xE19000B0, /* LDRH Rd,[Rb,Ro] */
+ 0xE19000F0 /* LDRSH Rd,[Rb,Ro] */
+ };
+ *ainstr = subset[(tinstr & 0x0C00) >> 10] /* base */
+ | ((tinstr & 0x0007) << (12 - 0)) /* Rd */
+ | ((tinstr & 0x0038) << (16 - 3)) /* Rb */
+ | ((tinstr & 0x01C0) >> 6); /* Ro */
+ }
+ break;
+ case 12: /* STR Rd,[Rb,#imm5] */
+ case 13: /* LDR Rd,[Rb,#imm5] */
+ case 14: /* STRB Rd,[Rb,#imm5] */
+ case 15: /* LDRB Rd,[Rb,#imm5] */
+ /* Format 9 */
+ {
+ ARMword subset[4] = {
+ 0xE5800000, /* STR Rd,[Rb,#imm5] */
+ 0xE5900000, /* LDR Rd,[Rb,#imm5] */
+ 0xE5C00000, /* STRB Rd,[Rb,#imm5] */
+ 0xE5D00000 /* LDRB Rd,[Rb,#imm5] */
+ };
+ /* The offset range defends on whether we are transferring a
+ byte or word value: */
+ *ainstr = subset[(tinstr & 0x1800) >> 11] /* base */
+ | ((tinstr & 0x0007) << (12 - 0)) /* Rd */
+ | ((tinstr & 0x0038) << (16 - 3)) /* Rb */
+ | ((tinstr & 0x07C0) >>
+ (6 - ((tinstr & (1 << 12)) ? 0 : 2))); /* off5 */
+ }
+ break;
+ case 16: /* STRH Rd,[Rb,#imm5] */
+ case 17: /* LDRH Rd,[Rb,#imm5] */
+ /* Format 10 */
+ *ainstr = ((tinstr & (1 << 11)) /* base */
+ ? 0xE1D000B0 /* LDRH */
+ : 0xE1C000B0) /* STRH */
+ | ((tinstr & 0x0007) << (12 - 0)) /* Rd */
+ | ((tinstr & 0x0038) << (16 - 3)) /* Rb */
+ | ((tinstr & 0x01C0) >> (6 - 1)) /* off5, low nibble */
+ | ((tinstr & 0x0600) >> (9 - 8)); /* off5, high nibble */
+ break;
+ case 18: /* STR Rd,[SP,#imm8] */
+ case 19: /* LDR Rd,[SP,#imm8] */
+ /* Format 11 */
+ *ainstr = ((tinstr & (1 << 11)) /* base */
+ ? 0xE59D0000 /* LDR */
+ : 0xE58D0000) /* STR */
+ | ((tinstr & 0x0700) << (12 - 8)) /* Rd */
+ | ((tinstr & 0x00FF) << 2); /* off8 */
+ break;
+ case 20: /* ADD Rd,PC,#imm8 */
+ case 21: /* ADD Rd,SP,#imm8 */
+ /* Format 12 */
+ if ((tinstr & (1 << 11)) == 0)
+ {
+ /* NOTE: The PC value used here should by word aligned */
+ /* We encode shift-left-by-2 in the rotate immediate field,
+ so no shift of off8 is needed. */
+ *ainstr = 0xE28F0F00 /* base */
+ | ((tinstr & 0x0700) << (12 - 8)) /* Rd */
+ | (tinstr & 0x00FF); /* off8 */
+ }
+ else
+ {
+ /* We encode shift-left-by-2 in the rotate immediate field,
+ so no shift of off8 is needed. */
+ *ainstr = 0xE28D0F00 /* base */
+ | ((tinstr & 0x0700) << (12 - 8)) /* Rd */
+ | (tinstr & 0x00FF); /* off8 */
+ }
+ break;
+ case 22:
+ case 23:
+ if ((tinstr & 0x0F00) == 0x0000)
+ {
+ /* Format 13 */
+ /* NOTE: The instruction contains a shift left of 2
+ equivalent (implemented as ROR #30): */
+ *ainstr = ((tinstr & (1 << 7)) /* base */
+ ? 0xE24DDF00 /* SUB */
+ : 0xE28DDF00) /* ADD */
+ | (tinstr & 0x007F); /* off7 */
+ }
+ else
+ {
+ /* Format 14 */
+ ARMword subset[4] = {
+ 0xE92D0000, /* STMDB sp!,{rlist} */
+ 0xE92D4000, /* STMDB sp!,{rlist,lr} */
+ 0xE8BD0000, /* LDMIA sp!,{rlist} */
+ 0xE8BD8000 /* LDMIA sp!,{rlist,pc} */
+ };
+ *ainstr = subset[((tinstr & (1 << 11)) >> 10)
+ | ((tinstr & (1 << 8)) >> 8)] /* base */
+ | (tinstr & 0x00FF); /* mask8 */
+ }
+ break;
+ case 24: /* STMIA */
+ case 25: /* LDMIA */
+ /* Format 15 */
+ *ainstr = ((tinstr & (1 << 11)) /* base */
+ ? 0xE8B00000 /* LDMIA */
+ : 0xE8A00000) /* STMIA */
+ | ((tinstr & 0x0700) << (16 - 8)) /* Rb */
+ | (tinstr & 0x00FF); /* mask8 */
+ break;
+ case 26: /* Bcc */
+ case 27: /* Bcc/SWI */
+ if ((tinstr & 0x0F00) == 0x0F00)
+ {
+ /* Format 17 : SWI */
+ *ainstr = 0xEF000000;
+ /* Breakpoint must be handled specially. */
+ if ((tinstr & 0x00FF) == 0x18)
+ *ainstr |= ((tinstr & 0x00FF) << 16);
+ else
+ *ainstr |= (tinstr & 0x00FF);
+ }
+ else if ((tinstr & 0x0F00) != 0x0E00)
+ {
+ /* Format 16 */
+ int doit = FALSE;
+ /* TODO: Since we are doing a switch here, we could just add
+ the SWI and undefined instruction checks into this
+ switch to same on a couple of conditionals: */
+ switch ((tinstr & 0x0F00) >> 8) {
+ case EQ : doit=ZFLAG ;
+ break ;
+ case NE : doit=!ZFLAG ;
+ break ;
+ case VS : doit=VFLAG ;
+ break ;
+ case VC : doit=!VFLAG ;
+ break ;
+ case MI : doit=NFLAG ;
+ break ;
+ case PL : doit=!NFLAG ;
+ break ;
+ case CS : doit=CFLAG ;
+ break ;
+ case CC : doit=!CFLAG ;
+ break ;
+ case HI : doit=(CFLAG && !ZFLAG) ;
+ break ;
+ case LS : doit=(!CFLAG || ZFLAG) ;
+ break ;
+ case GE : doit=((!NFLAG && !VFLAG) || (NFLAG && VFLAG)) ;
+ break ;
+ case LT : doit=((NFLAG && !VFLAG) || (!NFLAG && VFLAG)) ;
+ break ;
+ case GT : doit=((!NFLAG && !VFLAG && !ZFLAG) || (NFLAG && VFLAG && !ZFLAG)) ;
+ break ;
+ case LE : doit=((NFLAG && !VFLAG) || (!NFLAG && VFLAG)) || ZFLAG ;
+ break ;
+ }
+ if (doit) {
+ state->Reg[15] = pc + 4
+ + (((tinstr & 0x7F) << 1)
+ | ((tinstr & (1 << 7)) ? 0xFFFFFF00 : 0));
+ FLUSHPIPE;
+ }
+ valid = t_branch;
+ }
+ else /* UNDEFINED : cc=1110(AL) uses different format */
+ valid = t_undefined;
+ break;
+ case 28: /* B */
+ /* Format 18 */
+ state->Reg[15] = pc + 4
+ + (((tinstr & 0x3FF) << 1)
+ | ((tinstr & (1 << 10)) ? 0xFFFFF800 : 0));
+ FLUSHPIPE;
+ valid = t_branch;
+ break;
+ case 29: /* UNDEFINED */
+ valid = t_undefined;
+ break;
+ case 30: /* BL instruction 1 */
+ /* Format 19 */
+ /* There is no single ARM instruction equivalent for this Thumb
+ instruction. To keep the simulation simple (from the user
+ perspective) we check if the following instruction is the
+ second half of this BL, and if it is we simulate it
+ immediately. */
+ state->Reg[14] = state->Reg[15] \
+ + (((tinstr & 0x07FF) << 12) \
+ | ((tinstr & (1 << 10)) ? 0xFF800000 : 0));
+ valid = t_branch; /* in-case we don't have the 2nd half */
+ tinstr = next_instr; /* move the instruction down */
+ if (((tinstr & 0xF800) >> 11) != 31)
+ break; /* exit, since not correct instruction */
+ /* else we fall through to process the second half of the BL */
+ pc += 2; /* point the pc at the 2nd half */
+ case 31: /* BL instruction 2 */
+ /* Format 19 */
+ /* There is no single ARM instruction equivalent for this
+ instruction. Also, it should only ever be matched with the
+ fmt19 "BL instruction 1" instruction. However, we do allow
+ the simulation of it on its own, with undefined results if
+ r14 is not suitably initialised.*/
+ {
+ ARMword tmp = (pc + 2);
+ state->Reg[15] = (state->Reg[14] + ((tinstr & 0x07FF) << 1));
+ state->Reg[14] = (tmp | 1);
+ valid = t_branch;
+ FLUSHPIPE;
+ }
+ break;
+ }
+
+ return valid;
+}