aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm.ld25
-rw-r--r--cpu-all.h9
-rw-r--r--disas.c6
-rw-r--r--dyngen.c271
-rw-r--r--dyngen.h60
-rw-r--r--elf.h2
6 files changed, 295 insertions, 78 deletions
diff --git a/arm.ld b/arm.ld
index 61f4c34..e216cbf 100644
--- a/arm.ld
+++ b/arm.ld
@@ -53,6 +53,10 @@ SECTIONS
.fini : { *(.fini) } =0x47ff041f
.rodata : { *(.rodata) *(.gnu.linkonce.r*) }
.rodata1 : { *(.rodata1) }
+ .ARM.extab : { *(.ARM.extab* .gnu.linkonce.armextab.*) }
+ __exidx_start = .;
+ .ARM.exidx : { *(.ARM.exidx* .gnu.linkonce.armexidx.*) }
+ __exidx_end = .;
.reginfo : { *(.reginfo) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
@@ -63,7 +67,28 @@ SECTIONS
*(.gnu.linkonce.d*)
CONSTRUCTORS
}
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.data1 : { *(.data1) }
+ .preinit_array :
+ {
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ KEEP (*(.preinit_array))
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+ }
+ .init_array :
+ {
+ PROVIDE_HIDDEN (__init_array_start = .);
+ KEEP (*(SORT(.init_array.*)))
+ KEEP (*(.init_array))
+ PROVIDE_HIDDEN (__init_array_end = .);
+ }
+ .fini_array :
+ {
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ KEEP (*(.fini_array))
+ KEEP (*(SORT(.fini_array.*)))
+ PROVIDE_HIDDEN (__fini_array_end = .);
+ }
.ctors :
{
*(.ctors)
diff --git a/cpu-all.h b/cpu-all.h
index 145d84b..996289e 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -992,6 +992,15 @@ static inline int64_t cpu_get_real_ticks (void)
return rval.i64;
#endif
}
+#else
+/* The host CPU doesn't have an easily accessible cycle counter.
+ Just return a monotonically increasing value. This will be totally wrong,
+ but hopefully better than nothing. */
+static inline int64_t cpu_get_real_ticks (void)
+{
+ static int64_t ticks = 0;
+ return ticks++;
+}
#endif
/* profiling */
diff --git a/disas.c b/disas.c
index fd91b92..27b6777 100644
--- a/disas.c
+++ b/disas.c
@@ -271,11 +271,9 @@ void disas(FILE *out, void *code, unsigned long size)
for (pc = (unsigned long)code; pc < (unsigned long)code + size; pc += count) {
fprintf(out, "0x%08lx: ", pc);
#ifdef __arm__
- /* since data are included in the code, it is better to
+ /* since data is included in the code, it is better to
display code data too */
- if (is_host) {
- fprintf(out, "%08x ", (int)bfd_getl32((const bfd_byte *)pc));
- }
+ fprintf(out, "%08x ", (int)bfd_getl32((const bfd_byte *)pc));
#endif
count = print_insn(pc, &disasm_info);
fprintf(out, "\n");
diff --git a/dyngen.c b/dyngen.c
index 5fb921e..2d93283 100644
--- a/dyngen.c
+++ b/dyngen.c
@@ -1255,90 +1255,149 @@ int arm_emit_ldr_info(const char *name, unsigned long start_offset,
{
uint8_t *p;
uint32_t insn;
- int offset, min_offset, pc_offset, data_size;
+ int offset, min_offset, pc_offset, data_size, spare, max_pool;
uint8_t data_allocated[1024];
unsigned int data_index;
+ int type;
memset(data_allocated, 0, sizeof(data_allocated));
p = p_start;
min_offset = p_end - p_start;
+ spare = 0x7fffffff;
while (p < p_start + min_offset) {
insn = get32((uint32_t *)p);
+ /* TODO: Armv5e ldrd. */
+ /* TODO: VFP load. */
if ((insn & 0x0d5f0000) == 0x051f0000) {
/* ldr reg, [pc, #im] */
offset = insn & 0xfff;
if (!(insn & 0x00800000))
- offset = -offset;
+ offset = -offset;
+ max_pool = 4096;
+ type = 0;
+ } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) {
+ /* FPA ldf. */
+ offset = (insn & 0xff) << 2;
+ if (!(insn & 0x00800000))
+ offset = -offset;
+ max_pool = 1024;
+ type = 1;
+ } else if ((insn & 0x0fff0000) == 0x028f0000) {
+ /* Some gcc load a doubleword immediate with
+ add regN, pc, #imm
+ ldmia regN, {regN, regM}
+ Hope and pray the compiler never generates something like
+ add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */
+ int r;
+
+ r = (insn & 0xf00) >> 7;
+ offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r));
+ max_pool = 1024;
+ type = 2;
+ } else {
+ max_pool = 0;
+ type = -1;
+ }
+ if (type >= 0) {
+ /* PC-relative load needs fixing up. */
+ if (spare > max_pool - offset)
+ spare = max_pool - offset;
if ((offset & 3) !=0)
- error("%s:%04x: ldr pc offset must be 32 bit aligned",
+ error("%s:%04x: pc offset must be 32 bit aligned",
+ name, start_offset + p - p_start);
+ if (offset < 0)
+ error("%s:%04x: Embedded literal value",
name, start_offset + p - p_start);
pc_offset = p - p_start + offset + 8;
if (pc_offset <= (p - p_start) ||
pc_offset >= (p_end - p_start))
- error("%s:%04x: ldr pc offset must point inside the function code",
+ error("%s:%04x: pc offset must point inside the function code",
name, start_offset + p - p_start);
if (pc_offset < min_offset)
min_offset = pc_offset;
if (outfile) {
- /* ldr position */
+ /* The instruction position */
fprintf(outfile, " arm_ldr_ptr->ptr = gen_code_ptr + %d;\n",
p - p_start);
- /* ldr data index */
- data_index = ((p_end - p_start) - pc_offset - 4) >> 2;
- fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr + %d;\n",
+ /* The position of the constant pool data. */
+ data_index = ((p_end - p_start) - pc_offset) >> 2;
+ fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n",
data_index);
+ fprintf(outfile, " arm_ldr_ptr->type = %d;\n", type);
fprintf(outfile, " arm_ldr_ptr++;\n");
- if (data_index >= sizeof(data_allocated))
- error("%s: too many data", name);
- if (!data_allocated[data_index]) {
- ELF_RELOC *rel;
- int i, addend, type;
- const char *sym_name, *p;
- char relname[1024];
-
- data_allocated[data_index] = 1;
-
- /* data value */
- addend = get32((uint32_t *)(p_start + pc_offset));
- relname[0] = '\0';
- for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
- if (rel->r_offset == (pc_offset + start_offset)) {
- sym_name = get_rel_sym_name(rel);
- /* the compiler leave some unnecessary references to the code */
- get_reloc_expr(relname, sizeof(relname), sym_name);
- type = ELF32_R_TYPE(rel->r_info);
- if (type != R_ARM_ABS32)
- error("%s: unsupported data relocation", name);
- break;
- }
- }
- fprintf(outfile, " arm_data_ptr[%d] = 0x%x",
- data_index, addend);
- if (relname[0] != '\0')
- fprintf(outfile, " + %s", relname);
- fprintf(outfile, ";\n");
- }
}
}
p += 4;
}
+
+ /* Copy and relocate the constant pool data. */
data_size = (p_end - p_start) - min_offset;
if (data_size > 0 && outfile) {
- fprintf(outfile, " arm_data_ptr += %d;\n", data_size >> 2);
+ spare += min_offset;
+ fprintf(outfile, " arm_data_ptr -= %d;\n", data_size >> 2);
+ fprintf(outfile, " arm_pool_ptr -= %d;\n", data_size);
+ fprintf(outfile, " if (arm_pool_ptr > gen_code_ptr + %d)\n"
+ " arm_pool_ptr = gen_code_ptr + %d;\n",
+ spare, spare);
+
+ data_index = 0;
+ for (pc_offset = min_offset;
+ pc_offset < p_end - p_start;
+ pc_offset += 4) {
+
+ ELF_RELOC *rel;
+ int i, addend, type;
+ const char *sym_name;
+ char relname[1024];
+
+ /* data value */
+ addend = get32((uint32_t *)(p_start + pc_offset));
+ relname[0] = '\0';
+ for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+ if (rel->r_offset == (pc_offset + start_offset)) {
+ sym_name = get_rel_sym_name(rel);
+ /* the compiler leaves some unnecessary references to the code */
+ get_reloc_expr(relname, sizeof(relname), sym_name);
+ type = ELF32_R_TYPE(rel->r_info);
+ if (type != R_ARM_ABS32)
+ error("%s: unsupported data relocation", name);
+ break;
+ }
+ }
+ fprintf(outfile, " arm_data_ptr[%d] = 0x%x",
+ data_index, addend);
+ if (relname[0] != '\0')
+ fprintf(outfile, " + %s", relname);
+ fprintf(outfile, ";\n");
+
+ data_index++;
+ }
}
- /* the last instruction must be a mov pc, lr */
if (p == p_start)
goto arm_ret_error;
p -= 4;
insn = get32((uint32_t *)p);
- if ((insn & 0xffff0000) != 0xe91b0000) {
+ /* The last instruction must be an ldm instruction. There are several
+ forms generated by gcc:
+ ldmib sp, {..., pc} (implies a sp adjustment of +4)
+ ldmia sp, {..., pc}
+ ldmea fp, {..., pc} */
+ if ((insn & 0xffff8000) == 0xe99d8000) {
+ if (outfile) {
+ fprintf(outfile,
+ " *(uint32_t *)(gen_code_ptr + %d) = 0xe28dd004;\n",
+ p - p_start);
+ }
+ p += 4;
+ } else if ((insn & 0xffff8000) != 0xe89d8000
+ && (insn & 0xffff8000) != 0xe91b8000) {
arm_ret_error:
if (!outfile)
printf("%s: invalid epilog\n", name);
}
- return p - p_start;
+ return p - p_start;
}
#endif
@@ -1537,6 +1596,8 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
}
#elif defined(HOST_ARM)
{
+ uint32_t insn;
+
if ((p_end - p_start) <= 16)
error("%s: function too small", name);
if (get32((uint32_t *)p_start) != 0xe1a0c00d ||
@@ -1545,6 +1606,12 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
error("%s: invalid prolog", name);
p_start += 12;
start_offset += 12;
+ insn = get32((uint32_t *)p_start);
+ if ((insn & 0xffffff00) == 0xe24dd000) {
+ /* Stack adjustment. Assume op uses the frame pointer. */
+ p_start -= 4;
+ start_offset -= 4;
+ }
copy_size = arm_emit_ldr_info(name, start_offset, NULL, p_start, p_end,
relocs, nb_relocs);
}
@@ -2282,7 +2349,37 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
int type;
int addend;
int reloc_offset;
-
+ uint32_t insn;
+
+ insn = get32((uint32_t *)(p_start + 4));
+ /* If prologue ends in sub sp, sp, #const then assume
+ op has a stack frame and needs the frame pointer. */
+ if ((insn & 0xffffff00) == 0xe24dd000) {
+ int i;
+ uint32_t opcode;
+ opcode = 0xe28db000; /* add fp, sp, #0. */
+#if 0
+/* ??? Need to undo the extra stack adjustment at the end of the op.
+ For now just leave the stack misaligned and hope it doesn't break anything
+ too important. */
+ if ((insn & 4) != 0) {
+ /* Preserve doubleword stack alignment. */
+ fprintf(outfile,
+ " *(uint32_t *)(gen_code_ptr + 4)= 0x%x;\n",
+ insn + 4);
+ opcode -= 4;
+ }
+#endif
+ insn = get32((uint32_t *)(p_start - 4));
+ /* Calculate the size of the saved registers,
+ excluding pc. */
+ for (i = 0; i < 15; i++) {
+ if (insn & (1 << i))
+ opcode += 4;
+ }
+ fprintf(outfile,
+ " *(uint32_t *)gen_code_ptr = 0x%x;\n", opcode);
+ }
arm_emit_ldr_info(name, start_offset, outfile, p_start, p_end,
relocs, nb_relocs);
@@ -2303,6 +2400,8 @@ void gen_code(const char *name, host_ulong offset, host_ulong size,
reloc_offset, name, addend);
break;
case R_ARM_PC24:
+ case R_ARM_JUMP24:
+ case R_ARM_CALL:
fprintf(outfile, " arm_reloc_pc24((uint32_t *)(gen_code_ptr + %d), 0x%x, %s);\n",
reloc_offset, addend, name);
break;
@@ -2407,6 +2506,28 @@ int gen_file(FILE *outfile, int out_type)
} else {
/* generate big code generation switch */
+
+#ifdef HOST_ARM
+ /* We need to know the size of all the ops so we can figure out when
+ to emit constant pools. This must be consistent with opc.h. */
+fprintf(outfile,
+"static const uint32_t arm_opc_size[] = {\n"
+" 0,\n" /* end */
+" 0,\n" /* nop */
+" 0,\n" /* nop1 */
+" 0,\n" /* nop2 */
+" 0,\n"); /* nop3 */
+ for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+ const char *name;
+ name = get_sym_name(sym);
+ if (strstart(name, OP_PREFIX, NULL)) {
+ fprintf(outfile, " %d,\n", sym->st_size);
+ }
+ }
+fprintf(outfile,
+"};\n");
+#endif
+
fprintf(outfile,
"int dyngen_code(uint8_t *gen_code_buf,\n"
" uint16_t *label_offsets, uint16_t *jmp_offsets,\n"
@@ -2417,10 +2538,36 @@ fprintf(outfile,
" const uint32_t *opparam_ptr;\n");
#ifdef HOST_ARM
+/* Arm is tricky because it uses constant pools for loading immediate values.
+ We assume (and require) each function is code followed by a constant pool.
+ All the ops are small so this should be ok. For each op we figure
+ out how much "spare" range we have in the load instructions. This allows
+ us to insert subsequent ops in between the op and the constant pool,
+ eliminating the need to jump around the pool.
+
+ We currently generate:
+
+ [ For this example we assume merging would move op1_pool out of range.
+ In practice we should be able to combine many ops before the offset
+ limits are reached. ]
+ op1_code;
+ op2_code;
+ goto op3;
+ op2_pool;
+ op1_pool;
+op3:
+ op3_code;
+ ret;
+ op3_pool;
+
+ Ideally we'd put op1_pool before op2_pool, but that requires two passes.
+ */
fprintf(outfile,
" uint8_t *last_gen_code_ptr = gen_code_buf;\n"
" LDREntry *arm_ldr_ptr = arm_ldr_table;\n"
-" uint32_t *arm_data_ptr = arm_data_table;\n");
+" uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+/* Initialise the permissible pool offset to an arbitrarily large value. */
+" uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n");
#endif
#ifdef HOST_IA64
{
@@ -2489,9 +2636,23 @@ fprintf(outfile,
/* Generate prologue, if needed. */
fprintf(outfile,
-" for(;;) {\n"
-" switch(*opc_ptr++) {\n"
-);
+" for(;;) {\n");
+
+#ifdef HOST_ARM
+/* Generate constant pool if needed */
+fprintf(outfile,
+" if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n"
+" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n"
+" last_gen_code_ptr = gen_code_ptr;\n"
+" arm_ldr_ptr = arm_ldr_table;\n"
+" arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n"
+" arm_pool_ptr = gen_code_ptr + 0x1000000;\n"
+" }\n");
+#endif
+
+fprintf(outfile,
+" switch(*opc_ptr++) {\n");
for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
const char *name;
@@ -2525,17 +2686,6 @@ fprintf(outfile,
" goto the_end;\n"
" }\n");
-#ifdef HOST_ARM
-/* generate constant table if needed */
-fprintf(outfile,
-" if ((gen_code_ptr - last_gen_code_ptr) >= (MAX_FRAG_SIZE - MAX_OP_SIZE)) {\n"
-" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 1);\n"
-" last_gen_code_ptr = gen_code_ptr;\n"
-" arm_ldr_ptr = arm_ldr_table;\n"
-" arm_data_ptr = arm_data_table;\n"
-" }\n");
-#endif
-
fprintf(outfile,
" }\n"
@@ -2553,7 +2703,10 @@ fprintf(outfile,
/* generate some code patching */
#ifdef HOST_ARM
-fprintf(outfile, "gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 0);\n");
+fprintf(outfile,
+"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n"
+" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, "
+"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n");
#endif
/* flush instruction cache */
fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n");
diff --git a/dyngen.h b/dyngen.h
index fe0a936..2a87c44 100644
--- a/dyngen.h
+++ b/dyngen.h
@@ -19,7 +19,7 @@
*/
int __op_param1, __op_param2, __op_param3;
-#ifdef __sparc__
+#if defined(__sparc__) || defined(__arm__)
void __op_gen_label1(){}
void __op_gen_label2(){}
void __op_gen_label3(){}
@@ -145,18 +145,16 @@ void fix_bsr(void *p, int offset) {
#ifdef __arm__
-#define MAX_OP_SIZE (128 * 4) /* in bytes */
-/* max size of the code that can be generated without calling arm_flush_ldr */
-#define MAX_FRAG_SIZE (1024 * 4)
-//#define MAX_FRAG_SIZE (135 * 4) /* for testing */
+#define ARM_LDR_TABLE_SIZE 1024
typedef struct LDREntry {
uint8_t *ptr;
uint32_t *data_ptr;
+ unsigned type:2;
} LDREntry;
static LDREntry arm_ldr_table[1024];
-static uint32_t arm_data_table[1024];
+static uint32_t arm_data_table[ARM_LDR_TABLE_SIZE];
extern char exec_loop;
@@ -175,8 +173,9 @@ static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr,
int offset, data_size, target;
uint8_t *data_ptr;
uint32_t insn;
+ uint32_t mask;
- data_size = (uint8_t *)data_end - (uint8_t *)data_start;
+ data_size = (data_end - data_start) << 2;
if (gen_jmp) {
/* generate branch to skip the data */
@@ -198,17 +197,48 @@ static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr,
offset = ((unsigned long)(le->data_ptr) - (unsigned long)data_start) +
(unsigned long)data_ptr -
(unsigned long)ptr - 8;
- insn = *ptr & ~(0xfff | 0x00800000);
if (offset < 0) {
- offset = - offset;
- } else {
- insn |= 0x00800000;
- }
- if (offset > 0xfff) {
- fprintf(stderr, "Error ldr offset\n");
+ fprintf(stderr, "Negative constant pool offset\n");
abort();
}
- insn |= offset;
+ switch (le->type) {
+ case 0: /* ldr */
+ mask = ~0x00800fff;
+ if (offset >= 4096) {
+ fprintf(stderr, "Bad ldr offset\n");
+ abort();
+ }
+ break;
+ case 1: /* ldc */
+ mask = ~0x008000ff;
+ if (offset >= 1024 ) {
+ fprintf(stderr, "Bad ldc offset\n");
+ abort();
+ }
+ break;
+ case 2: /* add */
+ mask = ~0xfff;
+ if (offset >= 1024 ) {
+ fprintf(stderr, "Bad add offset\n");
+ abort();
+ }
+ break;
+ default:
+ fprintf(stderr, "Bad pc relative fixup\n");
+ abort();
+ }
+ insn = *ptr & mask;
+ switch (le->type) {
+ case 0: /* ldr */
+ insn |= offset | 0x00800000;
+ break;
+ case 1: /* ldc */
+ insn |= (offset >> 2) | 0x00800000;
+ break;
+ case 2: /* add */
+ insn |= (offset >> 2) | 0xf00;
+ break;
+ }
*ptr = insn;
}
return gen_code_ptr;
diff --git a/elf.h b/elf.h
index 8ceb949..1825d50 100644
--- a/elf.h
+++ b/elf.h
@@ -502,6 +502,8 @@ typedef struct {
#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */
#define R_ARM_GOT32 26 /* 32 bit GOT entry */
#define R_ARM_PLT32 27 /* 32 bit PLT address */
+#define R_ARM_CALL 28
+#define R_ARM_JUMP24 29
#define R_ARM_GNU_VTENTRY 100
#define R_ARM_GNU_VTINHERIT 101
#define R_ARM_THM_PC11 102 /* thumb unconditional branch */