MIPS: Stop building position independent code

U-Boot has up until now built with -fpic for the MIPS architecture,
producing position independent code which uses indirection through a
global offset table, making relocation fairly straightforward as it
simply involves patching up GOT entries.

Using -fpic does however have some downsides. The biggest of these is
that generated code is bloated in various ways. For example, function
calls are indirected through the GOT & the t9 register:

    8f998064    lw      t9,-32668(gp)
    0320f809    jalr    t9

Without -fpic the call is simply:

    0f803f01    jal     be00fc04 <puts>

This is more compact & faster (due to the lack of the load & the
dependency the jump has on its result). It is also easier to read &
debug because the disassembly shows what function is being called,
rather than just an offset from gp which would then have to be looked
up in the ELF to discover the target function.

Another disadvantage of -fpic is that each function begins with a
sequence to calculate the value of the gp register, for example:

    3c1c0004    lui     gp,0x4
    279c3384    addiu   gp,gp,13188
    0399e021    addu    gp,gp,t9

Without using -fpic this sequence no longer appears at the start of
each function, reducing code size considerably.

This patch switches U-Boot from building with -fpic to building with
-fno-pic, in order to gain the benefits described above. The cost of
this is an extra step during the build process to extract relocation
data from the ELF & write it into a new .rel section in a compact
format, plus the added complexity of dealing with multiple types of
relocation rather than the single type that applied to the GOT. The
benefit is smaller, cleaner, more debuggable code. The relocate_code()
function is reimplemented in C to handle the new relocation scheme,
which also makes it easier to read & debug.

Taking maltael_defconfig as an example the size of u-boot.bin built
using the Codescape MIPS 2016.05-06 toolchain (gcc 4.9.2, binutils
2.24.90) shrinks from 254KiB to 224KiB.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
Cc: u-boot@lists.denx.de
Reviewed-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
Tested-by: Daniel Schwierzeck <daniel.schwierzeck@gmail.com>
author: Paul Burton <paul.burton@imgtec.com> 2017-06-19 11:53:47 -0700
committer: Daniel Schwierzeck <daniel.schwierzeck@gmail.com> 2017-07-25 20:44:00 +0200
commit: 703ec9ddf965063cd79910df281657b056879368 (patch)
tree: 480721684cc83e43b00d3ce3c2cd5fc92390daf4 /arch/mips/cpu
parent: 09bebb8397f72340812de577a2d00612c7b7ed75 (diff)
download: u-boot-703ec9ddf965063cd79910df281657b056879368.zip
u-boot-703ec9ddf965063cd79910df281657b056879368.tar.gz
u-boot-703ec9ddf965063cd79910df281657b056879368.tar.bz2
2 files changed, 9 insertions, 162 deletions
diff --git a/arch/mips/cpu/start.S b/arch/mips/cpu/start.S
index d01ee9f..952c57a 100644
--- a/arch/mips/cpu/start.S
+++ b/arch/mips/cpu/start.S
@@ -221,18 +221,6 @@ wr_done:
 	ehb
 #endif
 
-	/*
-	 * Initialize $gp, force pointer sized alignment of bal instruction to
-	 * forbid the compiler to put nop's between bal and _gp. This is
-	 * required to keep _gp and ra aligned to 8 byte.
-	 */
-	.align	PTRLOG
-	bal	1f
-	 nop
-	PTR	_gp
-1:
-	PTR_L	gp, 0(ra)
-
 #ifdef CONFIG_MIPS_CM
 	PTR_LA	t9, mips_cm_map
 	jalr	t9
@@ -291,121 +279,3 @@ wr_done:
 	 move	ra, zero
 
 	END(_start)
-
-/*
- * void relocate_code (addr_sp, gd, addr_moni)
- *
- * This "function" does not return, instead it continues in RAM
- * after relocating the monitor code.
- *
- * a0 = addr_sp
- * a1 = gd
- * a2 = destination address
- */
-ENTRY(relocate_code)
-	move	sp, a0			# set new stack pointer
-	move	fp, sp
-
-	move	s0, a1			# save gd in s0
-	move	s2, a2			# save destination address in s2
-
-	PTR_LI	t0, CONFIG_SYS_MONITOR_BASE
-	PTR_SUB	s1, s2, t0		# s1 <-- relocation offset
-
-	PTR_LA	t2, __image_copy_end
-	move	t1, a2
-
-	/*
-	 * t0 = source address
-	 * t1 = target address
-	 * t2 = source end address
-	 */
-1:
-	PTR_L	t3, 0(t0)
-	PTR_S	t3, 0(t1)
-	PTR_ADDU t0, PTRSIZE
-	blt	t0, t2, 1b
-	 PTR_ADDU t1, PTRSIZE
-
-	/*
-	 * Now we want to update GOT.
-	 *
-	 * GOT[0] is reserved. GOT[1] is also reserved for the dynamic object
-	 * generated by GNU ld. Skip these reserved entries from relocation.
-	 */
-	PTR_LA	t3, num_got_entries
-	PTR_LA	t8, _GLOBAL_OFFSET_TABLE_
-	PTR_ADD	t8, s1			# t8 now holds relocated _G_O_T_
-	PTR_ADDIU t8, t8, 2 * PTRSIZE	# skipping first two entries
-	PTR_LI	t2, 2
-1:
-	PTR_L	t1, 0(t8)
-	beqz	t1, 2f
-	 PTR_ADD t1, s1
-	PTR_S	t1, 0(t8)
-2:
-	PTR_ADDIU t2, 1
-	blt	t2, t3, 1b
-	 PTR_ADDIU t8, PTRSIZE
-
-	/* Update dynamic relocations */
-	PTR_LA	t1, __rel_dyn_start
-	PTR_LA	t2, __rel_dyn_end
-
-	b	2f			# skip first reserved entry
-	 PTR_ADDIU t1, 2 * PTRSIZE
-
-1:
-	lw	t8, -4(t1)		# t8 <-- relocation info
-
-	PTR_LI	t3, MIPS_RELOC
-	bne	t8, t3, 2f		# skip non-MIPS_RELOC entries
-	 nop
-
-	PTR_L	t3, -(2 * PTRSIZE)(t1)	# t3 <-- location to fix up in FLASH
-
-	PTR_L	t8, 0(t3)		# t8 <-- original pointer
-	PTR_ADD	t8, s1			# t8 <-- adjusted pointer
-
-	PTR_ADD	t3, s1			# t3 <-- location to fix up in RAM
-	PTR_S	t8, 0(t3)
-
-2:
-	blt	t1, t2, 1b
-	 PTR_ADDIU t1, 2 * PTRSIZE	# each rel.dyn entry is 2*PTRSIZE bytes
-
-	/*
-	 * Flush caches to ensure our newly modified instructions are visible
-	 * to the instruction cache. We're still running with the old GOT, so
-	 * apply the reloc offset to the start address.
-	 */
-	PTR_LA	a0, __text_start
-	PTR_LA	a1, __text_end
-	PTR_SUB	a1, a1, a0
-	PTR_LA	t9, flush_cache
-	jalr	t9
-	 PTR_ADD	a0, s1
-
-	PTR_ADD	gp, s1			# adjust gp
-
-	/*
-	 * Clear BSS
-	 *
-	 * GOT is now relocated. Thus __bss_start and __bss_end can be
-	 * accessed directly via $gp.
-	 */
-	PTR_LA	t1, __bss_start		# t1 <-- __bss_start
-	PTR_LA	t2, __bss_end		# t2 <-- __bss_end
-
-1:
-	PTR_S	zero, 0(t1)
-	blt	t1, t2, 1b
-	 PTR_ADDIU t1, PTRSIZE
-
-	move	a0, s0			# a0 <-- gd
-	move	a1, s2
-	PTR_LA	t9, board_init_r
-	jr	t9
-	 move	ra, zero
-
-	END(relocate_code)
diff --git a/arch/mips/cpu/u-boot.lds b/arch/mips/cpu/u-boot.lds
index 0129c99..bd5536f 100644
--- a/arch/mips/cpu/u-boot.lds
+++ b/arch/mips/cpu/u-boot.lds
@@ -34,15 +34,6 @@ SECTIONS
 		*(.data*)
 	}
 
-	. = .;
-	_gp = ALIGN(16) + 0x7ff0;
-
-	.got : {
-		*(.got)
-	}
-
-	num_got_entries = SIZEOF(.got) >> PTR_COUNT_SHIFT;
-
 	. = ALIGN(4);
 	.sdata : {
 		*(.sdata*)
@@ -57,33 +48,19 @@ SECTIONS
 	__image_copy_end = .;
 	__init_end = .;
 
-	.rel.dyn : {
-		__rel_dyn_start = .;
-		*(.rel.dyn)
-		__rel_dyn_end = .;
-	}
-
-	.padding : {
-		/*
-		 * Workaround for a binutils feature (or bug?).
-		 *
-		 * The GNU ld from binutils puts the dynamic relocation
-		 * entries into the .rel.dyn section. Sometimes it
-		 * allocates more dynamic relocation entries than it needs
-		 * and the unused slots are set to R_MIPS_NONE entries.
-		 *
-		 * However the size of the .rel.dyn section in the ELF
-		 * section header does not cover the unused entries, so
-		 * objcopy removes those during stripping.
-		 *
-		 * Create a small section here to avoid that.
-		 */
-		LONG(0xFFFFFFFF)
+	/*
+	 * .rel must come last so that the mips-relocs tool can shrink
+	 * the section size & the PT_LOAD program header filesz.
+	 */
+	.rel : {
+		__rel_start = .;
+		BYTE(0x0)
+		. += (32 * 1024) - 1;
 	}
 
 	_end = .;
 
-	.bss __rel_dyn_start (OVERLAY) : {
+	.bss __rel_start (OVERLAY) : {
 		__bss_start = .;
 		*(.sbss.*)
 		*(.bss.*)
author	Paul Burton <paul.burton@imgtec.com>	2017-06-19 11:53:47 -0700
committer	Daniel Schwierzeck <daniel.schwierzeck@gmail.com>	2017-07-25 20:44:00 +0200
commit	703ec9ddf965063cd79910df281657b056879368 (patch)
tree	480721684cc83e43b00d3ce3c2cd5fc92390daf4 /arch/mips/cpu
parent	09bebb8397f72340812de577a2d00612c7b7ed75 (diff)
download	u-boot-703ec9ddf965063cd79910df281657b056879368.zip u-boot-703ec9ddf965063cd79910df281657b056879368.tar.gz u-boot-703ec9ddf965063cd79910df281657b056879368.tar.bz2