author		Tom Rini <trini@konsulko.com>	2021-05-18 11:09:41 -0400
committer	Tom Rini <trini@konsulko.com>	2021-05-18 11:09:41 -0400
commit		52993fcb7672885ea827ef3f24927d650bca2e4e (patch)
tree		c4547bdbf6ee732a28f56e4c845e45f7ae565ac6 /arch
parent		feddbdb55f47b94f9f6695dc19a826953f893f7a (diff)
parent		89419279f4fe6bfd68dd518059ef2007295f1cb4 (diff)
Merge https://source.denx.de/u-boot/custodians/u-boot-riscv
Diffstat (limited to 'arch')
-rw-r--r--	arch/riscv/Kconfig			13
-rw-r--r--	arch/riscv/cpu/fu540/Kconfig		2
-rw-r--r--	arch/riscv/cpu/generic/Kconfig		3
-rw-r--r--	arch/riscv/include/asm/global_data.h	2
-rw-r--r--	arch/riscv/lib/Makefile			2
-rw-r--r--	arch/riscv/lib/fdt_fixup.c		11
-rw-r--r--	arch/riscv/lib/memcpy.S			223
-rw-r--r--	arch/riscv/lib/memmove.S		176
8 files changed, 281 insertions, 151 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3f221dc..63665d2 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -158,7 +158,14 @@ config DMA_ADDR_T_64BIT
 
 config SIFIVE_CLINT
 	bool
-	depends on RISCV_MMODE || SPL_RISCV_MMODE
+	depends on RISCV_MMODE
+	help
+	  The SiFive CLINT block holds memory-mapped control and status registers
+	  associated with software and timer interrupts.
+
+config SPL_SIFIVE_CLINT
+	bool
+	depends on SPL_RISCV_MMODE
 	help
 	  The SiFive CLINT block holds memory-mapped control and status registers
 	  associated with software and timer interrupts.
@@ -271,6 +278,8 @@ config STACK_SIZE_SHIFT
 config OF_BOARD_FIXUP
 	default y if OF_SEPARATE && RISCV_SMODE
 
+menu "Use assembly optimized implementation of memory routines"
+
 config USE_ARCH_MEMCPY
 	bool "Use an assembly optimized implementation of memcpy"
 	default y
@@ -350,3 +359,5 @@ config TPL_USE_ARCH_MEMSET
 	  but may increase the binary size.
 
 endmenu
+
+endmenu
diff --git a/arch/riscv/cpu/fu540/Kconfig b/arch/riscv/cpu/fu540/Kconfig
index 616b256..dcf0994 100644
--- a/arch/riscv/cpu/fu540/Kconfig
+++ b/arch/riscv/cpu/fu540/Kconfig
@@ -11,7 +11,7 @@ config SIFIVE_FU540
 	imply CPU
 	imply CPU_RISCV
 	imply RISCV_TIMER if (RISCV_SMODE || SPL_RISCV_SMODE)
-	imply SIFIVE_CLINT if (RISCV_MMODE || SPL_RISCV_MMODE)
+	imply SPL_SIFIVE_CLINT
 	imply CMD_CPU
 	imply SPL_CPU
 	imply SPL_OPENSBI
diff --git a/arch/riscv/cpu/generic/Kconfig b/arch/riscv/cpu/generic/Kconfig
index 198e36e..6f73bdd 100644
--- a/arch/riscv/cpu/generic/Kconfig
+++ b/arch/riscv/cpu/generic/Kconfig
@@ -8,7 +8,8 @@ config GENERIC_RISCV
 	imply CPU
 	imply CPU_RISCV
 	imply RISCV_TIMER if (RISCV_SMODE || SPL_RISCV_SMODE)
-	imply SIFIVE_CLINT if (RISCV_MMODE || SPL_RISCV_MMODE)
+	imply SIFIVE_CLINT if RISCV_MMODE
+	imply SPL_SIFIVE_CLINT if SPL_RISCV_MMODE
 	imply CMD_CPU
 	imply SPL_CPU
 	imply SPL_OPENSBI
diff --git a/arch/riscv/include/asm/global_data.h b/arch/riscv/include/asm/global_data.h
index d3a0b1d..095484a 100644
--- a/arch/riscv/include/asm/global_data.h
+++ b/arch/riscv/include/asm/global_data.h
@@ -18,7 +18,7 @@
 struct arch_global_data {
 	long boot_hart;	/* boot hart id */
 	phys_addr_t firmware_fdt_addr;
-#ifdef CONFIG_SIFIVE_CLINT
+#if CONFIG_IS_ENABLED(SIFIVE_CLINT)
 	void __iomem *clint;	/* clint base address */
 #endif
 #ifdef CONFIG_ANDES_PLIC
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index d08cbe9..c4cc414 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_CMD_BOOTI) += bootm.o image.o
 obj-$(CONFIG_CMD_GO) += boot.o
 obj-y	+= cache.o
 ifeq ($(CONFIG_$(SPL_)RISCV_MMODE),y)
-obj-$(CONFIG_SIFIVE_CLINT) += sifive_clint.o
+obj-$(CONFIG_$(SPL_)SIFIVE_CLINT) += sifive_clint.o
 obj-$(CONFIG_ANDES_PLIC) += andes_plic.o
 else
 obj-$(CONFIG_SBI) += sbi.o
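The global_data.h hunk above replaces a plain #ifdef with CONFIG_IS_ENABLED(), U-Boot's phase-aware test: it resolves against CONFIG_SIFIVE_CLINT in the main build and CONFIG_SPL_SIFIVE_CLINT in the SPL build, which is what lets the new SPL_SIFIVE_CLINT symbol (and the $(SPL_) substitution in the Makefile hunk above) work without further ifdeffery. For readers unfamiliar with the idiom, here is a standalone sketch of the preprocessor trick this family of macros builds on, the same one behind Linux's IS_ENABLED(); the CONFIG_* define below stands in for what Kconfig would generate and is not U-Boot's real header:

#include <stdio.h>

#define CONFIG_SPL_SIFIVE_CLINT 1	/* pretend the SPL config set this */

/*
 * If 'option' is defined to 1, the token paste yields __ARG_PLACEHOLDER_1,
 * which expands to "0," and shifts the 1 into the second-argument slot;
 * otherwise the unexpanded junk token stays put and the 0 is selected.
 */
#define __ARG_PLACEHOLDER_1		0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk)	__take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val)		____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x)			___is_defined(x)
#define IS_ENABLED(option)		__is_defined(option)

int main(void)
{
	printf("%d\n", IS_ENABLED(CONFIG_SPL_SIFIVE_CLINT));	/* prints 1 */
	printf("%d\n", IS_ENABLED(CONFIG_SIFIVE_CLINT));	/* prints 0 */
	return 0;
}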
diff --git a/arch/riscv/lib/fdt_fixup.c b/arch/riscv/lib/fdt_fixup.c
index 1f3f236..f636b28 100644
--- a/arch/riscv/lib/fdt_fixup.c
+++ b/arch/riscv/lib/fdt_fixup.c
@@ -151,14 +151,17 @@
 	}
 	chosen_offset = fdt_path_offset(blob, "/chosen");
 	if (chosen_offset < 0) {
-		err = fdt_add_subnode(blob, 0, "chosen");
-		if (err < 0) {
+		chosen_offset = fdt_add_subnode(blob, 0, "chosen");
+		if (chosen_offset < 0) {
 			log_err("chosen node cannot be added\n");
-			return err;
+			return chosen_offset;
 		}
 	}
 	/* Overwrite the boot-hartid as U-Boot is the last stage BL */
-	fdt_setprop_u32(blob, chosen_offset, "boot-hartid", gd->arch.boot_hart);
+	err = fdt_setprop_u32(blob, chosen_offset, "boot-hartid",
+			      gd->arch.boot_hart);
+	if (err < 0)
+		return log_msg_ret("could not set boot-hartid", err);
 #endif
 
 	/* Copy the reserved-memory node to the DT used by OS */
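Two fixes land in this fdt_fixup.c hunk: fdt_add_subnode() returns the offset of the newly created node on success, so that value must be captured and reused as the /chosen offset rather than treated purely as an error code, and fdt_setprop_u32() can itself fail (for example when the blob runs out of space), so its result is now checked instead of dropped. A minimal sketch of the corrected pattern using the standard libfdt calls; the helper name set_boot_hartid() is made up for illustration:

#include <libfdt.h>

static int set_boot_hartid(void *blob, unsigned int hartid)
{
	int chosen = fdt_path_offset(blob, "/chosen");

	if (chosen < 0) {
		/* On success this is the *offset* of the new node, so it
		 * must replace the stale negative lookup result. */
		chosen = fdt_add_subnode(blob, 0, "chosen");
		if (chosen < 0)
			return chosen;	/* propagate the libfdt error */
	}

	/* The property write can fail too, e.g. -FDT_ERR_NOSPACE. */
	return fdt_setprop_u32(blob, chosen, "boot-hartid", hartid);
}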
diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
index 51ab716..00672c1 100644
--- a/arch/riscv/lib/memcpy.S
+++ b/arch/riscv/lib/memcpy.S
@@ -9,100 +9,151 @@
 /* void *memcpy(void *, const void *, size_t) */
 ENTRY(__memcpy)
 WEAK(memcpy)
-	move	t6, a0	/* Preserve return value */
+	/* Save for return value */
+	mv	t6, a0
 
-	/* Defer to byte-oriented copy for small sizes */
-	sltiu	a3, a2, 128
-	bnez	a3, 4f
-	/* Use word-oriented copy only if low-order bits match */
-	andi	a3, t6, SZREG-1
-	andi	a4, a1, SZREG-1
-	bne	a3, a4, 4f
+	/*
+	 * Register allocation for code below:
+	 * a0 - start of uncopied dst
+	 * a1 - start of uncopied src
+	 * t0 - end of uncopied dst
+	 */
+	add	t0, a0, a2
 
-	beqz	a3, 2f	/* Skip if already aligned */
 	/*
-	 * Round to nearest double word-aligned address
-	 * greater than or equal to start address
+	 * Use bytewise copy if too small.
+	 *
+	 * This threshold must be at least 2*SZREG to ensure at least one
+	 * wordwise copy is performed. It is chosen to be 16 because it will
+	 * save at least 7 iterations of bytewise copy, which pays off the
+	 * fixed overhead.
 	 */
-	andi	a3, a1, ~(SZREG-1)
-	addi	a3, a3, SZREG
-	/* Handle initial misalignment */
-	sub	a4, a3, a1
+	li	a3, 16
+	bltu	a2, a3, .Lbyte_copy_tail
+
+	/*
+	 * Bytewise copy first to align a0 to word boundary.
+	 */
+	addi	a2, a0, SZREG-1
+	andi	a2, a2, ~(SZREG-1)
+	beq	a0, a2, 2f
 1:
-	lb	a5, 0(a1)
-	addi	a1, a1, 1
-	sb	a5, 0(t6)
-	addi	t6, t6, 1
-	bltu	a1, a3, 1b
-	sub	a2, a2, a4	/* Update count */
+	lb	a5, 0(a1)
+	addi	a1, a1, 1
+	sb	a5, 0(a0)
+	addi	a0, a0, 1
+	bne	a0, a2, 1b
+2:
+
+	/*
+	 * Now a0 is word-aligned. If a1 is also word aligned, we could perform
+	 * aligned word-wise copy. Otherwise we need to perform misaligned
+	 * word-wise copy.
+	 */
+	andi	a3, a1, SZREG-1
+	bnez	a3, .Lmisaligned_word_copy
+
+	/* Unrolled wordwise copy */
+	addi	t0, t0, -(16*SZREG-1)
+	bgeu	a0, t0, 2f
+1:
+	REG_L	a2, 0(a1)
+	REG_L	a3, SZREG(a1)
+	REG_L	a4, 2*SZREG(a1)
+	REG_L	a5, 3*SZREG(a1)
+	REG_L	a6, 4*SZREG(a1)
+	REG_L	a7, 5*SZREG(a1)
+	REG_L	t1, 6*SZREG(a1)
+	REG_L	t2, 7*SZREG(a1)
+	REG_L	t3, 8*SZREG(a1)
+	REG_L	t4, 9*SZREG(a1)
+	REG_L	t5, 10*SZREG(a1)
+	REG_S	a2, 0(a0)
+	REG_S	a3, SZREG(a0)
+	REG_S	a4, 2*SZREG(a0)
+	REG_S	a5, 3*SZREG(a0)
+	REG_S	a6, 4*SZREG(a0)
+	REG_S	a7, 5*SZREG(a0)
+	REG_S	t1, 6*SZREG(a0)
+	REG_S	t2, 7*SZREG(a0)
+	REG_S	t3, 8*SZREG(a0)
+	REG_S	t4, 9*SZREG(a0)
+	REG_S	t5, 10*SZREG(a0)
+	REG_L	a2, 11*SZREG(a1)
+	REG_L	a3, 12*SZREG(a1)
+	REG_L	a4, 13*SZREG(a1)
+	REG_L	a5, 14*SZREG(a1)
+	REG_L	a6, 15*SZREG(a1)
+	addi	a1, a1, 16*SZREG
+	REG_S	a2, 11*SZREG(a0)
+	REG_S	a3, 12*SZREG(a0)
+	REG_S	a4, 13*SZREG(a0)
+	REG_S	a5, 14*SZREG(a0)
+	REG_S	a6, 15*SZREG(a0)
+	addi	a0, a0, 16*SZREG
+	bltu	a0, t0, 1b
 2:
-	andi	a4, a2, ~((16*SZREG)-1)
-	beqz	a4, 4f
-	add	a3, a1, a4
-3:
-	REG_L	a4, 0(a1)
-	REG_L	a5, SZREG(a1)
-	REG_L	a6, 2*SZREG(a1)
-	REG_L	a7, 3*SZREG(a1)
-	REG_L	t0, 4*SZREG(a1)
-	REG_L	t1, 5*SZREG(a1)
-	REG_L	t2, 6*SZREG(a1)
-	REG_L	t3, 7*SZREG(a1)
-	REG_L	t4, 8*SZREG(a1)
-	REG_L	t5, 9*SZREG(a1)
-	REG_S	a4, 0(t6)
-	REG_S	a5, SZREG(t6)
-	REG_S	a6, 2*SZREG(t6)
-	REG_S	a7, 3*SZREG(t6)
-	REG_S	t0, 4*SZREG(t6)
-	REG_S	t1, 5*SZREG(t6)
-	REG_S	t2, 6*SZREG(t6)
-	REG_S	t3, 7*SZREG(t6)
-	REG_S	t4, 8*SZREG(t6)
-	REG_S	t5, 9*SZREG(t6)
-	REG_L	a4, 10*SZREG(a1)
-	REG_L	a5, 11*SZREG(a1)
-	REG_L	a6, 12*SZREG(a1)
-	REG_L	a7, 13*SZREG(a1)
-	REG_L	t0, 14*SZREG(a1)
-	REG_L	t1, 15*SZREG(a1)
-	addi	a1, a1, 16*SZREG
-	REG_S	a4, 10*SZREG(t6)
-	REG_S	a5, 11*SZREG(t6)
-	REG_S	a6, 12*SZREG(t6)
-	REG_S	a7, 13*SZREG(t6)
-	REG_S	t0, 14*SZREG(t6)
-	REG_S	t1, 15*SZREG(t6)
-	addi	t6, t6, 16*SZREG
-	bltu	a1, a3, 3b
-	andi	a2, a2, (16*SZREG)-1	/* Update count */
-
-4:
-	/* Handle trailing misalignment */
-	beqz	a2, 6f
-	add	a3, a1, a2
-
-	/* Use word-oriented copy if co-aligned to word boundary */
-	or	a5, a1, t6
-	or	a5, a5, a3
-	andi	a5, a5, 3
-	bnez	a5, 5f
-7:
-	lw	a4, 0(a1)
-	addi	a1, a1, 4
-	sw	a4, 0(t6)
-	addi	t6, t6, 4
-	bltu	a1, a3, 7b
+	/* Post-loop increment by 16*SZREG-1 and pre-loop decrement by SZREG-1 */
+	addi	t0, t0, 15*SZREG
 
-	ret
+	/* Wordwise copy */
+	bgeu	a0, t0, 2f
+1:
+	REG_L	a5, 0(a1)
+	addi	a1, a1, SZREG
+	REG_S	a5, 0(a0)
+	addi	a0, a0, SZREG
+	bltu	a0, t0, 1b
+2:
+	addi	t0, t0, SZREG-1
 
-5:
-	lb	a4, 0(a1)
-	addi	a1, a1, 1
-	sb	a4, 0(t6)
-	addi	t6, t6, 1
-	bltu	a1, a3, 5b
-6:
+.Lbyte_copy_tail:
+	/*
+	 * Bytewise copy anything left.
+	 */
+	beq	a0, t0, 2f
+1:
+	lb	a5, 0(a1)
+	addi	a1, a1, 1
+	sb	a5, 0(a0)
+	addi	a0, a0, 1
+	bne	a0, t0, 1b
+2:
+
+	mv	a0, t6
 	ret
+
+.Lmisaligned_word_copy:
+	/*
+	 * Misaligned word-wise copy.
+	 * For misaligned copy we still perform word-wise copy, but we need to
+	 * use the value fetched from the previous iteration and do some shifts.
+	 * This is safe because we wouldn't access more words than necessary.
+	 */
+
+	/* Calculate shifts */
+	slli	t3, a3, 3
+	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
+
+	/* Load the initial value and align a1 */
+	andi	a1, a1, ~(SZREG-1)
+	REG_L	a5, 0(a1)
+
+	addi	t0, t0, -(SZREG-1)
+	/* At least one iteration will be executed here, no check */
1:
+	srl	a4, a5, t3
+	REG_L	a5, SZREG(a1)
+	addi	a1, a1, SZREG
+	sll	a2, a5, t4
+	or	a2, a2, a4
+	REG_S	a2, 0(a0)
+	addi	a0, a0, SZREG
+	bltu	a0, t0, 1b
+
+	/* Update pointers to correct value */
+	addi	t0, t0, SZREG-1
+	add	a1, a1, a3
+
+	j	.Lbyte_copy_tail
 END(__memcpy)
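The .Lmisaligned_word_copy path is the subtle part of the new memcpy: instead of issuing unaligned loads, it reads aligned words from the source and merges each adjacent pair, shifting the current word right by 8*offset bits (slli t3, a3, 3 computes that amount) and the next word left by the complement. A little-endian C sketch of the same merge, assuming, as the assembly does on this path, that the source really is misaligned so the offset is nonzero; the function name and the use of unsigned long as the machine word are illustrative:

#include <stddef.h>
#include <stdint.h>

static void copy_misaligned_words(unsigned long *dst,
				  const unsigned char *src, size_t nwords)
{
	size_t off = (uintptr_t)src % sizeof(unsigned long);	/* off != 0 */
	const unsigned long *s = (const unsigned long *)(src - off);
	unsigned int rsh = off * 8;
	unsigned int lsh = sizeof(unsigned long) * 8 - rsh;
	unsigned long cur = *s++;	/* initial aligned word (REG_L a5, 0(a1)) */

	while (nwords--) {
		unsigned long next = *s++;
		/* Low bytes come from 'cur', high bytes from 'next'. */
		*dst++ = (cur >> rsh) | (next << lsh);
		cur = next;
	}
}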
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
index 07d1d21..fbe6701 100644
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -5,60 +5,124 @@
 ENTRY(__memmove)
 WEAK(memmove)
-	move	t0, a0
-	move	t1, a1
-
-	beq	a0, a1, exit_memcpy
-	beqz	a2, exit_memcpy
-	srli	t2, a2, 0x2
-
-	slt	t3, a0, a1
-	beqz	t3, do_reverse
-
-	andi	a2, a2, 0x3
-	li	t4, 1
-	beqz	t2, byte_copy
-
-word_copy:
-	lw	t3, 0(a1)
-	addi	t2, t2, -1
-	addi	a1, a1, 4
-	sw	t3, 0(a0)
-	addi	a0, a0, 4
-	bnez	t2, word_copy
-	beqz	a2, exit_memcpy
-	j	byte_copy
-
-do_reverse:
-	add	a0, a0, a2
-	add	a1, a1, a2
-	andi	a2, a2, 0x3
-	li	t4, -1
-	beqz	t2, reverse_byte_copy
-
-reverse_word_copy:
-	addi	a1, a1, -4
-	addi	t2, t2, -1
-	lw	t3, 0(a1)
-	addi	a0, a0, -4
-	sw	t3, 0(a0)
-	bnez	t2, reverse_word_copy
-	beqz	a2, exit_memcpy
-
-reverse_byte_copy:
-	addi	a0, a0, -1
-	addi	a1, a1, -1
-
-byte_copy:
-	lb	t3, 0(a1)
-	addi	a2, a2, -1
-	sb	t3, 0(a0)
-	add	a1, a1, t4
-	add	a0, a0, t4
-	bnez	a2, byte_copy
-
-exit_memcpy:
-	move	a0, t0
-	move	a1, t1
-	ret
+	/*
+	 * Here we determine if forward copy is possible. Forward copy is
+	 * preferred to backward copy as it is more cache friendly.
+	 *
+	 * If a0 >= a1, t0 gives their distance, if t0 >= a2 then we can
+	 * copy forward.
+	 * If a0 < a1, we can always copy forward. This will make t0 negative,
+	 * so an *unsigned* comparison will always have t0 >= a2.
+	 *
+	 * For forward copy we just delegate the task to memcpy.
+	 */
+	sub	t0, a0, a1
+	bltu	t0, a2, 1f
+	tail	__memcpy
+1:
+
+	/*
+	 * Register allocation for code below:
+	 * a0 - end of uncopied dst
+	 * a1 - end of uncopied src
+	 * t0 - start of uncopied dst
+	 */
+	mv	t0, a0
+	add	a0, a0, a2
+	add	a1, a1, a2
+
+	/*
+	 * Use bytewise copy if too small.
+	 *
+	 * This threshold must be at least 2*SZREG to ensure at least one
+	 * wordwise copy is performed. It is chosen to be 16 because it will
+	 * save at least 7 iterations of bytewise copy, which pays off the
+	 * fixed overhead.
+	 */
+	li	a3, 16
+	bltu	a2, a3, .Lbyte_copy_tail
+
+	/*
+	 * Bytewise copy first to align t0 to word boundary.
+	 */
+	andi	a2, a0, ~(SZREG-1)
+	beq	a0, a2, 2f
+1:
+	addi	a1, a1, -1
+	lb	a5, 0(a1)
+	addi	a0, a0, -1
+	sb	a5, 0(a0)
+	bne	a0, a2, 1b
+2:
+
+	/*
+	 * Now a0 is word-aligned. If a1 is also word aligned, we could perform
+	 * aligned word-wise copy. Otherwise we need to perform misaligned
+	 * word-wise copy.
+	 */
+	andi	a3, a1, SZREG-1
+	bnez	a3, .Lmisaligned_word_copy
+
+	/* Wordwise copy */
+	addi	t0, t0, SZREG-1
+	bleu	a0, t0, 2f
+1:
+	addi	a1, a1, -SZREG
+	REG_L	a5, 0(a1)
+	addi	a0, a0, -SZREG
+	REG_S	a5, 0(a0)
+	bgtu	a0, t0, 1b
+2:
+	addi	t0, t0, -(SZREG-1)
+
+.Lbyte_copy_tail:
+	/*
+	 * Bytewise copy anything left.
+	 */
+	beq	a0, t0, 2f
+1:
+	addi	a1, a1, -1
+	lb	a5, 0(a1)
+	addi	a0, a0, -1
+	sb	a5, 0(a0)
+	bne	a0, t0, 1b
+2:
+
+	mv	a0, t0
+	ret
+
+.Lmisaligned_word_copy:
+	/*
+	 * Misaligned word-wise copy.
+	 * For misaligned copy we still perform word-wise copy, but we need to
+	 * use the value fetched from the previous iteration and do some shifts.
+	 * This is safe because we wouldn't access more words than necessary.
+	 */
+
+	/* Calculate shifts */
+	slli	t3, a3, 3
+	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
+
+	/* Load the initial value and align a1 */
+	andi	a1, a1, ~(SZREG-1)
+	REG_L	a5, 0(a1)
+
+	addi	t0, t0, SZREG-1
+	/* At least one iteration will be executed here, no check */
+1:
+	sll	a4, a5, t4
+	addi	a1, a1, -SZREG
+	REG_L	a5, 0(a1)
+	srl	a2, a5, t3
+	or	a2, a2, a4
+	addi	a0, a0, -SZREG
+	REG_S	a2, 0(a0)
+	bgtu	a0, t0, 1b
+
+	/* Update pointers to correct value */
+	addi	t0, t0, -(SZREG-1)
+	add	a1, a1, a3
+
+	j	.Lbyte_copy_tail
+
 END(__memmove)
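The opening comparison of the new __memmove compresses the whole overlap analysis into one unsigned test: computed modulo 2^XLEN, dst - src is at least n both when the regions do not overlap and when dst sits below src (the subtraction wraps to a huge value), and in either case copying forward is safe, so the routine simply tail-calls the forward-copying __memcpy. A C sketch of that decision, with plain byte loops standing in for the tuned word-wise paths; the function name is hypothetical:

#include <stddef.h>
#include <stdint.h>

static void *memmove_sketch(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	/*
	 * d - s >= n (unsigned) covers both "no overlap" and "d below s,
	 * where the subtraction wraps": forward copy is safe either way.
	 */
	if ((uintptr_t)d - (uintptr_t)s >= n) {
		for (size_t i = 0; i < n; i++)
			d[i] = s[i];	/* forward, cache friendly */
	} else {
		while (n--)
			d[n] = s[n];	/* backward, for overlapping tails */
	}
	return dst;
}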