author    Tom Rini <trini@konsulko.com>  2022-03-02 13:59:33 -0500
committer Tom Rini <trini@konsulko.com>  2022-03-02 13:59:33 -0500
commit    2dfdba4a5a48de33a3cedc908c183b86b6ff7bd5 (patch)
tree      da9d57e285034143ed1e9cd4dfc30577792275ad
parent    f861ffa660dc47c4017c220b94d10a64439d46a7 (diff)
parent    5ff4857d3569710c0f1ce1848f1e7486e3a4cfbe (diff)
Merge branch '2022-03-02-armv8-fixes-and-cleanups' into next
To quote the author:

I was looking into the arm64 boot code lately and stumbled upon some issues. Also Nishanth brought back memories of a lengthy debug session, which was caused by U-Boot keeping SErrors masked. As the resulting patches are all somewhat related, I gathered this series here to address those problems.

Patches 1 to 3 address exception handling issues, with the SError enablement being the most prominent fix here. Patch 4 cleans up asm/io.h. This was on the list before[1], but was somehow lost when it was intercepted by a shorter version of itself. Patches 5 and 6 clean up some unnecessarily complicated AArch64 assembly code.
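As background, PSTATE.DAIF on AArch64 carries four exception mask bits, and the A bit is the one that masks SErrors; clearing it is a single system-register write. A minimal C inline-assembly sketch of that step (helper name hypothetical, encoding per the ARMv8 ARM):

	/* "msr daifclr, #4" clears DAIF bit 2 (A, the SError mask), so a
	 * pending SError is delivered immediately instead of sitting
	 * masked until an OS unmasks it much later.
	 */
	static inline void unmask_serror(void)
	{
		asm volatile("msr daifclr, #4" ::: "memory");
	}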
-rw-r--r--  arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S  |  11
-rw-r--r--  arch/arm/cpu/armv8/start.S                    |  10
-rw-r--r--  arch/arm/include/asm/io.h                     |  98
-rw-r--r--  arch/arm/include/asm/macro.h                  |  37
-rw-r--r--  arch/arm/include/asm/system.h                 |   1
-rw-r--r--  arch/arm/mach-rmobile/lowlevel_init_gen3.S    |   2
-rw-r--r--  arch/arm/mach-socfpga/lowlevel_init_soc64.S   |   2
-rw-r--r--  board/cortina/presidio-asic/lowlevel_init.S   |   2
-rw-r--r--  cmd/arm/exception64.c                         |  64
9 files changed, 85 insertions(+), 142 deletions(-)
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S b/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
index 3aa1a9c..2fb4e40 100644
--- a/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
+++ b/arch/arm/cpu/armv8/fsl-layerscape/lowlevel.S
@@ -74,15 +74,6 @@ ENDPROC(smp_kick_all_cpus)
ENTRY(lowlevel_init)
mov x29, lr /* Save LR */
- /* unmask SError and abort */
- msr daifclr, #4
-
- /* Set HCR_EL2[AMO] so SError @EL2 is taken */
- mrs x0, hcr_el2
- orr x0, x0, #0x20 /* AMO */
- msr hcr_el2, x0
- isb
-
switch_el x1, 1f, 100f, 100f /* skip if not in EL3 */
1:
@@ -209,7 +200,7 @@ ENTRY(lowlevel_init)
#endif
100:
- branch_if_master x0, x1, 2f
+ branch_if_master x0, 2f
#if defined(CONFIG_MP) && defined(CONFIG_ARMV8_MULTIENTRY)
/*
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
index 91b00a4..6a6a4f8 100644
--- a/arch/arm/cpu/armv8/start.S
+++ b/arch/arm/cpu/armv8/start.S
@@ -126,6 +126,8 @@ pie_fixup_done:
b 0f
2: mrs x1, hcr_el2
tbnz x1, #34, 1f /* HCR_EL2.E2H */
+ orr x1, x1, #HCR_EL2_AMO_EL2 /* Route SErrors to EL2 */
+ msr hcr_el2, x1
set_vbar vbar_el2, x0
mov x0, #0x33ff
msr cptr_el2, x0 /* Enable FP/SIMD */
@@ -134,6 +136,7 @@ pie_fixup_done:
mov x0, #3 << 20
msr cpacr_el1, x0 /* Enable FP/SIMD */
0:
+ msr daifclr, #0x4 /* Unmask SError interrupts */
#ifdef COUNTER_FREQUENCY
branch_if_not_highest_el x0, 4f
@@ -172,11 +175,11 @@ pie_fixup_done:
bl lowlevel_init
#if defined(CONFIG_ARMV8_SPIN_TABLE) && !defined(CONFIG_SPL_BUILD)
- branch_if_master x0, x1, master_cpu
+ branch_if_master x0, master_cpu
b spin_table_secondary_jump
/* never return */
#elif defined(CONFIG_ARMV8_MULTIENTRY)
- branch_if_master x0, x1, master_cpu
+ branch_if_master x0, master_cpu
/*
* Slave CPUs
@@ -189,6 +192,7 @@ slave_cpu:
br x0 /* branch to the given address */
#endif /* CONFIG_ARMV8_MULTIENTRY */
master_cpu:
+ msr SPSel, #1 /* make sure we use SP_ELx */
bl _main
/*-----------------------------------------------------------------------*/
@@ -301,7 +305,7 @@ WEAK(lowlevel_init)
#endif
#ifdef CONFIG_ARMV8_MULTIENTRY
- branch_if_master x0, x1, 2f
+ branch_if_master x0, 2f
/*
* Slave should wait for master clearing spin table.
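Taken together, the start.S hunks above (a) set HCR_EL2.AMO so SErrors are taken at EL2, (b) clear the PSTATE.A mask, and (c) force SP_ELx before entering _main. A C-level sketch of the HCR_EL2 part, using the HCR_EL2_AMO_EL2 definition added in system.h below (helper name hypothetical):

	static inline void route_serror_to_el2(void)
	{
		unsigned long hcr;

		asm volatile("mrs %0, hcr_el2" : "=r"(hcr));
		hcr |= HCR_EL2_AMO_EL2;		/* bit 5: take SErrors at EL2 */
		asm volatile("msr hcr_el2, %0" : : "r"(hcr));
		asm volatile("isb");		/* make the change visible */
	}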
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 36b8403..89b1015 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -1,45 +1,26 @@
/*
- * linux/include/asm-arm/io.h
+ * I/O device access primitives. Based on early versions from the Linux kernel.
*
* Copyright (C) 1996-2000 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Modifications:
- * 16-Sep-1996 RMK Inlined the inx/outx functions & optimised for both
- * constant addresses and variable addresses.
- * 04-Dec-1997 RMK Moved a lot of this stuff to the new architecture
- * specific IO header files.
- * 27-Mar-1999 PJB Second parameter of memcpy_toio is const..
- * 04-Apr-1999 PJB Added check_signature.
- * 12-Dec-1999 RMK More cleanups
- * 18-Jun-2000 RMK Removed virt_to_* and friends definitions
*/
#ifndef __ASM_ARM_IO_H
#define __ASM_ARM_IO_H
-#ifdef __KERNEL__
-
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/byteorder.h>
#include <asm/memory.h>
#include <asm/barriers.h>
-#if 0 /* XXX###XXX */
-#include <asm/arch/hardware.h>
-#endif /* XXX###XXX */
static inline void sync(void)
{
}
-/*
- * Generic virtual read/write. Note that we don't support half-word
- * read/writes. We define __arch_*[bl] here, and leave __arch_*w
- * to the architecture specific code.
- */
+/* Generic virtual read/write. */
#define __arch_getb(a) (*(volatile unsigned char *)(a))
#define __arch_getw(a) (*(volatile unsigned short *)(a))
#define __arch_getl(a) (*(volatile unsigned int *)(a))
@@ -248,13 +229,6 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
#define clrsetbits_64(addr, clear, set) clrsetbits(64, addr, clear, set)
/*
- * Now, pick up the machine-defined IO definitions
- */
-#if 0 /* XXX###XXX */
-#include <asm/arch/io.h>
-#endif /* XXX###XXX */
-
-/*
* IO port access primitives
* -------------------------
*
@@ -318,24 +292,12 @@ static inline void __raw_readsl(unsigned long addr, void *data, int longlen)
#define readsb(a, d, s) __raw_readsb((unsigned long)a, d, s)
/*
- * DMA-consistent mapping functions. These allocate/free a region of
- * uncached, unwrite-buffered mapped memory space for use with DMA
- * devices. This is the "generic" version. The PCI specific version
- * is in pci.h
- */
-extern void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle);
-extern void consistent_free(void *vaddr, size_t size, dma_addr_t handle);
-extern void consistent_sync(void *vaddr, size_t size, int rw);
-
-/*
* String version of IO memory access ops:
*/
extern void _memcpy_fromio(void *, unsigned long, size_t);
extern void _memcpy_toio(unsigned long, const void *, size_t);
extern void _memset_io(unsigned long, int, size_t);
-extern void __readwrite_bug(const char *fn);
-
/* Optimized copy functions to read from/write to IO space */
#ifdef CONFIG_ARM64
#include <cpu_func.h>
@@ -441,62 +403,6 @@ void __memset_io(volatile void __iomem *dst, int c, size_t count)
#define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c))
#endif
-/*
- * If this architecture has ISA IO, then define the isa_read/isa_write
- * macros.
- */
-#ifdef __mem_isa
-
-#define isa_readb(addr) __raw_readb(__mem_isa(addr))
-#define isa_readw(addr) __raw_readw(__mem_isa(addr))
-#define isa_readl(addr) __raw_readl(__mem_isa(addr))
-#define isa_writeb(val,addr) __raw_writeb(val,__mem_isa(addr))
-#define isa_writew(val,addr) __raw_writew(val,__mem_isa(addr))
-#define isa_writel(val,addr) __raw_writel(val,__mem_isa(addr))
-#define isa_memset_io(a,b,c) _memset_io(__mem_isa(a),(b),(c))
-#define isa_memcpy_fromio(a,b,c) _memcpy_fromio((a),__mem_isa(b),(c))
-#define isa_memcpy_toio(a,b,c) _memcpy_toio(__mem_isa((a)),(b),(c))
-
-#define isa_eth_io_copy_and_sum(a,b,c,d) \
- eth_copy_and_sum((a),__mem_isa(b),(c),(d))
-
-static inline int
-isa_check_signature(unsigned long io_addr, const unsigned char *signature,
- int length)
-{
- int retval = 0;
- do {
- if (isa_readb(io_addr) != *signature)
- goto out;
- io_addr++;
- signature++;
- length--;
- } while (length);
- retval = 1;
-out:
- return retval;
-}
-
-#else /* __mem_isa */
-
-#define isa_readb(addr) (__readwrite_bug("isa_readb"),0)
-#define isa_readw(addr) (__readwrite_bug("isa_readw"),0)
-#define isa_readl(addr) (__readwrite_bug("isa_readl"),0)
-#define isa_writeb(val,addr) __readwrite_bug("isa_writeb")
-#define isa_writew(val,addr) __readwrite_bug("isa_writew")
-#define isa_writel(val,addr) __readwrite_bug("isa_writel")
-#define isa_memset_io(a,b,c) __readwrite_bug("isa_memset_io")
-#define isa_memcpy_fromio(a,b,c) __readwrite_bug("isa_memcpy_fromio")
-#define isa_memcpy_toio(a,b,c) __readwrite_bug("isa_memcpy_toio")
-
-#define isa_eth_io_copy_and_sum(a,b,c,d) \
- __readwrite_bug("isa_eth_io_copy_and_sum")
-
-#define isa_check_signature(io,sig,len) (0)
-
-#endif /* __mem_isa */
-#endif /* __KERNEL__ */
-
#include <asm-generic/io.h>
#include <iotrace.h>
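What survives the cleanup are the generic MMIO accessors and the clrsetbits family. A short usage sketch (device, base address and register layout all hypothetical):

	#include <asm/io.h>
	#include <linux/bitops.h>

	#define CTRL_REG	((void *)0x09000030)	/* hypothetical MMIO register */

	static void example_rmw(void)
	{
		/* Read-modify-write in one call: clear bit 1, set bit 0
		 * of a little-endian 32-bit register.
		 */
		clrsetbits_le32(CTRL_REG, BIT(1), BIT(0));

		/* Plain accessors for individual reads and writes. */
		u32 val = readl(CTRL_REG);
		writel(val | BIT(8), CTRL_REG);
	}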
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index ec0171e..1a1edc9 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -69,12 +69,10 @@ lr .req x30
*/
.macro switch_el, xreg, el3_label, el2_label, el1_label
mrs \xreg, CurrentEL
- cmp \xreg, 0xc
- b.eq \el3_label
- cmp \xreg, 0x8
+ cmp \xreg, #0x8
+ b.gt \el3_label
b.eq \el2_label
- cmp \xreg, 0x4
- b.eq \el1_label
+ b.lt \el1_label
.endm
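The rewrite works because CurrentEL keeps the exception level in bits [3:2], so EL1/EL2/EL3 read back as 0x4/0x8/0xc; a single compare against 0x8 then classifies all three cases. The equivalent logic in C (sketch):

	static inline unsigned int current_el(void)
	{
		unsigned long el;

		asm("mrs %0, CurrentEL" : "=r"(el));
		return el >> 2;		/* EL number from bits [3:2] */
	}

	/* switch_el: EL3 if CurrentEL > 0x8, EL2 if == 0x8, EL1 if < 0x8 */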
/*
@@ -123,19 +121,10 @@ lr .req x30
*/
.macro branch_if_slave, xreg, slave_label
#ifdef CONFIG_ARMV8_MULTIENTRY
- /* NOTE: MPIDR handling will be erroneous on multi-cluster machines */
mrs \xreg, mpidr_el1
- tst \xreg, #0xff /* Test Affinity 0 */
- b.ne \slave_label
- lsr \xreg, \xreg, #8
- tst \xreg, #0xff /* Test Affinity 1 */
- b.ne \slave_label
- lsr \xreg, \xreg, #8
- tst \xreg, #0xff /* Test Affinity 2 */
- b.ne \slave_label
- lsr \xreg, \xreg, #16
- tst \xreg, #0xff /* Test Affinity 3 */
- b.ne \slave_label
+ and \xreg, \xreg, 0xffffffffff /* clear bits [63:40] */
+ and \xreg, \xreg, ~0x00ff000000 /* also clear bits [31:24] */
+ cbnz \xreg, \slave_label
#endif
.endm
@@ -143,16 +132,12 @@ lr .req x30
* Branch if current processor is a master,
* choose processor with all zero affinity value as the master.
*/
-.macro branch_if_master, xreg1, xreg2, master_label
+.macro branch_if_master, xreg, master_label
#ifdef CONFIG_ARMV8_MULTIENTRY
- /* NOTE: MPIDR handling will be erroneous on multi-cluster machines */
- mrs \xreg1, mpidr_el1
- lsr \xreg2, \xreg1, #32
- lsl \xreg2, \xreg2, #32
- lsl \xreg1, \xreg1, #40
- lsr \xreg1, \xreg1, #40
- orr \xreg1, \xreg1, \xreg2
- cbz \xreg1, \master_label
+ mrs \xreg, mpidr_el1
+ and \xreg, \xreg, 0xffffffffff /* clear bits [63:40] */
+ and \xreg, \xreg, ~0x00ff000000 /* also clear bits [31:24] */
+ cbz \xreg, \master_label
#else
b \master_label
#endif
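Both macros now share one idea: clear everything in MPIDR_EL1 that is not an affinity field and test the remainder for zero. In C terms (sketch; field layout per the ARMv8 ARM):

	#include <stdbool.h>
	#include <stdint.h>

	/* MPIDR_EL1: Aff3 [39:32], flag bits (U, MT, ...) [31:24],
	 * Aff2 [23:16], Aff1 [15:8], Aff0 [7:0]; bits [63:40] reserved.
	 */
	static bool is_master(uint64_t mpidr)
	{
		mpidr &= 0xffffffffffULL;	/* drop reserved bits [63:40] */
		mpidr &= ~0x00ff000000ULL;	/* drop flag bits [31:24] */
		return mpidr == 0;		/* master == all-zero affinity */
	}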
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index f75eea1..87d1c77 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -82,6 +82,7 @@
#define HCR_EL2_RW_AARCH64 (1 << 31) /* EL1 is AArch64 */
#define HCR_EL2_RW_AARCH32 (0 << 31) /* Lower levels are AArch32 */
#define HCR_EL2_HCD_DIS (1 << 29) /* Hypervisor Call disabled */
+#define HCR_EL2_AMO_EL2 (1 << 5) /* Route SErrors to EL2 */
/*
* ID_AA64ISAR1_EL1 bits definitions
diff --git a/arch/arm/mach-rmobile/lowlevel_init_gen3.S b/arch/arm/mach-rmobile/lowlevel_init_gen3.S
index 1df2c40..0d77800 100644
--- a/arch/arm/mach-rmobile/lowlevel_init_gen3.S
+++ b/arch/arm/mach-rmobile/lowlevel_init_gen3.S
@@ -64,7 +64,7 @@ ENTRY(lowlevel_init)
#endif
#endif
- branch_if_master x0, x1, 2f
+ branch_if_master x0, 2f
/*
* Slave should wait for master clearing spin table.
diff --git a/arch/arm/mach-socfpga/lowlevel_init_soc64.S b/arch/arm/mach-socfpga/lowlevel_init_soc64.S
index 612ea8a..875927c 100644
--- a/arch/arm/mach-socfpga/lowlevel_init_soc64.S
+++ b/arch/arm/mach-socfpga/lowlevel_init_soc64.S
@@ -38,7 +38,7 @@ slave_wait_atf:
#endif
#ifdef CONFIG_ARMV8_MULTIENTRY
- branch_if_master x0, x1, 2f
+ branch_if_master x0, 2f
/*
* Slave should wait for master clearing spin table.
diff --git a/board/cortina/presidio-asic/lowlevel_init.S b/board/cortina/presidio-asic/lowlevel_init.S
index 4450a5d..cbf8134 100644
--- a/board/cortina/presidio-asic/lowlevel_init.S
+++ b/board/cortina/presidio-asic/lowlevel_init.S
@@ -50,7 +50,7 @@ skip_smp_setup:
#endif
#ifdef CONFIG_ARMV8_MULTIENTRY
- branch_if_master x0, x1, 2f
+ branch_if_master x0, 2f
/*
* Slave should wait for master clearing spin table.
diff --git a/cmd/arm/exception64.c b/cmd/arm/exception64.c
index d5de50a..589a231 100644
--- a/cmd/arm/exception64.c
+++ b/cmd/arm/exception64.c
@@ -7,19 +7,73 @@
#include <common.h>
#include <command.h>
+#include <linux/bitops.h>
static int do_undefined(struct cmd_tbl *cmdtp, int flag, int argc,
char *const argv[])
{
/*
- * 0xe7f...f. is undefined in ARM mode
- * 0xde.. is undefined in Thumb mode
+ * Instructions starting with the upper 16 bits all 0 are permanently
+ * undefined. The lower 16 bits can be used for some kind of immediate.
+ * --- ARMv8 ARM (ARM DDI 0487G.a C6.2.339: "UDF")
*/
- asm volatile (".word 0xe7f7defb\n");
+ asm volatile (".word 0x00001234\n");
+
+ return CMD_RET_FAILURE;
+}
+
+/*
+ * The ID_AA64MMFR2_EL1 register name is only known to binutils for ARMv8.2
+ * and later architecture revisions. However the register is valid regardless
+ * of binutils architecture support or the core the code is running on, so
+ * just use the generic encoding.
+ */
+#define ID_AA64MMFR2_EL1 "S3_0_C0_C7_2"
+
+static int do_unaligned(struct cmd_tbl *cmdtp, int flag, int argc,
+ char *const argv[])
+{
+ uint64_t reg;
+
+ /*
+ * The unaligned LDAR access below is only guaranteed to generate an
+ * alignment fault on cores not implementing FEAT_LSE2. To avoid false
+ * negatives, check this condition before we execute LDAR.
+ */
+ asm ("mrs %0, "ID_AA64MMFR2_EL1"\n" : "=r" (reg));
+ if (reg & GENMASK(35, 32)) {
+ printf("unaligned access check only supported on pre-v8.4 cores\n");
+ return CMD_RET_FAILURE;
+ }
+
+ /*
+ * The load acquire instruction requires the data source to be
+ * naturally aligned, and will fault even if strict alignment fault
+ * checking is disabled (but only without FEAT_LSE2).
+ * --- ARMv8 ARM (ARM DDI 0487G.a B2.5.2: "Alignment of data accesses")
+ */
+ asm volatile (
+ "mov x1, sp\n\t"
+ "orr x1, x1, #3\n\t"
+ "ldar x0, [x1]\n"
+ ::: "x0", "x1" );
+
+ return CMD_RET_FAILURE;
+}
+
+static int do_breakpoint(struct cmd_tbl *cmdtp, int flag, int argc,
+ char *const argv[])
+{
+ asm volatile ("brk #123\n");
+
return CMD_RET_FAILURE;
}
static struct cmd_tbl cmd_sub[] = {
+ U_BOOT_CMD_MKENT(breakpoint, CONFIG_SYS_MAXARGS, 1, do_breakpoint,
+ "", ""),
+ U_BOOT_CMD_MKENT(unaligned, CONFIG_SYS_MAXARGS, 1, do_unaligned,
+ "", ""),
U_BOOT_CMD_MKENT(undefined, CONFIG_SYS_MAXARGS, 1, do_undefined,
"", ""),
};
@@ -27,7 +81,9 @@ static struct cmd_tbl cmd_sub[] = {
static char exception_help_text[] =
"<ex>\n"
" The following exceptions are available:\n"
- " undefined - undefined instruction\n"
+ " breakpoint - breakpoint instruction exception\n"
+ " unaligned - unaligned LDAR data abort\n"
+ " undefined - undefined instruction exception\n"
;
#include <exception.h>
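For the unaligned case, the GENMASK(35, 32) test reads the ID_AA64MMFR2_EL1.AT field: nonzero means FEAT_LSE2 is implemented and an unaligned LDAR would complete rather than fault. A standalone sketch of the same probe (same generic register encoding as in the patch):

	#include <linux/bitops.h>
	#include <stdbool.h>

	static bool have_feat_lse2(void)
	{
		uint64_t mmfr2;

		/* Generic encoding of ID_AA64MMFR2_EL1, accepted by any binutils. */
		asm("mrs %0, S3_0_C0_C7_2" : "=r"(mmfr2));

		/* AT, bits [35:32]: nonzero => FEAT_LSE2, unaligned LDAR is legal. */
		return (mmfr2 & GENMASK(35, 32)) != 0;
	}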