aboutsummaryrefslogtreecommitdiff
path: root/core/mce.c
blob: a07eeb6a81c37f30a54a019607fcdb2a97709afe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// SPDX-License-Identifier: Apache-2.0
/*
 * Machine Check Exceptions
 *
 * Copyright 2020 IBM Corp.
 */

#define pr_fmt(fmt)	"MCE: " fmt

#include <ras.h>
#include <opal.h>
#include <cpu.h>

/*
 * Non-zero when PPC_BIT(42) is set in the given SRR1 value.
 *
 * decode_mce() uses this to choose the decoder: when the bit is set the
 * machine check is decoded from DSISR (load/store side), otherwise it is
 * decoded from SRR1 itself (instruction fetch side).
 */
#define SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42))

/*
 * One row of an SRR1-based machine check decode table.
 *
 * A row matches when (srr1 & srr1_mask) == srr1_value. A row whose
 * srr1_mask is 0 marks the end of a table (see decode_ierror()).
 */
struct mce_ierror_table {
	/* Bits of SRR1 that take part in the comparison. */
	unsigned long srr1_mask;
	/* Value the masked SRR1 must equal for this row to match. */
	unsigned long srr1_value;
	/* MCE_* flags reported to the caller when this row matches. */
	uint64_t type;
	/* Human-readable description reported when this row matches. */
	const char *error_str;
};

/*
 * POWER9 machine check causes decoded from SRR1. decode_mce() consults this
 * table when SRR1_MC_LOADSTORE() is clear.
 *
 * Every row uses the same mask (0x081c0000) with a distinct value, so a given
 * SRR1 can match at most one row. Most rows describe instruction fetch
 * errors; the two "asynchronous" store rows are flagged MCE_IMPRECISE and do
 * not carry MCE_INVOLVED_EA, so decode_mce() reports no address for them.
 *
 * The final all-zero row is the terminator (srr1_mask == 0).
 */
static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000,
  MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA,
  "instruction fetch memory uncorrectable error", },
{ 0x00000000081c0000, 0x0000000000080000,
  MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
  "instruction fetch SLB parity error", },
{ 0x00000000081c0000, 0x00000000000c0000,
  MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA,
  "instruction fetch SLB multi-hit error", },
{ 0x00000000081c0000, 0x0000000000100000,
  MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_ERAT_ERROR,
  "instruction fetch ERAT multi-hit error", },
{ 0x00000000081c0000, 0x0000000000140000,
  MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_TLB_ERROR,
  "instruction fetch TLB multi-hit error", },
{ 0x00000000081c0000, 0x0000000000180000,
  MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
  "instruction fetch page table access memory uncorrectable error", },
{ 0x00000000081c0000, 0x00000000001c0000,
  MCE_INSNFETCH | MCE_INVOLVED_EA,
  "instruction fetch to foreign address", },
{ 0x00000000081c0000, 0x0000000008000000,
  MCE_INSNFETCH | MCE_INVOLVED_EA,
  "instruction fetch foreign link time-out", },
{ 0x00000000081c0000, 0x0000000008040000,
  MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
  "instruction fetch page table access foreign link time-out", },
{ 0x00000000081c0000, 0x00000000080c0000,
  MCE_INSNFETCH | MCE_INVOLVED_EA,
  "instruction fetch real address error", },
{ 0x00000000081c0000, 0x0000000008100000,
  MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
  "instruction fetch page table access real address error", },
{ 0x00000000081c0000, 0x0000000008140000,
  MCE_LOADSTORE | MCE_IMPRECISE,
  "store real address asynchronous error", },
{ 0x00000000081c0000, 0x0000000008180000,
  MCE_LOADSTORE | MCE_IMPRECISE,
  "store foreign link time-out asynchronous error", },
{ 0x00000000081c0000, 0x00000000081c0000,
  MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA,
  "instruction fetch page table access to foreign address", },
{ 0 } };

/*
 * One row of a DSISR-based machine check decode table.
 *
 * A row matches when (dsisr & dsisr_value) is non-zero. A row whose
 * dsisr_value is 0 marks the end of a table (see decode_derror()).
 */
struct mce_derror_table {
	/* DSISR bit(s) that select this row. */
	unsigned long dsisr_value;
	/* MCE_* flags reported to the caller when this row matches. */
	uint64_t type;
	/* Human-readable description reported when this row matches. */
	const char *error_str;
};

/*
 * POWER9 load/store machine check causes decoded from DSISR. decode_mce()
 * consults this table when SRR1_MC_LOADSTORE() is set.
 *
 * Each row's dsisr_value is a single bit, listed from the highest (0x8000)
 * down to the lowest (0x0008), with the memory uncorrectable errors first.
 * decode_derror() walks the rows in this order, and the order determines
 * which cause is reported when DSISR has several of these bits set, so do
 * not reorder the rows.
 *
 * The final zero row is the terminator (dsisr_value == 0).
 */
static const struct mce_derror_table mce_p9_derror_table[] = {
{ 0x00008000,
  MCE_LOADSTORE | MCE_MEMORY_ERROR,
  "load/store memory uncorrectable error", },
{ 0x00004000,
  MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA,
  "load/store page table access memory uncorrectable error", },
{ 0x00002000,
  MCE_LOADSTORE | MCE_INVOLVED_EA,
  "load/store foreign link time-out", },
{ 0x00001000,
  MCE_LOADSTORE | MCE_TABLE_WALK | MCE_INVOLVED_EA,
  "load/store page table access foreign link time-out", },
{ 0x00000800,
  MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_ERAT_ERROR,
  "load/store ERAT multi-hit error", },
{ 0x00000400,
  MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_TLB_ERROR,
  "load/store TLB multi-hit error", },
{ 0x00000200,
  MCE_LOADSTORE | MCE_TLBIE_ERROR,
  "TLBIE or TLBIEL instruction programming error", },
{ 0x00000100,
  MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
  "load/store SLB parity error", },
{ 0x00000080,
  MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR,
  "load/store SLB multi-hit error", },
{ 0x00000040,
  MCE_LOADSTORE | MCE_INVOLVED_EA,
  "load real address error", },
{ 0x00000020,
  MCE_LOADSTORE | MCE_TABLE_WALK,
  "load/store page table access real address error", },
{ 0x00000010,
  MCE_LOADSTORE | MCE_TABLE_WALK,
  "load/store page table access to foreign address", },
{ 0x00000008,
  MCE_LOADSTORE,
  "load/store to foreign address", },
{ 0 } };

static void decode_ierror(const struct mce_ierror_table table[],
				uint64_t srr1,
				uint64_t *type,
				const char **error_str)
{
	int i;

	for (i = 0; table[i].srr1_mask; i++) {
		if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
			continue;

		*type = table[i].type;
		*error_str = table[i].error_str;
	}
}

/*
 * Decode a load/store-side machine check from DSISR.
 *
 * @table:     rows to scan, terminated by a row whose dsisr_value is 0
 * @dsisr:     DSISR value to decode
 * @type:      out: set to the matching row's type; untouched if nothing matches
 * @error_str: out: set to the matching row's description; untouched if
 *             nothing matches
 *
 * A row matches when any of its dsisr_value bits are set in @dsisr. DSISR
 * may have several cause bits set at once, but only one cause can be
 * returned, so the first matching row in table order is reported and the
 * scan stops there. Tables are therefore expected to list the causes that
 * must not be hidden (e.g. uncorrectable memory errors) first.
 */
static void decode_derror(const struct mce_derror_table table[],
		uint32_t dsisr,
		uint64_t *type,
		const char **error_str)
{
	int i;

	for (i = 0; table[i].dsisr_value; i++) {
		if (!(dsisr & table[i].dsisr_value))
			continue;

		*type = table[i].type;
		*error_str = table[i].error_str;

		/*
		 * Stop at the first hit: without this, a later (lower
		 * priority) row would overwrite the cause found here.
		 */
		break;
	}
}

/*
 * Decode a machine check exception into a type, description and address.
 *
 * @srr0:      SRR0 at the time of the machine check; reported as the address
 *             for instruction-side errors that involve an effective address
 * @srr1:      SRR1 at the time of the machine check
 * @dsisr:     DSISR; only consulted when SRR1_MC_LOADSTORE(srr1) is set
 * @dar:       DAR; reported as the address for load/store-side errors that
 *             involve an effective address
 * @type:      out: MCE_* flags, MCE_UNKNOWN if the cause was not recognised
 * @error_str: out: description of the cause, always set
 * @address:   out: address involved, or 0 when the decoded type does not have
 *             MCE_INVOLVED_EA set
 *
 * Only POWER9 (proc_gen_p9) is decoded; any other processor generation is
 * reported as unknown.
 */
void decode_mce(uint64_t srr0, uint64_t srr1,
		uint32_t dsisr, uint64_t dar,
		uint64_t *type, const char **error_str,
		uint64_t *address)
{
	/* Start from "unknown" so an unmatched cause still yields sane outputs. */
	*address = 0;
	*error_str = "unknown error";
	*type = MCE_UNKNOWN;

	if (proc_gen != proc_gen_p9) {
		*error_str = "unknown error (processor not supported)";
		return;
	}

	/* Instruction side: the cause is encoded in SRR1, the address in SRR0. */
	if (!SRR1_MC_LOADSTORE(srr1)) {
		decode_ierror(mce_p9_ierror_table, srr1, type, error_str);
		if (*type & MCE_INVOLVED_EA)
			*address = srr0;
		return;
	}

	/*
	 * POWER9 DD2.1 and earlier can raise a machine check for a paste
	 * instruction with nothing but DSISR bit 25 set. Left undecoded, the
	 * MCE handler would see an unknown event and the kernel would crash.
	 * Such a machine check is spurious and needs no servicing: if there
	 * is a real problem, the CPU raises the MCE again with a proper
	 * DSISR. Recognise the case here and report it as "no error".
	 */
	if (dsisr == 0x02000000) {
		*type = MCE_NO_ERROR;
		*error_str = "no error (superfluous machine check)";
		return;
	}

	/* Load/store side: the cause is encoded in DSISR, the address in DAR. */
	decode_derror(mce_p9_derror_table, dsisr, type, error_str);
	if (*type & MCE_INVOLVED_EA)
		*address = dar;
}