aboutsummaryrefslogtreecommitdiff
path: root/scripts/arm_processor_error.py
blob: 73d069f070d4402ad2a3f60a10073c05953debc5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
#!/usr/bin/env python3
#
# pylint: disable=C0301,C0114,R0903,R0912,R0913,R0914,R0915,W0511
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Copyright (C) 2024-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

# TODO: current implementation has dummy defaults.
#
# For a better implementation, a QMP addition/call is needed to
# retrieve some data for ARM Processor Error injection:
#
#   - ARM registers: power_state, mpidr.

"""
Generate an ARM processor error CPER, compatible with
UEFI 2.9A Errata.

Injecting such errors can be done using:

    $ ./scripts/ghes_inject.py arm
    Error injected.

Produces a simple CPER record, as detected on a Linux guest:

[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[Hardware Error]: event severity: recoverable
[Hardware Error]:  Error 0, type: recoverable
[Hardware Error]:   section_type: ARM processor error
[Hardware Error]:   MIDR: 0x0000000000000000
[Hardware Error]:   running state: 0x0
[Hardware Error]:   Power State Coordination Interface state: 0
[Hardware Error]:   Error info structure 0:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x02: cache error
[Hardware Error]:    error_info: 0x000000000091000f
[Hardware Error]:     transaction type: Data Access
[Hardware Error]:     cache error, operation type: Data write
[Hardware Error]:     cache level: 2
[Hardware Error]:     processor context not corrupted
[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error

The ARM Processor Error message can be customized via command line
parameters. For instance:

    $ ./scripts/ghes_inject.py arm --mpidr 0x444 --running --affinity 1 \
        --error-info 12345678 --vendor 0x13,123,4,5,1 --ctx-array 0,1,2,3,4,5 \
        -t cache tlb bus micro-arch tlb,micro-arch
    Error injected.

Injects this error, as detected on a Linux guest:

[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1
[Hardware Error]: event severity: recoverable
[Hardware Error]:  Error 0, type: recoverable
[Hardware Error]:   section_type: ARM processor error
[Hardware Error]:   MIDR: 0x0000000000000000
[Hardware Error]:   Multiprocessor Affinity Register (MPIDR): 0x0000000000000000
[Hardware Error]:   error affinity level: 0
[Hardware Error]:   running state: 0x1
[Hardware Error]:   Power State Coordination Interface state: 0
[Hardware Error]:   Error info structure 0:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x02: cache error
[Hardware Error]:    error_info: 0x0000000000bc614e
[Hardware Error]:     cache level: 2
[Hardware Error]:     processor context not corrupted
[Hardware Error]:   Error info structure 1:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x04: TLB error
[Hardware Error]:    error_info: 0x000000000054007f
[Hardware Error]:     transaction type: Instruction
[Hardware Error]:     TLB error, operation type: Instruction fetch
[Hardware Error]:     TLB level: 1
[Hardware Error]:     processor context not corrupted
[Hardware Error]:     the error has not been corrected
[Hardware Error]:     PC is imprecise
[Hardware Error]:   Error info structure 2:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x08: bus error
[Hardware Error]:    error_info: 0x00000080d6460fff
[Hardware Error]:     transaction type: Generic
[Hardware Error]:     bus error, operation type: Generic read (type of instruction or data request cannot be determined)
[Hardware Error]:     affinity level at which the bus error occurred: 1
[Hardware Error]:     processor context corrupted
[Hardware Error]:     the error has been corrected
[Hardware Error]:     PC is imprecise
[Hardware Error]:     Program execution can be restarted reliably at the PC associated with the error.
[Hardware Error]:     participation type: Local processor observed
[Hardware Error]:     request timed out
[Hardware Error]:     address space: External Memory Access
[Hardware Error]:     memory access attributes:0x20
[Hardware Error]:     access mode: secure
[Hardware Error]:   Error info structure 3:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x10: micro-architectural error
[Hardware Error]:    error_info: 0x0000000078da03ff
[Hardware Error]:   Error info structure 4:
[Hardware Error]:   num errors: 2
[Hardware Error]:    error_type: 0x14: TLB error|micro-architectural error
[Hardware Error]:   Context info structure 0:
[Hardware Error]:    register context type: AArch64 EL1 context registers
[Hardware Error]:    00000000: 00000000 00000000
[Hardware Error]:   Vendor specific error info has 5 bytes:
[Hardware Error]:    00000000: 13 7b 04 05 01                                   .{...
[Firmware Warn]: GHES: Unhandled processor error type 0x02: cache error
[Firmware Warn]: GHES: Unhandled processor error type 0x04: TLB error
[Firmware Warn]: GHES: Unhandled processor error type 0x08: bus error
[Firmware Warn]: GHES: Unhandled processor error type 0x10: micro-architectural error
[Firmware Warn]: GHES: Unhandled processor error type 0x14: TLB error|micro-architectural error
"""

import argparse
import re

from qmp_helper import qmp, util, cper_guid


class ArmProcessorEinj:
    """
    Implements ARM Processor Error injection via GHES.

    Registers the "arm" subcommand and, when it is selected, encodes an
    ARM Processor Error CPER section from the parsed arguments and sends
    it through QMP.
    """

    DESC = """
    Generates an ARM processor error CPER, compatible with
    UEFI 2.9A Errata.
    """

    # Size in bytes of the fixed part of the section, as encoded by
    # send_cper(): 4 + 2 + 2 + 4 + 1 + 3 + 8 + 8 + 4 + 4
    ACPI_GHES_ARM_CPER_LENGTH = 40

    # Size in bytes of each Processor Error Information (PEI) structure,
    # as encoded by _encode_pei(): 1 + 1 + 2 + 1 + 2 + 1 + 8 + 8 + 8
    ACPI_GHES_ARM_CPER_PEI_LENGTH = 32

    # Context types
    CONTEXT_AARCH32_EL1 = 1
    CONTEXT_AARCH64_EL1 = 5
    CONTEXT_MISC_REG = 8

    def __init__(self, subparsers):
        """
        Initialize the error injection class and add subparser.

        subparsers is the object whose add_parser() method is used to
        register the "arm" subcommand. The subcommand is bound to
        send_cper() through the parser "func" default.
        """

        # Valid choice values
        self.arm_valid_bits = {
            "mpidr":    util.bit(0),
            "affinity": util.bit(1),
            "running":  util.bit(2),
            "vendor":   util.bit(3),
        }

        self.pei_flags = {
            "first":        util.bit(0),
            "last":         util.bit(1),
            "propagated":   util.bit(2),
            "overflow":     util.bit(3),
        }

        self.pei_error_types = {
            "cache":        util.bit(1),
            "tlb":          util.bit(2),
            "bus":          util.bit(3),
            "micro-arch":   util.bit(4),
        }

        self.pei_valid_bits = {
            "multiple-error":   util.bit(0),
            "flags":            util.bit(1),
            "error-info":       util.bit(2),
            "virt-addr":        util.bit(3),
            "phy-addr":         util.bit(4),
        }

        # Last section payload built by send_cper()
        self.data = bytearray()

        parser = subparsers.add_parser("arm", description=self.DESC)

        # Comma-separated lists of accepted names, used on help messages
        arm_valid_bits = ",".join(self.arm_valid_bits)
        flags = ",".join(self.pei_flags)
        error_types = ",".join(self.pei_error_types)
        pei_valid_bits = ",".join(self.pei_valid_bits)

        # UEFI N.16 ARM Validation bits
        g_arm = parser.add_argument_group("ARM processor")
        g_arm.add_argument("--arm", "--arm-valid",
                           help=f"ARM valid bits: {arm_valid_bits}")
        g_arm.add_argument("-a", "--affinity",  "--level", "--affinity-level",
                           type=lambda x: int(x, 0),
                           help="Affinity level (when multiple levels apply)")
        g_arm.add_argument("-l", "--mpidr", type=lambda x: int(x, 0),
                           help="Multiprocessor Affinity Register")
        g_arm.add_argument("-i", "--midr", type=lambda x: int(x, 0),
                           help="Main ID Register")
        # default=None lets send_cper() see the option as "not given"
        g_arm.add_argument("-r", "--running",
                           action=argparse.BooleanOptionalAction,
                           default=None,
                           help="Indicates if the processor is running or not")
        g_arm.add_argument("--psci", "--psci-state",
                           type=lambda x: int(x, 0),
                           help="Power State Coordination Interface - PSCI state")

        # UEFI N.17 bitmaps (type and flags)
        g_pei = parser.add_argument_group("ARM Processor Error Info (PEI)")
        g_pei.add_argument("-t", "--type", nargs="+",
                        help=f"one or more error types: {error_types}")
        g_pei.add_argument("-f", "--flags", nargs="*",
                        help=f"zero or more error flags: {flags}")
        g_pei.add_argument("-V", "--pei-valid", "--error-valid", nargs="*",
                        help=f"zero or more PEI valid bits: {pei_valid_bits}")

        # UEFI N.17 Integer values
        g_pei.add_argument("-m", "--multiple-error", nargs="+",
                        help="Number of errors: 0: Single error, 1: Multiple errors, 2-65535: Error count if known")
        g_pei.add_argument("-e", "--error-info", nargs="+",
                        help="Error information (UEFI 2.10 tables N.18 to N.20)")
        g_pei.add_argument("-p", "--physical-address",  nargs="+",
                        help="Physical address")
        g_pei.add_argument("-v", "--virtual-address",  nargs="+",
                        help="Virtual address")

        # UEFI N.21 Context
        g_ctx = parser.add_argument_group("Processor Context")
        g_ctx.add_argument("--ctx-type", "--context-type", nargs="*",
                        help="Type of the context (0=ARM32 GPR, 5=ARM64 EL1, other values supported)")
        g_ctx.add_argument("--ctx-size", "--context-size", nargs="*",
                        help="Minimal size of the context")
        g_ctx.add_argument("--ctx-array", "--context-array", nargs="*",
                        help="Comma-separated arrays for each context")

        # Vendor-specific data
        g_vendor = parser.add_argument_group("Vendor-specific data")
        g_vendor.add_argument("--vendor", "--vendor-specific", nargs="+",
                        help="Vendor-specific byte arrays of data")

        # Add arguments for Generic Error Data
        qmp.argparse(parser)

        parser.set_defaults(func=self.send_cper)

    @staticmethod
    def _arg_int(name, value, num_bytes):
        """
        Return an optional integer argument as an encodable value.

        A value of None (option not given) becomes 0. Exits with an
        error message if the value is negative or doesn't fit in
        num_bytes bytes, instead of failing later while encoding it.
        """

        if value is None:
            return 0

        if not 0 <= value < (1 << (8 * num_bytes)):
            raise SystemExit(f"Error on '{name}': {value} doesn't fit "
                             f"in {num_bytes} unsigned byte(s)")

        return value

    @staticmethod
    def _context_reg_slots(entry):
        """
        Return how many 64-bit slots are declared for one context entry.

        It is the largest of the entry "minimal-size" and the number of
        values at its "register" list, plus one.
        """

        slots = max(entry.get("minimal-size", 0),
                    len(entry.get("register", [])))

        # NOTE(review): kept as found. "% 0xFFFE" may have been meant to
        # be "& 0xFFFE" (round up to an even count) - confirm the intent.
        return (slots + 1) % 0xFFFE

    def _encode_pei(self, pei):
        """
        Encode all Processor Error Information structures.

        pei maps an index to a dict that may have the "valid", "type",
        "flags", "error-info", "multiple-error", "phy-addr" and
        "virt-addr" keys. Returns a bytearray with
        ACPI_GHES_ARM_CPER_PEI_LENGTH bytes per entry.

        Exits with an error message if an entry has no "type".
        """

        # UEFI 2.10 doesn't define how to encode error information
        # when multiple types are raised. So, provide a default only
        # if a single type is there
        default_error_info = {
            self.pei_error_types["cache"]:      0x0091000F,
            self.pei_error_types["tlb"]:        0x0054007F,
            self.pei_error_types["bus"]:        0x80D6460FFF,
            self.pei_error_types["micro-arch"]: 0x78DA03FF,
        }

        default_flags = self.pei_flags["first"] | self.pei_flags["last"]

        # Fields whose valid bit is set only when given by the user
        user_fields = ("multiple-error", "flags", "phy-addr", "virt-addr")

        pei_data = bytearray()

        for idx, entry in pei.items():
            # Happens when another PEI option received more values
            # than --type
            if "type" not in entry:
                raise SystemExit(f"Error: error info structure {idx} has "
                                 "no error type. Use one --type value "
                                 "per structure.")

            err_type = entry["type"]

            error_info = entry.get("error-info",
                                   default_error_info.get(err_type))

            if "valid" in entry:
                valid = entry["valid"]
            else:
                valid = 0
                for field in user_fields:
                    if field in entry:
                        valid |= self.pei_valid_bits[field]

                # A defaulted error-info is flagged as valid as well
                if error_info is not None:
                    valid |= self.pei_valid_bits["error-info"]

            # Version
            util.data_add(pei_data, 0, 1)

            util.data_add(pei_data,
                          self.ACPI_GHES_ARM_CPER_PEI_LENGTH, 1)

            util.data_add(pei_data, valid, 2)
            util.data_add(pei_data, err_type, 1)
            util.data_add(pei_data, entry.get("multiple-error", 1), 2)
            util.data_add(pei_data, entry.get("flags", default_flags), 1)
            util.data_add(pei_data,
                          0 if error_info is None else error_info, 8)
            util.data_add(pei_data, entry.get("virt-addr", 0xDEADBEEF), 8)
            util.data_add(pei_data, entry.get("phy-addr", 0xABBA0BAD), 8)

        return pei_data

    def _encode_contexts(self, ctx, default_type):
        """
        Encode all processor context structures, sorted by index.

        ctx maps an index to a dict that may have the "type",
        "minimal-size" and "register" keys. default_type is used for
        entries without "type". Returns a bytearray.
        """

        ctx_data = bytearray()

        for k in sorted(ctx):
            entry = ctx[k]
            registers = entry.get("register", [])
            slots = self._context_reg_slots(entry)

            # Version
            util.data_add(ctx_data, 0, 2)

            util.data_add(ctx_data, entry.get("type", default_type), 2)

            # Register array size, in bytes
            util.data_add(ctx_data, 8 * slots, 4)

            for reg in registers:
                util.data_add(ctx_data, reg, 8)

            # Zero-fill the slots that have no register value
            for _ in range(len(registers), slots):
                util.data_add(ctx_data, 0, 8)

        return ctx_data

    def send_cper(self, args):
        """
        Parse subcommand arguments and send a CPER via QMP.

        args is the namespace produced by the parser set up by
        __init__(). The encoded section is stored at self.data and
        sent with the CPER_PROC_ARM GUID.

        Exits with an error message on arguments that can't be encoded.
        """

        qmp_cmd = qmp(args.host, args.port, args.debug)

        # Handle Generic Error Data arguments if any
        qmp_cmd.set_args(args)

        is_cpu_type = re.compile(r"^([\w+]+\-)?arm\-cpu$")
        cpus = qmp_cmd.search_qom("/machine/unattached/device",
                                  "type", is_cpu_type)

        pei = {}
        ctx = {}
        vendor = {}

        # Handle global parameters. Values are read from the argparse
        # destinations, which are named after the first long option
        # ("affinity", "mpidr", "midr" and "psci").
        affinity = self._arg_int("affinity", args.affinity, 1)
        mpidr = self._arg_int("mpidr", args.mpidr, 8)
        psci = self._arg_int("psci", args.psci, 4)

        midr = args.midr
        if midr is not None:
            midr = self._arg_int("midr", midr, 8)

        running_state = util.bit(0) if args.running else 0

        if args.arm:
            # Explicit valid bits: use them as-is
            valid = util.get_choice(name="valid",
                                    value=args.arm,
                                    choices=self.arm_valid_bits,
                                    suffixes=["-error", "-err"])
        else:
            # The running state is always encoded, so it is always
            # flagged as valid. The other bits follow the options
            # that were actually given, including explicit zeros.
            valid = self.arm_valid_bits["running"]

            if args.affinity is not None:
                valid |= self.arm_valid_bits["affinity"]

            if args.mpidr is not None:
                valid |= self.arm_valid_bits["mpidr"]

            if args.vendor:
                valid |= self.arm_valid_bits["vendor"]

        # Handle PEI
        if not args.type:
            args.type = ["cache-error"]

        # NOTE(review): "--addr" looks like a typo for "-addr"; kept
        # as-is, as its effect depends on how util handles suffixes.
        util.get_mult_choices(
            pei,
            name="valid",
            values=args.pei_valid,
            choices=self.pei_valid_bits,
            suffixes=["-valid", "--addr"],
        )
        util.get_mult_choices(
            pei,
            name="type",
            values=args.type,
            choices=self.pei_error_types,
            suffixes=["-error", "-err"],
        )
        util.get_mult_choices(
            pei,
            name="flags",
            values=args.flags,
            choices=self.pei_flags,
            suffixes=["-error", "-cap"],
        )
        util.get_mult_int(pei, "error-info", args.error_info)
        util.get_mult_int(pei, "multiple-error", args.multiple_error)
        util.get_mult_int(pei, "phy-addr", args.physical_address)
        util.get_mult_int(pei, "virt-addr", args.virtual_address)

        # Handle context
        util.get_mult_int(ctx, "type", args.ctx_type, allow_zero=True)
        util.get_mult_int(ctx, "minimal-size", args.ctx_size, allow_zero=True)
        util.get_mult_array(ctx, "register", args.ctx_array, allow_zero=True)

        util.get_mult_array(vendor, "bytes", args.vendor, max_val=255)

        # Store PEI
        pei_data = self._encode_pei(pei)

        # Store Context
        ctx_data = bytearray()

        if ctx:
            ret = qmp_cmd.send_cmd("query-target", may_open=True)

            # Pick the default context type from the target architecture
            arch = ret.get("arch") if isinstance(ret, dict) else None

            if arch == "aarch64":
                default_ctx = self.CONTEXT_AARCH64_EL1
            elif arch == "arm":
                default_ctx = self.CONTEXT_AARCH32_EL1
            else:
                default_ctx = self.CONTEXT_MISC_REG

            ctx_data = self._encode_contexts(ctx, default_ctx)

        # Vendor-specific bytes are not grouped
        vendor_data = bytearray()
        for k in sorted(vendor):
            for byte in vendor[k]["bytes"]:
                util.data_add(vendor_data, byte, 1)

        # Without --midr, try to retrieve the value from the first CPU
        if midr is None and cpus:
            cmd_arg = {
                'path': cpus[0],
                'property': "midr"
            }
            ret = qmp_cmd.send_cmd("qom-get", cmd_arg, may_open=True)
            if isinstance(ret, int):
                midr = ret

        if midr is None:
            midr = 0

        # Encode ARM Processor Error
        data = bytearray()

        util.data_add(data, valid, 4)

        util.data_add(data, len(pei), 2)
        util.data_add(data, len(ctx), 2)

        # Calculate the length of the CPER data
        cper_length = self.ACPI_GHES_ARM_CPER_LENGTH
        cper_length += len(pei_data)
        cper_length += len(vendor_data)
        cper_length += len(ctx_data)
        util.data_add(data, cper_length, 4)

        util.data_add(data, affinity, 1)

        # Reserved
        util.data_add(data, 0, 3)

        util.data_add(data, mpidr, 8)
        util.data_add(data, midr, 8)
        util.data_add(data, running_state, 4)
        util.data_add(data, psci, 4)

        # Add PEI, context and vendor-specific data, in this order
        data.extend(pei_data)
        data.extend(ctx_data)
        data.extend(vendor_data)

        self.data = data

        qmp_cmd.send_cper(cper_guid.CPER_PROC_ARM, self.data)