From 9547754f40ee5c5e3d1dbed0fbc972caacd075e8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 26 May 2023 18:00:08 +0100 Subject: hw/cxl: QMP based poison injection support Inject poison using QMP command cxl-inject-poison to add an entry to the poison list. For now, the poison is not returned CXL.mem reads, but only via the mailbox command Get Poison List. So a normal memory read to an address that is on the poison list will not yet result in a synchronous exception (and similar for partial cacheline writes). That is left for a future patch. See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h) Kernel patches to use this interface here: https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofield@intel.com/ To inject poison using QMP (telnet to the QMP port) { "execute": "qmp_capabilities" } { "execute": "cxl-inject-poison", "arguments": { "path": "/machine/peripheral/cxl-pmem0", "start": 2048, "length": 256 } } Adjusted to select a device on your machine. Note that the poison list supported is kept short enough to avoid the complexity of state machine that is needed to handle the MORE flag. Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Acked-by: Markus Armbruster Signed-off-by: Jonathan Cameron Message-Id: <20230526170010.574-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/cxl.json | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'qapi') diff --git a/qapi/cxl.json b/qapi/cxl.json index b21c9b4..ed1c7ee 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -6,6 +6,27 @@ ## ## +# @cxl-inject-poison: +# +# Poison records indicate that a CXL memory device knows that a +# particular memory region may be corrupted. This may be because of +# locally detected errors (e.g. ECC failure) or poisoned writes +# received from other components in the system. This injection +# mechanism enables testing of the OS handling of poison records which +# may be queried via the CXL mailbox. +# +# @path: CXL type 3 device canonical QOM path +# +# @start: Start address; must be 64 byte aligned. +# +# @length: Length of poison to inject; must be a multiple of 64 bytes. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-poison', + 'data': { 'path': 'str', 'start': 'uint64', 'length': 'size' }} + +## # @CxlUncorErrorType: # # Type of uncorrectable CXL error to inject. These errors are -- cgit v1.1 From ea9b6d647f2f4708708d19ba1cb17d332d3eff06 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 30 May 2023 14:36:01 +0100 Subject: hw/cxl/events: Add injection of General Media Events To facilitate testing provide a QMP command to inject a general media event. The event can be added to the log specified. Signed-off-by: Ira Weiny Reviewed-by: Fan Ni Acked-by: Markus Armbruster Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-6-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/cxl.json | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'qapi') diff --git a/qapi/cxl.json b/qapi/cxl.json index ed1c7ee..d509430 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -6,6 +6,80 @@ ## ## +# @CxlEventLog: +# +# CXL has a number of separate event logs for different types of +# events. Each such event log is handled and signaled independently. +# +# @informational: Information Event Log +# +# @warning: Warning Event Log +# +# @failure: Failure Event Log +# +# @fatal: Fatal Event Log +# +# Since: 8.1 +## +{ 'enum': 'CxlEventLog', + 'data': ['informational', + 'warning', + 'failure', + 'fatal'] + } + +## +# @cxl-inject-general-media-event: +# +# Inject an event record for a General Media Event (CXL r3.0 +# 8.2.9.2.1.1). This event type is reported via one of the event logs +# specified via the log parameter. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: event log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @dpa: Device Physical Address (relative to @path device). Note +# lower bits include some flags. See CXL r3.0 Table 8-43 General +# Media Event Record, Physical Address. +# +# @descriptor: Memory Event Descriptor with additional memory event +# information. See CXL r3.0 Table 8-43 General Media Event +# Record, Memory Event Descriptor for bit definitions. +# +# @type: Type of memory event that occurred. See CXL r3.0 Table 8-43 +# General Media Event Record, Memory Event Type for possible +# values. +# +# @transaction-type: Type of first transaction that caused the event +# to occur. See CXL r3.0 Table 8-43 General Media Event Record, +# Transaction Type for possible values. +# +# @channel: The channel of the memory event location. A channel is an +# interface that can be independently accessed for a transaction. +# +# @rank: The rank of the memory event location. A rank is a set of +# memory devices on a channel that together execute a transaction. +# +# @device: Bitmask that represents all devices in the rank associated +# with the memory event location. +# +# @component-id: Device specific component identifier for the event. +# May describe a field replaceable sub-component of the device. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-general-media-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8', + 'dpa': 'uint64', 'descriptor': 'uint8', + 'type': 'uint8', 'transaction-type': 'uint8', + '*channel': 'uint8', '*rank': 'uint8', + '*device': 'uint32', '*component-id': 'str' } } + +## # @cxl-inject-poison: # # Poison records indicate that a CXL memory device knows that a -- cgit v1.1 From b90a324eda7113b62b558aad43e2166eb52567d2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 30 May 2023 14:36:02 +0100 Subject: hw/cxl/events: Add injection of DRAM events Defined in CXL r3.0 8.2.9.2.1.2 DRAM Event Record, this event provides information related to DRAM devices. Example injection command in QMP: { "execute": "cxl-inject-dram-event", "arguments": { "path": "/machine/peripheral/cxl-mem0", "log": "informational", "flags": 1, "dpa": 1000, "descriptor": 3, "type": 3, "transaction-type": 192, "channel": 3, "rank": 17, "nibble-mask": 37421234, "bank-group": 7, "bank": 11, "row": 2, "column": 77, "correction-mask": [33, 44, 55,66] }} Acked-by: Markus Armbruster Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-7-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/cxl.json | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'qapi') diff --git a/qapi/cxl.json b/qapi/cxl.json index d509430..2ad3103 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -80,6 +80,67 @@ '*device': 'uint32', '*component-id': 'str' } } ## +# @cxl-inject-dram-event: +# +# Inject an event record for a DRAM Event (CXL r3.0 8.2.9.2.1.2). +# This event type is reported via one of the event logs specified via +# the log parameter. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: Event log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @dpa: Device Physical Address (relative to @path device). Note +# lower bits include some flags. See CXL r3.0 Table 8-44 DRAM +# Event Record, Physical Address. +# +# @descriptor: Memory Event Descriptor with additional memory event +# information. See CXL r3.0 Table 8-44 DRAM Event Record, Memory +# Event Descriptor for bit definitions. +# +# @type: Type of memory event that occurred. See CXL r3.0 Table 8-44 +# DRAM Event Record, Memory Event Type for possible values. +# +# @transaction-type: Type of first transaction that caused the event +# to occur. See CXL r3.0 Table 8-44 DRAM Event Record, +# Transaction Type for possible values. +# +# @channel: The channel of the memory event location. A channel is an +# interface that can be independently accessed for a transaction. +# +# @rank: The rank of the memory event location. A rank is a set of +# memory devices on a channel that together execute a transaction. +# +# @nibble-mask: Identifies one or more nibbles that the error affects +# +# @bank-group: Bank group of the memory event location, incorporating +# a number of Banks. +# +# @bank: Bank of the memory event location. A single bank is accessed +# per read or write of the memory. +# +# @row: Row address within the DRAM. +# +# @column: Column address within the DRAM. +# +# @correction-mask: Bits within each nibble. Used in order of bits +# set in the nibble-mask. Up to 4 nibbles may be covered. +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-dram-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags': 'uint8', + 'dpa': 'uint64', 'descriptor': 'uint8', + 'type': 'uint8', 'transaction-type': 'uint8', + '*channel': 'uint8', '*rank': 'uint8', '*nibble-mask': 'uint32', + '*bank-group': 'uint8', '*bank': 'uint8', '*row': 'uint32', + '*column': 'uint16', '*correction-mask': [ 'uint64' ] + }} + +## # @cxl-inject-poison: # # Poison records indicate that a CXL memory device knows that a -- cgit v1.1 From bafe03083255da3a053144b77a5fbc7dbf9494f3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 30 May 2023 14:36:03 +0100 Subject: hw/cxl/events: Add injection of Memory Module Events These events include a copy of the device health information at the time of the event. Actually using the emulated device health would require a lot of controls to manipulate that state. Given the aim of this injection code is to just test the flows when events occur, inject the contents of the device health state as well. Future work may add more sophisticate device health emulation including direct generation of these records when events occur (such as a temperature threshold being crossed). That does not reduce the usefulness of this more basic generation of the events. Acked-by: Markus Armbruster Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Jonathan Cameron Message-Id: <20230530133603.16934-8-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qapi/cxl.json | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'qapi') diff --git a/qapi/cxl.json b/qapi/cxl.json index 2ad3103..d5b5293 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -141,6 +141,59 @@ }} ## +# @cxl-inject-memory-module-event: +# +# Inject an event record for a Memory Module Event (CXL r3.0 +# 8.2.9.2.1.3). This event includes a copy of the Device Health +# info at the time of the event. +# +# @path: CXL type 3 device canonical QOM path +# +# @log: Event Log to add the event to +# +# @flags: Event Record Flags. See CXL r3.0 Table 8-42 Common Event +# Record Format, Event Record Flags for subfield definitions. +# +# @type: Device Event Type. See CXL r3.0 Table 8-45 Memory Module +# Event Record for bit definitions for bit definiions. +# +# @health-status: Overall health summary bitmap. See CXL r3.0 Table +# 8-100 Get Health Info Output Payload, Health Status for bit +# definitions. +# +# @media-status: Overall media health summary. See CXL r3.0 Table +# 8-100 Get Health Info Output Payload, Media Status for bit +# definitions. +# +# @additional-status: See CXL r3.0 Table 8-100 Get Health Info Output +# Payload, Additional Status for subfield definitions. +# +# @life-used: Percentage (0-100) of factory expected life span. +# +# @temperature: Device temperature in degrees Celsius. +# +# @dirty-shutdown-count: Number of times the device has been unable +# to determine whether data loss may have occurred. +# +# @corrected-volatile-error-count: Total number of correctable errors +# in volatile memory. +# +# @corrected-persistent-error-count: Total number of correctable +# errors in persistent memory +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-memory-module-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8', + 'type': 'uint8', 'health-status': 'uint8', + 'media-status': 'uint8', 'additional-status': 'uint8', + 'life-used': 'uint8', 'temperature' : 'int16', + 'dirty-shutdown-count': 'uint32', + 'corrected-volatile-error-count': 'uint32', + 'corrected-persistent-error-count': 'uint32' + }} + +## # @cxl-inject-poison: # # Poison records indicate that a CXL memory device knows that a -- cgit v1.1