aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/block/block-common.h419
-rw-r--r--include/block/block-copy.h2
-rw-r--r--include/block/block-global-state.h253
-rw-r--r--include/block/block-io.h368
-rw-r--r--include/block/block.h878
-rw-r--r--include/block/block_int-common.h1246
-rw-r--r--include/block/block_int-global-state.h329
-rw-r--r--include/block/block_int-io.h194
-rw-r--r--include/block/block_int.h1475
-rw-r--r--include/block/blockjob.h29
-rw-r--r--include/block/blockjob_int.h28
-rw-r--r--include/block/dirty-bitmap.h4
-rw-r--r--include/block/nvme.h81
-rw-r--r--include/block/reqlist.h75
-rw-r--r--include/block/snapshot.h13
-rw-r--r--include/exec/cpu-all.h4
-rw-r--r--include/exec/cpu-common.h39
-rw-r--r--include/exec/cpu_ldst.h1
-rw-r--r--include/exec/exec-all.h26
-rw-r--r--include/exec/gdbstub.h25
-rw-r--r--include/exec/poison.h2
-rw-r--r--include/hw/arm/virt.h1
-rw-r--r--include/hw/core/cpu.h33
-rw-r--r--include/hw/intc/riscv_imsic.h68
-rw-r--r--include/hw/riscv/opentitan.h4
-rw-r--r--include/hw/riscv/virt.h41
-rw-r--r--include/qemu-common.h2
-rw-r--r--include/qemu/coroutine-tls.h165
-rw-r--r--include/qemu/cpuid.h20
-rw-r--r--include/qemu/hbitmap.h12
-rw-r--r--include/qemu/job.h22
-rw-r--r--include/qemu/main-loop.h42
-rw-r--r--include/qemu/memalign.h61
-rw-r--r--include/qemu/osdep.h31
-rw-r--r--include/qemu/rcu.h7
-rw-r--r--include/qemu/typedefs.h2
-rw-r--r--include/qemu/xattr.h4
-rw-r--r--include/sysemu/accel-ops.h3
-rw-r--r--include/sysemu/arch_init.h2
-rw-r--r--include/sysemu/block-backend-common.h102
-rw-r--r--include/sysemu/block-backend-global-state.h116
-rw-r--r--include/sysemu/block-backend-io.h161
-rw-r--r--include/sysemu/block-backend.h269
-rw-r--r--include/sysemu/blockdev.h13
-rw-r--r--include/sysemu/hax.h18
-rw-r--r--include/sysemu/hw_accel.h5
-rw-r--r--include/sysemu/kvm.h6
-rw-r--r--include/sysemu/memory_mapping.h5
-rw-r--r--include/sysemu/os-posix.h1
-rw-r--r--include/sysemu/os-win32.h8
-rw-r--r--include/tcg/tcg-opc.h3
-rw-r--r--include/tcg/tcg.h5
52 files changed, 3979 insertions, 2744 deletions
diff --git a/include/block/block-common.h b/include/block/block-common.h
new file mode 100644
index 0000000..fdb7306
--- /dev/null
+++ b/include/block/block-common.h
@@ -0,0 +1,419 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_COMMON_H
+#define BLOCK_COMMON_H
+
+#include "block/aio.h"
+#include "block/aio-wait.h"
+#include "qemu/iov.h"
+#include "qemu/coroutine.h"
+#include "block/accounting.h"
+#include "block/dirty-bitmap.h"
+#include "block/blockjob.h"
+#include "qemu/hbitmap.h"
+#include "qemu/transactions.h"
+
+/*
+ * generated_co_wrapper
+ *
+ * Function specifier, which does nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Read more in docs/devel/block-coroutine-wrapper.rst
+ */
+#define generated_co_wrapper
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BdrvChild BdrvChild;
+typedef struct BdrvChildClass BdrvChildClass;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+ /*
+ * True if this block driver only supports compressed writes
+ */
+ bool needs_compressed_writes;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+
+ /*
+ * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
+ * that the block driver should unmap (discard) blocks if it is guaranteed
+ * that the result will read back as zeroes. The flag is only passed to the
+ * driver if the block device is opened with BDRV_O_UNMAP.
+ */
+ BDRV_REQ_MAY_UNMAP = 0x4,
+
+ BDRV_REQ_FUA = 0x10,
+ BDRV_REQ_WRITE_COMPRESSED = 0x20,
+
+ /*
+ * Signifies that this write request will not change the visible disk
+ * content.
+ */
+ BDRV_REQ_WRITE_UNCHANGED = 0x40,
+
+ /*
+ * Forces request serialisation. Use only with write requests.
+ */
+ BDRV_REQ_SERIALISING = 0x80,
+
+ /*
+ * Execute the request only if the operation can be offloaded or otherwise
+ * be executed efficiently, but return an error instead of using a slow
+ * fallback.
+ */
+ BDRV_REQ_NO_FALLBACK = 0x100,
+
+ /*
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
+ */
+ BDRV_REQ_PREFETCH = 0x200,
+
+ /*
+ * If we need to wait for other requests, just fail immediately. Used
+ * only together with BDRV_REQ_SERIALISING. Used only with requests aligned
+ * to request_alignment (corresponding assertions are in block/io.c).
+ */
+ BDRV_REQ_NO_WAIT = 0x400,
+
+ /* Mask of valid flags */
+ BDRV_REQ_MASK = 0x7ff,
+} BdrvRequestFlags;
+
+#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save
+ writes in a snapshot */
+#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the
+ thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
+ select an appropriate protocol driver,
+ ignoring the format layer */
+#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
+#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
+ read-write fails */
+#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+
+
+/* Option names of options parsed by the block layer */
+
+#define BDRV_OPT_CACHE_WB "cache.writeback"
+#define BDRV_OPT_CACHE_DIRECT "cache.direct"
+#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
+#define BDRV_OPT_READ_ONLY "read-only"
+#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
+#define BDRV_OPT_DISCARD "discard"
+#define BDRV_OPT_FORCE_SHARE "force-share"
+
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+
+#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
+ INT_MAX >> BDRV_SECTOR_BITS)
+#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
+
+/*
+ * We want allow aligning requests and disk length up to any 32bit alignment
+ * and don't afraid of overflow.
+ * To achieve it, and in the same time use some pretty number as maximum disk
+ * size, let's define maximum "length" (a limit for any offset/bytes request and
+ * for disk size) to be the greatest power of 2 less than INT64_MAX.
+ */
+#define BDRV_MAX_ALIGNMENT (1L << 30)
+#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
+
+/*
+ * Allocation status flags for bdrv_block_status() and friends.
+ *
+ * Public flags:
+ * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
+ * BDRV_BLOCK_ZERO: offset reads as zero
+ * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
+ * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
+ * layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ * layer, set by block layer
+ *
+ * Internal flags:
+ * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
+ * that the block layer recompute the answer from the returned
+ * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ * zeroes in file child of current block node inside
+ * returned region. Only valid together with both
+ * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ * appear with BDRV_BLOCK_ZERO.
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data. However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
+ *
+ * DATA ZERO OFFSET_VALID
+ * t t t sectors read as zero, returned file is zero at offset
+ * t f t sectors read as valid from file at offset
+ * f t t sectors preallocated, read as zero, returned file not
+ * necessarily zero at offset
+ * f f t sectors preallocated but read from backing_hd,
+ * returned file contains garbage at offset
+ * t t f sectors preallocated, read as zero, unknown offset
+ * t f f sectors read from unknown file or offset
+ * f t f not allocated or unknown offset, read as zero
+ * f f f not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA 0x01
+#define BDRV_BLOCK_ZERO 0x02
+#define BDRV_BLOCK_OFFSET_VALID 0x04
+#define BDRV_BLOCK_RAW 0x08
+#define BDRV_BLOCK_ALLOCATED 0x10
+#define BDRV_BLOCK_EOF 0x20
+#define BDRV_BLOCK_RECURSE 0x40
+
+typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+ bool backing_missing;
+ BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
+ BlockDriverState *old_file_bs; /* keep pointer for permissions update */
+ QDict *options;
+ QDict *explicit_options;
+ void *opaque;
+} BDRVReopenState;
+
+/*
+ * Block operation types
+ */
+typedef enum BlockOpType {
+ BLOCK_OP_TYPE_BACKUP_SOURCE,
+ BLOCK_OP_TYPE_BACKUP_TARGET,
+ BLOCK_OP_TYPE_CHANGE,
+ BLOCK_OP_TYPE_COMMIT_SOURCE,
+ BLOCK_OP_TYPE_COMMIT_TARGET,
+ BLOCK_OP_TYPE_DATAPLANE,
+ BLOCK_OP_TYPE_DRIVE_DEL,
+ BLOCK_OP_TYPE_EJECT,
+ BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
+ BLOCK_OP_TYPE_MIRROR_SOURCE,
+ BLOCK_OP_TYPE_MIRROR_TARGET,
+ BLOCK_OP_TYPE_RESIZE,
+ BLOCK_OP_TYPE_STREAM,
+ BLOCK_OP_TYPE_REPLACE,
+ BLOCK_OP_TYPE_MAX,
+} BlockOpType;
+
+/* Block node permission constants */
+enum {
+ /**
+ * A user that has the "permission" of consistent reads is guaranteed that
+ * their view of the contents of the block device is complete and
+ * self-consistent, representing the contents of a disk at a specific
+ * point.
+ *
+ * For most block devices (including their backing files) this is true, but
+ * the property cannot be maintained in a few situations like for
+ * intermediate nodes of a commit block job.
+ */
+ BLK_PERM_CONSISTENT_READ = 0x01,
+
+ /** This permission is required to change the visible disk contents. */
+ BLK_PERM_WRITE = 0x02,
+
+ /**
+ * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+ * required for writes to the block node when the caller promises that
+ * the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
+ */
+ BLK_PERM_WRITE_UNCHANGED = 0x04,
+
+ /** This permission is required to change the size of a block node. */
+ BLK_PERM_RESIZE = 0x08,
+
+ /**
+ * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
+ * 6.1 and earlier may still lock the corresponding byte in block/file-posix
+ * locking. So, implementing some new permission should be very careful to
+ * not interfere with this old unused thing.
+ */
+
+ BLK_PERM_ALL = 0x0f,
+
+ DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_WRITE
+ | BLK_PERM_WRITE_UNCHANGED
+ | BLK_PERM_RESIZE,
+
+ DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
+};
+
+/*
+ * Flags that parent nodes assign to child nodes to specify what kind of
+ * role(s) they take.
+ *
+ * At least one of DATA, METADATA, FILTERED, or COW must be set for
+ * every child.
+ */
+enum BdrvChildRoleBits {
+ /*
+ * This child stores data.
+ * Any node may have an arbitrary number of such children.
+ */
+ BDRV_CHILD_DATA = (1 << 0),
+
+ /*
+ * This child stores metadata.
+ * Any node may have an arbitrary number of metadata-storing
+ * children.
+ */
+ BDRV_CHILD_METADATA = (1 << 1),
+
+ /*
+ * A child that always presents exactly the same visible data as
+ * the parent, e.g. by virtue of the parent forwarding all reads
+ * and writes.
+ * This flag is mutually exclusive with DATA, METADATA, and COW.
+ * Any node may have at most one filtered child at a time.
+ */
+ BDRV_CHILD_FILTERED = (1 << 2),
+
+ /*
+ * Child from which to read all data that isn't allocated in the
+ * parent (i.e., the backing child); such data is copied to the
+ * parent through COW (and optionally COR).
+ * This field is mutually exclusive with DATA, METADATA, and
+ * FILTERED.
+ * Any node may have at most one such backing child at a time.
+ */
+ BDRV_CHILD_COW = (1 << 3),
+
+ /*
+ * The primary child. For most drivers, this is the child whose
+ * filename applies best to the parent node.
+ * Any node may have at most one primary child at a time.
+ */
+ BDRV_CHILD_PRIMARY = (1 << 4),
+
+ /* Useful combination of flags */
+ BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
+ | BDRV_CHILD_METADATA
+ | BDRV_CHILD_PRIMARY,
+};
+
+/* Mask of BdrvChildRoleBits values */
+typedef unsigned int BdrvChildRole;
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+typedef struct BlockSizes {
+ uint32_t phys;
+ uint32_t log;
+} BlockSizes;
+
+typedef struct HDGeometry {
+ uint32_t heads;
+ uint32_t sectors;
+ uint32_t cylinders;
+} HDGeometry;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * These functions must never call any function from other categories
+ * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
+ * all of them.
+ */
+
+char *bdrv_perm_names(uint64_t perm);
+uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
+
+void bdrv_init_with_whitelist(void);
+bool bdrv_uses_whitelist(void);
+int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
+
+int bdrv_parse_aio(const char *mode, int *flags);
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+
+int path_has_protocol(const char *path);
+int path_is_absolute(const char *path);
+char *path_combine(const char *base_path, const char *filename);
+
+char *bdrv_get_full_backing_filename_from_filename(const char *backed,
+ const char *backing,
+ Error **errp);
+
+#endif /* BLOCK_COMMON_H */
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 99370fa..68bbd34 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
typedef struct BlockCopyCallState BlockCopyCallState;
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ const BdrvDirtyBitmap *bitmap,
Error **errp);
/* Function should be called prior any actual copy request */
@@ -34,6 +35,7 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
void block_copy_state_free(BlockCopyState *s);
+void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
int64_t block_copy_reset_unallocated(BlockCopyState *s,
int64_t offset, int64_t *count);
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
new file mode 100644
index 0000000..25bb69b
--- /dev/null
+++ b/include/block/block-global-state.h
@@ -0,0 +1,253 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_GLOBAL_STATE_H
+#define BLOCK_GLOBAL_STATE_H
+
+#include "block-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * If a function modifies the graph, it also uses drain and/or
+ * aio_context_acquire/release to be sure it has unique access.
+ * aio_context locking is needed together with BQL because of
+ * the thread-safe I/O API that concurrently runs and accesses
+ * the graph without the BQL.
+ *
+ * It is important to note that not all of these functions are
+ * necessarily limited to running under the BQL, but they would
+ * require additional auditing and many small thread-safety changes
+ * to move them into the I/O API. Often it's not worth doing that
+ * work since the APIs are only used with the BQL held at the
+ * moment, so they have been placed in the GS API (for now).
+ *
+ * These functions can call any function from this and other categories
+ * (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs.
+ *
+ * All functions in this header must use the macro
+ * GLOBAL_STATE_CODE();
+ * to catch when they are accidentally called without the BQL.
+ */
+
+void bdrv_init(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix,
+ Error **errp);
+BlockDriver *bdrv_find_format(const char *format_name);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QemuOpts *opts, Error **errp);
+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
+
+BlockDriverState *bdrv_new(void);
+int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp);
+int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
+ Error **errp);
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+ Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+ int flags, Error **errp);
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
+
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState *parent,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ bool allow_none, Error **errp);
+BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+ const char *bdref_key, Error **errp);
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
+ const char *node_name,
+ QDict *options, int flags,
+ Error **errp);
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+ int flags, Error **errp);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, QDict *options,
+ bool keep_old_opts);
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+ Error **errp);
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+void bdrv_refresh_filename(BlockDriverState *bs);
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_make_empty(BdrvChild *c, Error **errp);
+int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
+ const char *backing_fmt, bool warn);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+ const char *backing_file_str);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
+
+/*
+ * The units of offset and total_work_size may be chosen arbitrarily by the
+ * block driver; total_work_size may change during the course of the amendment
+ * operation
+ */
+typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
+ int64_t total_work_size, void *opaque);
+int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ bool force,
+ Error **errp);
+
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+ const char *node_name, Error **errp);
+
+int bdrv_activate(BlockDriverState *bs, Error **errp);
+void bdrv_activate_all(Error **errp);
+int bdrv_inactivate_all(void);
+
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all_begin(void);
+void bdrv_drain_all_end(void);
+void bdrv_drain_all(void);
+
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+BlockDriverState *bdrv_find_node(const char *node_name);
+BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
+XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
+BlockDriverState *bdrv_lookup_bs(const char *device,
+ const char *node_name,
+ Error **errp);
+bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
+BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
+
+typedef struct BdrvNextIterator {
+ enum {
+ BDRV_NEXT_BACKEND_ROOTS,
+ BDRV_NEXT_MONITOR_OWNED,
+ } phase;
+ BlockBackend *blk;
+ BlockDriverState *bs;
+} BdrvNextIterator;
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it);
+BlockDriverState *bdrv_next(BdrvNextIterator *it);
+void bdrv_next_cleanup(BdrvNextIterator *it);
+
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque, bool read_only);
+int bdrv_get_flags(BlockDriverState *bs);
+char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
+char *bdrv_dirname(BlockDriverState *bs, Error **errp);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ bool quiet, Error **errp);
+
+void bdrv_ref(BlockDriverState *bs);
+void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ Error **errp);
+
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+/**
+ * Locks the AioContext of @bs if it's not the current AioContext. This avoids
+ * double locking which could lead to deadlocks: This is a coroutine_fn, so we
+ * know we already own the lock of the current AioContext.
+ *
+ * May only be called in the main thread.
+ */
+void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
+
+/**
+ * Unlocks the AioContext of @bs if it's not the current AioContext.
+ */
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
+
+void bdrv_set_aio_context_ignore(BlockDriverState *bs,
+ AioContext *new_context, GSList **ignore);
+int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ Error **errp);
+int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
+bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
+ GSList **ignore, Error **errp);
+bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ GSList **ignore, Error **errp);
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
+
+int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
+int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
+
+void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+
+/**
+ *
+ * bdrv_register_buf/bdrv_unregister_buf:
+ *
+ * Register/unregister a buffer for I/O. For example, VFIO drivers are
+ * interested to know the memory areas that would later be used for I/O, so
+ * that they can prepare IOMMU mapping etc., to get better performance.
+ */
+void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+void bdrv_cancel_in_flight(BlockDriverState *bs);
+
+#endif /* BLOCK_GLOBAL_STATE_H */
diff --git a/include/block/block-io.h b/include/block/block-io.h
new file mode 100644
index 0000000..5e3f346
--- /dev/null
+++ b/include/block/block-io.h
@@ -0,0 +1,368 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_IO_H
+#define BLOCK_IO_H
+
+#include "block-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe, and therefore
+ * can run in any thread as long as the thread has called
+ * aio_context_acquire/release().
+ *
+ * These functions can only call functions from I/O and Common categories,
+ * but can be invoked by GS, "I/O or GS" and I/O APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
+ int64_t bytes);
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int64_t bytes);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags,
+ Error **errp);
+
+int64_t bdrv_nb_sectors(BlockDriverState *bs);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
+ BlockDriverState *in_bs, Error **errp);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
+void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
+
+
+/* async block I/O */
+void bdrv_aio_cancel(BlockAIOCB *acb);
+void bdrv_aio_cancel_async(BlockAIOCB *acb);
+
+/* sg packet commands */
+int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+
+/* Ensure contents are flushed to disk. */
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+
+int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
+int bdrv_block_status(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map,
+ BlockDriverState **file);
+int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ bool include_base, int64_t offset, int64_t bytes,
+ int64_t *pnum);
+int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
+ int64_t bytes);
+
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+ bool ignore_allow_rdw, Error **errp);
+int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
+ Error **errp);
+bool bdrv_is_read_only(BlockDriverState *bs);
+bool bdrv_is_writable(BlockDriverState *bs);
+bool bdrv_is_sg(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+
+bool bdrv_supports_compressed_writes(BlockDriverState *bs);
+const char *bdrv_get_node_name(const BlockDriverState *bs);
+const char *bdrv_get_device_name(const BlockDriverState *bs);
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
+ Error **errp);
+BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ int64_t *cluster_offset,
+ int64_t *cluster_bytes);
+
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+/*
+ * Returns the alignment in bytes that is required so that no bounce buffer
+ * is required throughout the stack
+ */
+size_t bdrv_min_mem_align(BlockDriverState *bs);
+/* Returns optimal alignment in bytes for bounce buffer */
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_blockalign0(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
+
+#define BLKDBG_EVENT(child, evt) \
+ do { \
+ if (child) { \
+ bdrv_debug_event(child->bs, evt); \
+ } \
+ } while (0)
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+/**
+ * Move the current coroutine to the AioContext of @bs and return the old
+ * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
+ * will wait for the operation to proceed until the corresponding
+ * bdrv_co_leave().
+ *
+ * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
+ * this will deadlock.
+ */
+AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
+
+/**
+ * Ends a section started by bdrv_co_enter(). Move the current coroutine back
+ * to old_ctx and decrease bs->in_flight again.
+ */
+void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
+
+/**
+ * Transfer control to @co in the aio context of @bs
+ */
+void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
+
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
+
+void bdrv_io_plug(BlockDriverState *bs);
+void bdrv_io_unplug(BlockDriverState *bs);
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+
+/**
+ *
+ * bdrv_co_copy_range:
+ *
+ * Do offloaded copy between two children. If the operation is not implemented
+ * by the driver, or if the backend storage doesn't support it, a negative
+ * error code will be returned.
+ *
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
+ * calling copy_file_range(2)) after the first error, thus it should fall back
+ * to a read+write path in the caller level.
+ *
+ * @src: Source child to copy data from
+ * @src_offset: offset in @src image to read data
+ * @dst: Destination child to copy data to
+ * @dst_offset: offset in @dst image to write data
+ * @bytes: number of bytes to copy
+ * @flags: request flags. Supported flags:
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
+ * write on @dst as if bdrv_co_pwrite_zeroes is
+ * called. Used to simplify caller code, or
+ * during BlockDriver.bdrv_co_copy_range_from()
+ * recursion.
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ * requests currently in flight.
+ *
+ * Returns: 0 if succeeded; negative error code if failed.
+ **/
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+/**
+ * bdrv_drained_end_no_poll:
+ *
+ * Same as bdrv_drained_end(), but do not poll for the subgraph to
+ * actually become unquiesced. Therefore, no graph changes will occur
+ * with this function.
+ *
+ * *drained_end_counter is incremented for every background operation
+ * that is scheduled, and will be decremented for every operation once
+ * it settles. The caller must poll until it reaches 0. The counter
+ * should be accessed using atomic operations only.
+ */
+void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which
+ * requires the caller to be either in the main thread and hold
+ * the BlockdriverState (bs) AioContext lock, or directly in the
+ * home thread that runs the bs AioContext. Calling them from
+ * another thread in another AioContext would cause deadlocks.
+ *
+ * Therefore, these functions are not proper I/O, because they
+ * can't run in *any* iothreads, but only in a specific one.
+ *
+ * These functions can call any function from I/O, Common and this
+ * categories, but must be invoked only by other "I/O or GS" and GS APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_OR_GS_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+#define BDRV_POLL_WHILE(bs, cond) ({ \
+ BlockDriverState *bs_ = (bs); \
+ IO_OR_GS_CODE(); \
+ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
+ cond); })
+
+void bdrv_drain(BlockDriverState *bs);
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
+
+int generated_co_wrapper
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
+
+/* Invalidate any cached metadata used by image formats */
+int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
+ Error **errp);
+int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
+int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
+ int64_t bytes);
+int generated_co_wrapper
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
+ * any pending activity to cease.
+ */
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+
+/**
+ * bdrv_parent_drained_end_single:
+ *
+ * End a quiesced section for the parent of @c.
+ *
+ * This polls @bs's AioContext until all scheduled sub-drained_ends
+ * have settled, which may result in graph changes.
+ */
+void bdrv_parent_drained_end_single(BdrvChild *c);
+
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs, its parents (except for @ignore_parent),
+ * and if @recursive is true its children as well (used for subtree drain).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
+ *
+ * This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+ BdrvChild *ignore_parent, bool ignore_bds_parents);
+
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server, block jobs, and device model.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_do_drained_begin_quiesce:
+ *
+ * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+ * running requests to complete.
+ */
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+ BdrvChild *parent, bool ignore_bds_parents);
+
+/**
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
+ * exclusive access to all child nodes as well.
+ */
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ *
+ * This polls @bs's AioContext until all scheduled sub-drained_ends
+ * have settled. On one hand, that may result in graph changes. On
+ * the other, this requires that the caller either runs in the main
+ * loop; or that all involved nodes (@bs and all of its parents) are
+ * in the caller's AioContext.
+ */
+void bdrv_drained_end(BlockDriverState *bs);
+
+/**
+ * End a quiescent section started by bdrv_subtree_drained_begin().
+ */
+void bdrv_subtree_drained_end(BlockDriverState *bs);
+
+#endif /* BLOCK_IO_H */
diff --git a/include/block/block.h b/include/block/block.h
index e1713ee..1e6b8fe 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -1,864 +1,32 @@
-#ifndef BLOCK_H
-#define BLOCK_H
-
-#include "block/aio.h"
-#include "block/aio-wait.h"
-#include "qemu/iov.h"
-#include "qemu/coroutine.h"
-#include "block/accounting.h"
-#include "block/dirty-bitmap.h"
-#include "block/blockjob.h"
-#include "qemu/hbitmap.h"
-#include "qemu/transactions.h"
-
/*
- * generated_co_wrapper
- *
- * Function specifier, which does nothing but mark functions to be
- * generated by scripts/block-coroutine-wrapper.py
- *
- * Read more in docs/devel/block-coroutine-wrapper.rst
- */
-#define generated_co_wrapper
-
-/* block.c */
-typedef struct BlockDriver BlockDriver;
-typedef struct BdrvChild BdrvChild;
-typedef struct BdrvChildClass BdrvChildClass;
-
-typedef struct BlockDriverInfo {
- /* in bytes, 0 if irrelevant */
- int cluster_size;
- /* offset at which the VM state can be saved (0 if not possible) */
- int64_t vm_state_offset;
- bool is_dirty;
- /*
- * True if this block driver only supports compressed writes
- */
- bool needs_compressed_writes;
-} BlockDriverInfo;
-
-typedef struct BlockFragInfo {
- uint64_t allocated_clusters;
- uint64_t total_clusters;
- uint64_t fragmented_clusters;
- uint64_t compressed_clusters;
-} BlockFragInfo;
-
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-
- /*
- * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
- * that the block driver should unmap (discard) blocks if it is guaranteed
- * that the result will read back as zeroes. The flag is only passed to the
- * driver if the block device is opened with BDRV_O_UNMAP.
- */
- BDRV_REQ_MAY_UNMAP = 0x4,
-
- BDRV_REQ_FUA = 0x10,
- BDRV_REQ_WRITE_COMPRESSED = 0x20,
-
- /* Signifies that this write request will not change the visible disk
- * content. */
- BDRV_REQ_WRITE_UNCHANGED = 0x40,
-
- /* Forces request serialisation. Use only with write requests. */
- BDRV_REQ_SERIALISING = 0x80,
-
- /* Execute the request only if the operation can be offloaded or otherwise
- * be executed efficiently, but return an error instead of using a slow
- * fallback. */
- BDRV_REQ_NO_FALLBACK = 0x100,
-
- /*
- * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
- * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
- * filter is involved), in which case it signals that the COR operation
- * need not read the data into memory (qiov) but only ensure they are
- * copied to the top layer (i.e., that COR operation is done).
- */
- BDRV_REQ_PREFETCH = 0x200,
-
- /*
- * If we need to wait for other requests, just fail immediately. Used
- * only together with BDRV_REQ_SERIALISING.
- */
- BDRV_REQ_NO_WAIT = 0x400,
-
- /* Mask of valid flags */
- BDRV_REQ_MASK = 0x7ff,
-} BdrvRequestFlags;
-
-typedef struct BlockSizes {
- uint32_t phys;
- uint32_t log;
-} BlockSizes;
-
-typedef struct HDGeometry {
- uint32_t heads;
- uint32_t sectors;
- uint32_t cylinders;
-} HDGeometry;
-
-#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
-#define BDRV_O_RDWR 0x0002
-#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
-#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
-#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
-#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
-#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
-#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
-#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
-#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
-#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
-#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
-#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
- select an appropriate protocol driver,
- ignoring the format layer */
-#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
-#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
-#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
-
-#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
-
-
-/* Option names of options parsed by the block layer */
-
-#define BDRV_OPT_CACHE_WB "cache.writeback"
-#define BDRV_OPT_CACHE_DIRECT "cache.direct"
-#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
-#define BDRV_OPT_READ_ONLY "read-only"
-#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
-#define BDRV_OPT_DISCARD "discard"
-#define BDRV_OPT_FORCE_SHARE "force-share"
-
-
-#define BDRV_SECTOR_BITS 9
-#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
-
-#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
- INT_MAX >> BDRV_SECTOR_BITS)
-#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
-
-/*
- * We want allow aligning requests and disk length up to any 32bit alignment
- * and don't afraid of overflow.
- * To achieve it, and in the same time use some pretty number as maximum disk
- * size, let's define maximum "length" (a limit for any offset/bytes request and
- * for disk size) to be the greatest power of 2 less than INT64_MAX.
- */
-#define BDRV_MAX_ALIGNMENT (1L << 30)
-#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
-
-/*
- * Allocation status flags for bdrv_block_status() and friends.
- *
- * Public flags:
- * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
- * BDRV_BLOCK_ZERO: offset reads as zero
- * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
- * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- * layer rather than any backing, set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
- * layer, set by block layer
- *
- * Internal flags:
- * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
- * that the block layer recompute the answer from the returned
- * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
- * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
- * zeroes in file child of current block node inside
- * returned region. Only valid together with both
- * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
- * appear with BDRV_BLOCK_ZERO.
- *
- * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
- * host offset within the returned BDS that is allocated for the
- * corresponding raw guest data. However, whether that offset
- * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
- *
- * DATA ZERO OFFSET_VALID
- * t t t sectors read as zero, returned file is zero at offset
- * t f t sectors read as valid from file at offset
- * f t t sectors preallocated, read as zero, returned file not
- * necessarily zero at offset
- * f f t sectors preallocated but read from backing_hd,
- * returned file contains garbage at offset
- * t t f sectors preallocated, read as zero, unknown offset
- * t f f sectors read from unknown file or offset
- * f t f not allocated or unknown offset, read as zero
- * f f f not allocated or unknown offset, read from backing_hd
- */
-#define BDRV_BLOCK_DATA 0x01
-#define BDRV_BLOCK_ZERO 0x02
-#define BDRV_BLOCK_OFFSET_VALID 0x04
-#define BDRV_BLOCK_RAW 0x08
-#define BDRV_BLOCK_ALLOCATED 0x10
-#define BDRV_BLOCK_EOF 0x20
-#define BDRV_BLOCK_RECURSE 0x40
-
-typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
-
-typedef struct BDRVReopenState {
- BlockDriverState *bs;
- int flags;
- BlockdevDetectZeroesOptions detect_zeroes;
- bool backing_missing;
- BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
- BlockDriverState *old_file_bs; /* keep pointer for permissions update */
- QDict *options;
- QDict *explicit_options;
- void *opaque;
-} BDRVReopenState;
-
-/*
- * Block operation types
- */
-typedef enum BlockOpType {
- BLOCK_OP_TYPE_BACKUP_SOURCE,
- BLOCK_OP_TYPE_BACKUP_TARGET,
- BLOCK_OP_TYPE_CHANGE,
- BLOCK_OP_TYPE_COMMIT_SOURCE,
- BLOCK_OP_TYPE_COMMIT_TARGET,
- BLOCK_OP_TYPE_DATAPLANE,
- BLOCK_OP_TYPE_DRIVE_DEL,
- BLOCK_OP_TYPE_EJECT,
- BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
- BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
- BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
- BLOCK_OP_TYPE_MIRROR_SOURCE,
- BLOCK_OP_TYPE_MIRROR_TARGET,
- BLOCK_OP_TYPE_RESIZE,
- BLOCK_OP_TYPE_STREAM,
- BLOCK_OP_TYPE_REPLACE,
- BLOCK_OP_TYPE_MAX,
-} BlockOpType;
-
-/* Block node permission constants */
-enum {
- /**
- * A user that has the "permission" of consistent reads is guaranteed that
- * their view of the contents of the block device is complete and
- * self-consistent, representing the contents of a disk at a specific
- * point.
- *
- * For most block devices (including their backing files) this is true, but
- * the property cannot be maintained in a few situations like for
- * intermediate nodes of a commit block job.
- */
- BLK_PERM_CONSISTENT_READ = 0x01,
-
- /** This permission is required to change the visible disk contents. */
- BLK_PERM_WRITE = 0x02,
-
- /**
- * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
- * required for writes to the block node when the caller promises that
- * the visible disk content doesn't change.
- *
- * As the BLK_PERM_WRITE permission is strictly stronger, either is
- * sufficient to perform an unchanging write.
- */
- BLK_PERM_WRITE_UNCHANGED = 0x04,
-
- /** This permission is required to change the size of a block node. */
- BLK_PERM_RESIZE = 0x08,
-
- /**
- * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
- * 6.1 and earlier may still lock the corresponding byte in block/file-posix
- * locking. So, implementing some new permission should be very careful to
- * not interfere with this old unused thing.
- */
-
- BLK_PERM_ALL = 0x0f,
-
- DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
- | BLK_PERM_WRITE
- | BLK_PERM_WRITE_UNCHANGED
- | BLK_PERM_RESIZE,
-
- DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
-};
-
-/*
- * Flags that parent nodes assign to child nodes to specify what kind of
- * role(s) they take.
- *
- * At least one of DATA, METADATA, FILTERED, or COW must be set for
- * every child.
- */
-enum BdrvChildRoleBits {
- /*
- * This child stores data.
- * Any node may have an arbitrary number of such children.
- */
- BDRV_CHILD_DATA = (1 << 0),
-
- /*
- * This child stores metadata.
- * Any node may have an arbitrary number of metadata-storing
- * children.
- */
- BDRV_CHILD_METADATA = (1 << 1),
-
- /*
- * A child that always presents exactly the same visible data as
- * the parent, e.g. by virtue of the parent forwarding all reads
- * and writes.
- * This flag is mutually exclusive with DATA, METADATA, and COW.
- * Any node may have at most one filtered child at a time.
- */
- BDRV_CHILD_FILTERED = (1 << 2),
-
- /*
- * Child from which to read all data that isn't allocated in the
- * parent (i.e., the backing child); such data is copied to the
- * parent through COW (and optionally COR).
- * This field is mutually exclusive with DATA, METADATA, and
- * FILTERED.
- * Any node may have at most one such backing child at a time.
- */
- BDRV_CHILD_COW = (1 << 3),
-
- /*
- * The primary child. For most drivers, this is the child whose
- * filename applies best to the parent node.
- * Any node may have at most one primary child at a time.
- */
- BDRV_CHILD_PRIMARY = (1 << 4),
-
- /* Useful combination of flags */
- BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
- | BDRV_CHILD_METADATA
- | BDRV_CHILD_PRIMARY,
-};
-
-/* Mask of BdrvChildRoleBits values */
-typedef unsigned int BdrvChildRole;
-
-char *bdrv_perm_names(uint64_t perm);
-uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
-
-void bdrv_init(void);
-void bdrv_init_with_whitelist(void);
-bool bdrv_uses_whitelist(void);
-int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix,
- Error **errp);
-BlockDriver *bdrv_find_format(const char *format_name);
-int bdrv_create(BlockDriver *drv, const char* filename,
- QemuOpts *opts, Error **errp);
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-
-BlockDriverState *bdrv_new(void);
-int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
- Error **errp);
-int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
- Error **errp);
-int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
- Error **errp);
-BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
- int flags, Error **errp);
-int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
-
-int bdrv_parse_aio(const char *mode, int *flags);
-int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-int bdrv_parse_discard_flags(const char *mode, int *flags);
-BdrvChild *bdrv_open_child(const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState* parent,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- bool allow_none, Error **errp);
-BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
- const char *bdref_key, Error **errp);
-BlockDriverState *bdrv_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
-BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
- const char *node_name,
- QDict *options, int flags,
- Error **errp);
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
- int flags, Error **errp);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, QDict *options,
- bool keep_old_opts);
-void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
- Error **errp);
-int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
- Error **errp);
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
- int64_t bytes);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
- const void *buf, int64_t bytes);
-/*
- * Efficiently zero a region of the disk image. Note that this is a regular
- * I/O request like read or write and should have a reasonable size. This
- * function is not suitable for zeroing the entire image in a single request
- * because it may allocate memory for the entire region.
- */
-int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
- const char *backing_file);
-void bdrv_refresh_filename(BlockDriverState *bs);
-
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags,
- Error **errp);
-int generated_co_wrapper
-bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-
-int64_t bdrv_nb_sectors(BlockDriverState *bs);
-int64_t bdrv_getlength(BlockDriverState *bs);
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
- BlockDriverState *in_bs, Error **errp);
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
-int bdrv_commit(BlockDriverState *bs);
-int bdrv_make_empty(BdrvChild *c, Error **errp);
-int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
- const char *backing_fmt, bool warn);
-void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
- const char *backing_file_str);
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
- BlockDriverState *bs);
-BlockDriverState *bdrv_find_base(BlockDriverState *bs);
-bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
- Error **errp);
-int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
- Error **errp);
-void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
-int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
-void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
-
-
-typedef struct BdrvCheckResult {
- int corruptions;
- int leaks;
- int check_errors;
- int corruptions_fixed;
- int leaks_fixed;
- int64_t image_end_offset;
- BlockFragInfo bfi;
-} BdrvCheckResult;
-
-typedef enum {
- BDRV_FIX_LEAKS = 1,
- BDRV_FIX_ERRORS = 2,
-} BdrvCheckMode;
-
-int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix);
-
-/* The units of offset and total_work_size may be chosen arbitrarily by the
- * block driver; total_work_size may change during the course of the amendment
- * operation */
-typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
- int64_t total_work_size, void *opaque);
-int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
- bool force,
- Error **errp);
-
-/* check if a named node can be replaced when doing drive-mirror */
-BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
- const char *node_name, Error **errp);
-
-/* async block I/O */
-void bdrv_aio_cancel(BlockAIOCB *acb);
-void bdrv_aio_cancel_async(BlockAIOCB *acb);
-
-/* sg packet commands */
-int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
-
-/* Invalidate any cached metadata used by image formats */
-int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
- Error **errp);
-void bdrv_invalidate_cache_all(Error **errp);
-int bdrv_inactivate_all(void);
-
-/* Ensure contents are flushed to disk. */
-int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain(BlockDriverState *bs);
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
-void bdrv_drain_all_begin(void);
-void bdrv_drain_all_end(void);
-void bdrv_drain_all(void);
-
-#define BDRV_POLL_WHILE(bs, cond) ({ \
- BlockDriverState *bs_ = (bs); \
- AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
- cond); })
-
-int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
- int64_t bytes);
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
-int bdrv_has_zero_init_1(BlockDriverState *bs);
-int bdrv_has_zero_init(BlockDriverState *bs);
-bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int bdrv_block_status(BlockDriverState *bs, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file);
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
- int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- bool include_base, int64_t offset, int64_t bytes,
- int64_t *pnum);
-int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
- int64_t bytes);
-
-bool bdrv_is_read_only(BlockDriverState *bs);
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
- bool ignore_allow_rdw, Error **errp);
-int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
- Error **errp);
-bool bdrv_is_writable(BlockDriverState *bs);
-bool bdrv_is_sg(BlockDriverState *bs);
-bool bdrv_is_inserted(BlockDriverState *bs);
-void bdrv_lock_medium(BlockDriverState *bs, bool locked);
-void bdrv_eject(BlockDriverState *bs, bool eject_flag);
-const char *bdrv_get_format_name(BlockDriverState *bs);
-BlockDriverState *bdrv_find_node(const char *node_name);
-BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
-XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
-BlockDriverState *bdrv_lookup_bs(const char *device,
- const char *node_name,
- Error **errp);
-bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
-BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
-
-typedef struct BdrvNextIterator {
- enum {
- BDRV_NEXT_BACKEND_ROOTS,
- BDRV_NEXT_MONITOR_OWNED,
- } phase;
- BlockBackend *blk;
- BlockDriverState *bs;
-} BdrvNextIterator;
-
-BlockDriverState *bdrv_first(BdrvNextIterator *it);
-BlockDriverState *bdrv_next(BdrvNextIterator *it);
-void bdrv_next_cleanup(BdrvNextIterator *it);
-
-BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
-bool bdrv_supports_compressed_writes(BlockDriverState *bs);
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque, bool read_only);
-const char *bdrv_get_node_name(const BlockDriverState *bs);
-const char *bdrv_get_device_name(const BlockDriverState *bs);
-const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
-int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
- Error **errp);
-BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- int64_t *cluster_offset,
- int64_t *cluster_bytes);
-
-void bdrv_get_backing_filename(BlockDriverState *bs,
- char *filename, int filename_size);
-char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
-char *bdrv_get_full_backing_filename_from_filename(const char *backed,
- const char *backing,
- Error **errp);
-char *bdrv_dirname(BlockDriverState *bs, Error **errp);
-
-int path_has_protocol(const char *path);
-int path_is_absolute(const char *path);
-char *path_combine(const char *base_path, const char *filename);
-
-int generated_co_wrapper
-bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int generated_co_wrapper
-bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size);
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
-void bdrv_img_create(const char *filename, const char *fmt,
- const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags,
- bool quiet, Error **errp);
-
-/* Returns the alignment in bytes that is required so that no bounce buffer
- * is required throughout the stack */
-size_t bdrv_min_mem_align(BlockDriverState *bs);
-/* Returns optimal alignment in bytes for bounce buffer */
-size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_blockalign0(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-
-void bdrv_enable_copy_on_read(BlockDriverState *bs);
-void bdrv_disable_copy_on_read(BlockDriverState *bs);
-
-void bdrv_ref(BlockDriverState *bs);
-void bdrv_unref(BlockDriverState *bs);
-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
-BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
- BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- Error **errp);
-
-bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
-void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
-void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
-bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
-
-#define BLKDBG_EVENT(child, evt) \
- do { \
- if (child) { \
- bdrv_debug_event(child->bs, evt); \
- } \
- } while (0)
-
-void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
- const char *tag);
-int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-
-/**
- * bdrv_get_aio_context:
+ * QEMU System Emulator block driver
*
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
-/**
- * Move the current coroutine to the AioContext of @bs and return the old
- * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
- * will wait for the operation to proceed until the corresponding
- * bdrv_co_leave().
+ * Copyright (c) 2003 Fabrice Bellard
*
- * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
- * this will deadlock.
- */
-AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
-
-/**
- * Ends a section started by bdrv_co_enter(). Move the current coroutine back
- * to old_ctx and decrease bs->in_flight again.
- */
-void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
-
-/**
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
- * know we already own the lock of the current AioContext.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
- * May only be called in the main thread.
- */
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
-
-/**
- * Unlocks the AioContext of @bs if it's not the current AioContext.
- */
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
-
-/**
- * Transfer control to @co in the aio context of @bs
- */
-void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
-
-void bdrv_set_aio_context_ignore(BlockDriverState *bs,
- AioContext *new_context, GSList **ignore);
-int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- Error **errp);
-int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- BdrvChild *ignore_child, Error **errp);
-bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
- GSList **ignore, Error **errp);
-bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- GSList **ignore, Error **errp);
-AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
-AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
-
-int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
-int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
-
-void bdrv_io_plug(BlockDriverState *bs);
-void bdrv_io_unplug(BlockDriverState *bs);
-
-/**
- * bdrv_parent_drained_begin_single:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
*
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
-
-/**
- * bdrv_parent_drained_end_single:
- *
- * End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
- */
-void bdrv_parent_drained_end_single(BdrvChild *c);
-
-/**
- * bdrv_drain_poll:
- *
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
- *
- * If @ignore_bds_parents is true, parents that are BlockDriverStates must
- * ignore the drain request because they will be drained separately (used for
- * drain_all).
- *
- * This is part of bdrv_drained_begin.
- */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents);
-
-/**
- * bdrv_drained_begin:
- *
- * Begin a quiesced section for exclusive access to the BDS, by disabling
- * external request sources including NBD server, block jobs, and device model.
- *
- * This function can be recursive.
- */
-void bdrv_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_do_drained_begin_quiesce:
- *
- * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
- * running requests to complete.
- */
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents);
-
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end:
- *
- * End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled. On one hand, that may result in graph changes. On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
- */
-void bdrv_drained_end(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced. Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles. The caller must poll until it reaches 0. The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
-
-void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
- Error **errp);
-void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
-
-bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
- uint32_t granularity, Error **errp);
-/**
- *
- * bdrv_register_buf/bdrv_unregister_buf:
- *
- * Register/unregister a buffer for I/O. For example, VFIO drivers are
- * interested to know the memory areas that would later be used for I/O, so
- * that they can prepare IOMMU mapping etc., to get better performance.
- */
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+#ifndef BLOCK_H
+#define BLOCK_H
-/**
- *
- * bdrv_co_copy_range:
- *
- * Do offloaded copy between two children. If the operation is not implemented
- * by the driver, or if the backend storage doesn't support it, a negative
- * error code will be returned.
- *
- * Note: block layer doesn't emulate or fallback to a bounce buffer approach
- * because usually the caller shouldn't attempt offloaded copy any more (e.g.
- * calling copy_file_range(2)) after the first error, thus it should fall back
- * to a read+write path in the caller level.
- *
- * @src: Source child to copy data from
- * @src_offset: offset in @src image to read data
- * @dst: Destination child to copy data to
- * @dst_offset: offset in @dst image to write data
- * @bytes: number of bytes to copy
- * @flags: request flags. Supported flags:
- * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
- * write on @dst as if bdrv_co_pwrite_zeroes is
- * called. Used to simplify caller code, or
- * during BlockDriver.bdrv_co_copy_range_from()
- * recursion.
- * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
- * requests currently in flight.
- *
- * Returns: 0 if succeeded; negative error code if failed.
- **/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+#include "block-global-state.h"
+#include "block-io.h"
-void bdrv_cancel_in_flight(BlockDriverState *bs);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
-#endif
+#endif /* BLOCK_H */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
new file mode 100644
index 0000000..8947aba
--- /dev/null
+++ b/include/block/block_int-common.h
@@ -0,0 +1,1246 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_COMMON_H
+#define BLOCK_INT_COMMON_H
+
+#include "block/accounting.h"
+#include "block/block.h"
+#include "block/aio-wait.h"
+#include "qemu/queue.h"
+#include "qemu/coroutine.h"
+#include "qemu/stats64.h"
+#include "qemu/timer.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+#include "qemu/throttle.h"
+#include "qemu/rcu.h"
+
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_HWVERSION "hwversion"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+#define BLOCK_OPT_REDUNDANCY "redundancy"
+#define BLOCK_OPT_NOCOW "nocow"
+#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint"
+#define BLOCK_OPT_OBJECT_SIZE "object_size"
+#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
+#define BLOCK_OPT_DATA_FILE "data_file"
+#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
+#define BLOCK_OPT_COMPRESSION_TYPE "compression_type"
+#define BLOCK_OPT_EXTL2 "extended_l2"
+
+#define BLOCK_PROBE_BUF_SIZE 512
+
+enum BdrvTrackedRequestType {
+ BDRV_TRACKED_READ,
+ BDRV_TRACKED_WRITE,
+ BDRV_TRACKED_DISCARD,
+ BDRV_TRACKED_TRUNCATE,
+};
+
+/*
+ * That is not quite good that BdrvTrackedRequest structure is public,
+ * as block/io.c is very careful about incoming offset/bytes being
+ * correct. Be sure to assert bdrv_check_request() succeeded after any
+ * modification of BdrvTrackedRequest object out of block/io.c
+ */
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t offset;
+ int64_t bytes;
+ enum BdrvTrackedRequestType type;
+
+ bool serialising;
+ int64_t overlap_offset;
+ int64_t overlap_bytes;
+
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+
+ struct BdrvTrackedRequest *waiting_for;
+} BdrvTrackedRequest;
+
+
+struct BlockDriver {
+ /*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards.
+ */
+
+ const char *format_name;
+ int instance_size;
+
+ /*
+ * Set to true if the BlockDriver is a block filter. Block filters pass
+ * certain callbacks that refer to data (see block.c) to their bs->file
+ * or bs->backing (whichever one exists) if the driver doesn't implement
+ * them. Drivers that do not wish to forward must implement them and return
+ * -ENOTSUP.
+ * Note that filters are not allowed to modify data.
+ *
+ * Filters generally cannot have more than a single filtered child,
+ * because the data they present must at all times be the same as
+ * that on their filtered child. That would be impossible to
+ * achieve for multiple filtered children.
+ * (And this filtered child must then be bs->file or bs->backing.)
+ */
+ bool is_filter;
+ /*
+ * Set to true if the BlockDriver is a format driver. Format nodes
+ * generally do not expect their children to be other format nodes
+ * (except for backing files), and so format probing is disabled
+ * on those children.
+ */
+ bool is_format;
+
+ /*
+ * Drivers not implementing bdrv_parse_filename nor bdrv_open should have
+ * this field set to true, except ones that are defined only by their
+ * child's bs.
+ * An example of the last type will be the quorum block driver.
+ */
+ bool bdrv_needs_filename;
+
+ /*
+ * Set if a driver can support backing files. This also implies the
+ * following semantics:
+ *
+ * - Return status 0 of .bdrv_co_block_status means that corresponding
+ * blocks are not allocated in this layer of backing-chain
+ * - For such (unallocated) blocks, read will:
+ * - fill buffer with zeros if there is no backing file
+ * - read from the backing file otherwise, where the block layer
+ * takes care of reading zeros beyond EOF if backing file is short
+ */
+ bool supports_backing;
+
+ bool has_variable_length;
+
+ /*
+ * Drivers setting this field must be able to work with just a plain
+ * filename with '<protocol_name>:' as a prefix, and no other options.
+ * Options may be extracted from the filename by implementing
+ * bdrv_parse_filename.
+ */
+ const char *protocol_name;
+
+ /* List of options for creating images, terminated by name == NULL */
+ QemuOptsList *create_opts;
+
+ /* List of options for image amend */
+ QemuOptsList *amend_opts;
+
+ /*
+ * If this driver supports reopening images this contains a
+ * NULL-terminated list of the runtime options that can be
+ * modified. If an option in this list is unspecified during
+ * reopen then it _must_ be reset to its default value or return
+ * an error.
+ */
+ const char *const *mutable_opts;
+
+ /*
+ * Pointer to a NULL-terminated array of names of strong options
+ * that can be specified for bdrv_open(). A strong option is one
+ * that changes the data of a BDS.
+ * If this pointer is NULL, the array is considered empty.
+ * "filename" and "driver" are always considered strong.
+ */
+ const char *const *strong_runtime_opts;
+
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
+ * This function is invoked under BQL before .bdrv_co_amend()
+ * (which in contrast does not necessarily run under the BQL)
+ * to allow driver-specific initialization code that requires
+ * the BQL, like setting up specific permission flags.
+ */
+ int (*bdrv_amend_pre_run)(BlockDriverState *bs, Error **errp);
+ /*
+ * This function is invoked under BQL after .bdrv_co_amend()
+ * to allow cleaning up what was done in .bdrv_amend_pre_run().
+ */
+ void (*bdrv_amend_clean)(BlockDriverState *bs);
+
+ /*
+ * Return true if @to_replace can be replaced by a BDS with the
+ * same data as @bs without it affecting @bs's behavior (that is,
+ * without it being visible to @bs's parents).
+ */
+ bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+ int (*bdrv_probe_device)(const char *filename);
+
+ /*
+ * Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation.
+ */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options,
+ Error **errp);
+
+ /* For handling image reopen for split or non-split files. */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+ void (*bdrv_join_options)(QDict *options, QDict *old_options);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+
+ /* Protocol drivers should implement this instead of bdrv_open */
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+ void (*bdrv_close)(BlockDriverState *bs);
+
+ int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
+ Error **errp);
+ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp);
+
+ int (*bdrv_amend_options)(BlockDriverState *bs,
+ QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque,
+ bool force,
+ Error **errp);
+
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+
+ /*
+ * Refreshes the bs->exact_filename field. If that is impossible,
+ * bs->exact_filename has to be left empty.
+ */
+ void (*bdrv_refresh_filename)(BlockDriverState *bs);
+
+ /*
+ * Gathers the open options for all children into @target.
+ * A simple format driver (without backing file support) might
+ * implement this function like this:
+ *
+ * QINCREF(bs->file->bs->full_open_options);
+ * qdict_put(target, "file", bs->file->bs->full_open_options);
+ *
+ * If not specified, the generic implementation will simply put
+ * all children's options under their respective name.
+ *
+ * @backing_overridden is true when bs->backing seems not to be
+ * the child that would result from opening bs->backing_file.
+ * Therefore, if it is true, the backing child's options should be
+ * gathered; otherwise, there is no need since the backing child
+ * is the one implied by the image header.
+ *
+ * Note that ideally this function would not be needed. Every
+ * block driver which implements it is probably doing something
+ * shady regarding its runtime option structure.
+ */
+ void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
+ bool backing_overridden);
+
+ /*
+ * Returns an allocated string which is the directory name of this BDS: It
+ * will be used to make relative filenames absolute by prepending this
+ * function's return value to them.
+ */
+ char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * This informs the driver that we are no longer interested in the result
+ * of in-flight requests, so don't waste the time if possible.
+ *
+ * One example usage is to avoid waiting for an nbd target node reconnect
+ * timeout during job-cancel with force=true.
+ */
+ void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
+
+ int (*bdrv_inactivate)(BlockDriverState *bs);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ /*
+ * Remove fd handlers, timers, and other event loop callbacks so the event
+ * loop is no longer in use. Called with no in-flight requests and in
+ * depth-first traversal order with parents before child nodes.
+ */
+ void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+ /*
+ * Add fd handlers, timers, and other event loop callbacks so I/O requests
+ * can be processed again. Called with no in-flight requests and in
+ * depth-first traversal order with child nodes before parent nodes.
+ */
+ void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+ AioContext *new_context);
+
+ /**
+ * Try to get @bs's logical and physical block size.
+ * On success, store them in @bsz and return zero.
+ * On failure, return negative errno.
+ */
+ int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
+ /**
+ * Try to get @bs's geometry (cyls, heads, sectors)
+ * On success, store them in @geo and return 0.
+ * On failure return -errno.
+ * Only drivers that want to override guest geometry implement this
+ * callback; see hd_geometry_guess().
+ */
+ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
+
+ void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+ void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
+ Error **errp);
+
+ /**
+ * Informs the block driver that a permission change is intended. The
+ * driver checks whether the change is permissible and may take other
+ * preparations for the change (e.g. get file system locks). This operation
+ * is always followed either by a call to either .bdrv_set_perm or
+ * .bdrv_abort_perm_update.
+ *
+ * Checks whether the requested set of cumulative permissions in @perm
+ * can be granted for accessing @bs and whether no other users are using
+ * permissions other than those given in @shared (both arguments take
+ * BLK_PERM_* bitmasks).
+ *
+ * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+ * and errp is set to an error describing the conflict.
+ */
+ int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared, Error **errp);
+
+ /**
+ * Called to inform the driver that the set of cumulative set of used
+ * permissions for @bs has changed to @perm, and the set of sharable
+ * permission to @shared. The driver can use this to propagate changes to
+ * its children (i.e. request permissions only if a parent actually needs
+ * them).
+ *
+ * This function is only invoked after bdrv_check_perm(), so block drivers
+ * may rely on preparations made in their .bdrv_check_perm implementation.
+ */
+ void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+ /*
+ * Called to inform the driver that after a previous bdrv_check_perm()
+ * call, the permission update is not performed and any preparations made
+ * for it (e.g. taken file locks) need to be undone.
+ *
+ * This function can be called even for nodes that never saw a
+ * bdrv_check_perm() call. It is a no-op then.
+ */
+ void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+ /**
+ * Returns in @nperm and @nshared the permissions that the driver for @bs
+ * needs on its child @c, based on the cumulative permissions requested by
+ * the parents in @parent_perm and @parent_shared.
+ *
+ * If @c is NULL, return the permissions for attaching a new child for the
+ * given @child_class and @role.
+ *
+ * If @reopen_queue is non-NULL, don't return the currently needed
+ * permissions, but those that will be needed after applying the
+ * @reopen_queue.
+ */
+ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t parent_perm, uint64_t parent_shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+ /**
+ * Register/unregister a buffer for I/O. For example, when the driver is
+ * interested to know the memory areas that will later be used in iovs, so
+ * that it can do IOMMU mapping with VFIO etc., in order to get better
+ * performance. In the case of VFIO drivers, this callback is used to do
+ * DMA mapping for hot buffers.
+ */
+ void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+
+ /*
+ * This field is modified only under the BQL, and is part of
+ * the global state.
+ */
+ QLIST_ENTRY(BlockDriver) list;
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+
+ int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
+ BlockdevAmendOptions *opts,
+ bool force,
+ Error **errp);
+
+ /* aio */
+ BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int bytes,
+ BlockCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
+ /**
+ * @offset: position in bytes to read at
+ * @bytes: number of bytes to read
+ * @qiov: the buffers to fill with read data
+ * @flags: currently unused, always 0
+ *
+ * @offset and @bytes will be a multiple of 'request_alignment',
+ * but the length of individual @qiov elements does not have to
+ * be a multiple.
+ *
+ * @bytes will always equal the total size of @qiov, and will be
+ * no larger than 'max_transfer'.
+ *
+ * The buffer in @qiov may point directly to guest memory.
+ */
+ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ int flags);
+ /**
+ * @offset: position in bytes to write at
+ * @bytes: number of bytes to write
+ * @qiov: the buffers containing data to write
+ * @flags: zero or more bits allowed by 'supported_write_flags'
+ *
+ * @offset and @bytes will be a multiple of 'request_alignment',
+ * but the length of individual @qiov elements does not have to
+ * be a multiple.
+ *
+ * @bytes will always equal the total size of @qiov, and will be
+ * no larger than 'max_transfer'.
+ *
+ * The buffer in @qiov may point directly to guest memory.
+ */
+ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
+ * will be called instead.
+ */
+ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+ /*
+ * Map [offset, offset + nbytes) range onto a child of @bs to copy from,
+ * and invoke bdrv_co_copy_range_from(child, ...), or invoke
+ * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
+ BdrvChild *src,
+ int64_t offset,
+ BdrvChild *dst,
+ int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+ /*
+ * Map [offset, offset + nbytes) range onto a child of bs to copy data to,
+ * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
+ * operation if @bs is the leaf and @src has the same BlockDriver. Return
+ * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
+ BdrvChild *src,
+ int64_t src_offset,
+ BdrvChild *dst,
+ int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+ /*
+ * Building block for bdrv_block_status[_above] and
+ * bdrv_is_allocated[_above]. The driver should answer only
+ * according to the current layer, and should only need to set
+ * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+ * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
+ * block.h for the overall meaning of the bits. As a hint, the
+ * flag want_zero is true if the caller cares more about precise
+ * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+ * overall allocation (favor larger *pnum, perhaps by reporting
+ * _DATA instead of _ZERO). The block layer guarantees input
+ * clamped to bdrv_getlength() and aligned to request_alignment,
+ * as well as non-NULL pnum, map, and file; in turn, the driver
+ * must return an error or set pnum to an aligned non-zero value.
+ *
+ * Note that @bytes is just a hint on how big of a region the
+ * caller wants to inspect. It is not a limit on *pnum.
+ * Implementations are free to return larger values of *pnum if
+ * doing so does not incur a performance penalty.
+ *
+ * block/io.c's bdrv_co_block_status() will utilize an unclamped
+ * *pnum value for the block-status cache on protocol nodes, prior
+ * to clamping *pnum for return to its caller.
+ */
+ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+
+ /*
+ * Snapshot-access API.
+ *
+ * Block-driver may provide snapshot-access API: special functions to access
+ * some internal "snapshot". The functions are similar with normal
+ * read/block_status/discard handler, but don't have any specific handling
+ * in generic block-layer: no serializing, no alignment, no tracked
+ * requests. So, block-driver that realizes these APIs is fully responsible
+ * for synchronization between snapshot-access API and normal IO requests.
+ *
+ * TODO: To be able to support qcow2's internal snapshots, this API will
+ * need to be extended to:
+ * - be able to select a specific snapshot
+ * - receive the snapshot's actual length (which may differ from bs's
+ * length)
+ */
+ int coroutine_fn (*bdrv_co_preadv_snapshot)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
+ int coroutine_fn (*bdrv_co_snapshot_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+ int coroutine_fn (*bdrv_co_pdiscard_snapshot)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
+ Error **errp);
+
+ /*
+ * Flushes all data for all layers by calling bdrv_co_flush for underlying
+ * layers, if needed. This function is needed for deterministic
+ * synchronization of the flush finishing callback.
+ */
+ int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+
+ /* Delete a created file. */
+ int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
+ Error **errp);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example file-posix.c calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ /*
+ * Truncate @bs to @offset bytes using the given @prealloc mode
+ * when growing. Modes other than PREALLOC_MODE_OFF should be
+ * rejected when shrinking @bs.
+ *
+ * If @exact is true, @bs must be resized to exactly @offset.
+ * Otherwise, it is sufficient for @bs (if it is a host block
+ * device and thus there is no way to resize it) to be at least
+ * @offset bytes in length.
+ *
+ * If @exact is true and this function fails but would succeed
+ * with @exact = false, it should return -ENOTSUP.
+ */
+ int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
+ bool exact, PreallocMode prealloc,
+ BdrvRequestFlags flags, Error **errp);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
+ Error **errp);
+
+ int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ size_t qiov_offset);
+
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
+ Error **errp);
+ BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
+
+ int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+ int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+
+ /* removable device specific */
+ bool (*bdrv_is_inserted)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+ int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf);
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
+ BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
+
+ /* io queue for linux-aio */
+ void (*bdrv_io_plug)(BlockDriverState *bs);
+ void (*bdrv_io_unplug)(BlockDriverState *bs);
+
+ /**
+ * bdrv_co_drain_begin is called if implemented in the beginning of a
+ * drain operation to drain and stop any internal sources of requests in
+ * the driver.
+ * bdrv_co_drain_end is called if implemented at the end of the drain.
+ *
+ * They should be used by the driver to e.g. manage scheduled I/O
+ * requests, or toggle an internal state. After the end of the drain new
+ * requests will continue normally.
+ */
+ void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
+ void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+
+ bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
+ bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
+ const char *name,
+ uint32_t granularity,
+ Error **errp);
+ int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
+ const char *name,
+ Error **errp);
+};
+
+static inline bool block_driver_can_compress(BlockDriver *drv)
+{
+ return drv->bdrv_co_pwritev_compressed ||
+ drv->bdrv_co_pwritev_compressed_part;
+}
+
+typedef struct BlockLimits {
+ /*
+ * Alignment requirement, in bytes, for offset/length of I/O
+ * requests. Must be a power of 2 less than INT_MAX; defaults to
+ * 1 for drivers with modern byte interfaces, and to 512
+ * otherwise.
+ */
+ uint32_t request_alignment;
+
+ /*
+ * Maximum number of bytes that can be discarded at once. Must be multiple
+ * of pdiscard_alignment, but need not be power of 2. May be 0 if no
+ * inherent 64-bit limit.
+ */
+ int64_t max_pdiscard;
+
+ /*
+ * Optimal alignment for discard requests in bytes. A power of 2
+ * is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pdiscard if
+ * that is set. May be 0 if bl.request_alignment is good enough
+ */
+ uint32_t pdiscard_alignment;
+
+ /*
+ * Maximum number of bytes that can zeroized at once. Must be multiple of
+ * pwrite_zeroes_alignment. 0 means no limit.
+ */
+ int64_t max_pwrite_zeroes;
+
+ /*
+ * Optimal alignment for write zeroes requests in bytes. A power
+ * of 2 is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pwrite_zeroes
+ * if that is set. May be 0 if bl.request_alignment is good
+ * enough
+ */
+ uint32_t pwrite_zeroes_alignment;
+
+ /*
+ * Optimal transfer length in bytes. A power of 2 is best but not
+ * mandatory. Must be a multiple of bl.request_alignment, or 0 if
+ * no preferred size
+ */
+ uint32_t opt_transfer;
+
+ /*
+ * Maximal transfer length in bytes. Need not be power of 2, but
+ * must be multiple of opt_transfer and bl.request_alignment, or 0
+ * for no 32-bit limit. For now, anything larger than INT_MAX is
+ * clamped down.
+ */
+ uint32_t max_transfer;
+
+ /*
+ * Maximal hardware transfer length in bytes. Applies whenever
+ * transfers to the device bypass the kernel I/O scheduler, for
+ * example with SG_IO. If larger than max_transfer or if zero,
+ * blk_get_max_hw_transfer will fall back to max_transfer.
+ */
+ uint64_t max_hw_transfer;
+
+ /*
+ * Maximal number of scatter/gather elements allowed by the hardware.
+ * Applies whenever transfers to the device bypass the kernel I/O
+ * scheduler, for example with SG_IO. If larger than max_iov
+ * or if zero, blk_get_max_hw_iov will fall back to max_iov.
+ */
+ int max_hw_iov;
+
+
+ /* memory alignment, in bytes so that no bounce buffer is needed */
+ size_t min_mem_alignment;
+
+ /* memory alignment, in bytes, for bounce buffer */
+ size_t opt_mem_alignment;
+
+ /* maximum number of iovec elements */
+ int max_iov;
+} BlockLimits;
+
+typedef struct BdrvOpBlocker BdrvOpBlocker;
+
+typedef struct BdrvAioNotifier {
+ void (*attached_aio_context)(AioContext *new_context, void *opaque);
+ void (*detach_aio_context)(void *opaque);
+
+ void *opaque;
+ bool deleted;
+
+ QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
+struct BdrvChildClass {
+ /*
+ * If true, bdrv_replace_node() doesn't change the node this BdrvChild
+ * points to.
+ */
+ bool stay_at_node;
+
+ /*
+ * If true, the parent is a BlockDriverState and bdrv_next_all_states()
+ * will return it. This information is used for drain_all, where every node
+ * will be drained separately, so the drain only needs to be propagated to
+ * non-BDS parents.
+ */
+ bool parent_is_bds;
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+ void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
+ int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options);
+ void (*change_media)(BdrvChild *child, bool load);
+
+ /*
+ * Returns a malloced string that describes the parent of the child for a
+ * human reader. This could be a node-name, BlockBackend name, qdev ID or
+ * QOM path of the device owning the BlockBackend, job type and ID etc. The
+ * caller is responsible for freeing the memory.
+ */
+ char *(*get_parent_desc)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the child has been activated/inactivated (e.g.
+ * when migration is completing) and it can start/stop requesting
+ * permissions and doing I/O on it.
+ */
+ void (*activate)(BdrvChild *child, Error **errp);
+ int (*inactivate)(BdrvChild *child);
+
+ void (*attach)(BdrvChild *child);
+ void (*detach)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the filename of its child has changed (e.g.
+ * because the direct child was removed from the backing chain), so that it
+ * can update its reference.
+ */
+ int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+ const char *filename, Error **errp);
+
+ bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
+ GSList **ignore, Error **errp);
+ void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
+
+ AioContext *(*get_parent_aio_context)(BdrvChild *child);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ void (*resize)(BdrvChild *child);
+
+ /*
+ * Returns a name that is supposedly more useful for human users than the
+ * node name for identifying the node in question (in particular, a BB
+ * name), or NULL if the parent can't provide a better name.
+ */
+ const char *(*get_name)(BdrvChild *child);
+
+ /*
+ * If this pair of functions is implemented, the parent doesn't issue new
+ * requests after returning from .drained_begin() until .drained_end() is
+ * called.
+ *
+ * These functions must not change the graph (and therefore also must not
+ * call aio_poll(), which could change the graph indirectly).
+ *
+ * If drained_end() schedules background operations, it must atomically
+ * increment *drained_end_counter for each such operation and atomically
+ * decrement it once the operation has settled.
+ *
+ * Note that this can be nested. If drained_begin() was called twice, new
+ * I/O is allowed only after drained_end() was called twice, too.
+ */
+ void (*drained_begin)(BdrvChild *child);
+ void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+
+ /*
+ * Returns whether the parent has pending requests for the child. This
+ * callback is polled after .drained_begin() has been called until all
+ * activity on the child has stopped.
+ */
+ bool (*drained_poll)(BdrvChild *child);
+};
+
+extern const BdrvChildClass child_of_bds;
+
+struct BdrvChild {
+ BlockDriverState *bs;
+ char *name;
+ const BdrvChildClass *klass;
+ BdrvChildRole role;
+ void *opaque;
+
+ /**
+ * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+ */
+ uint64_t perm;
+
+ /**
+ * Permissions that can still be granted to other users of @bs while this
+ * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+ */
+ uint64_t shared_perm;
+
+ /*
+ * This link is frozen: the child can neither be replaced nor
+ * detached from the parent.
+ */
+ bool frozen;
+
+ /*
+ * How many times the parent of this child has been drained
+ * (through klass->drained_*).
+ * Usually, this is equal to bs->quiesce_counter (potentially
+ * reduced by bdrv_drain_all_count). It may differ while the
+ * child is entering or leaving a drained section.
+ */
+ int parent_quiesce_counter;
+
+ QLIST_ENTRY(BdrvChild) next;
+ QLIST_ENTRY(BdrvChild) next_parent;
+};
+
+/*
+ * Allows bdrv_co_block_status() to cache one data region for a
+ * protocol node.
+ *
+ * @valid: Whether the cache is valid (should be accessed with atomic
+ * functions so this can be reset by RCU readers)
+ * @data_start: Offset where we know (or strongly assume) is data
+ * @data_end: Offset where the data region ends (which is not necessarily
+ * the start of a zeroed region)
+ */
+typedef struct BdrvBlockStatusCache {
+ struct rcu_head rcu;
+
+ bool valid;
+ int64_t data_start;
+ int64_t data_end;
+} BdrvBlockStatusCache;
+
+struct BlockDriverState {
+ /*
+ * Protected by big QEMU lock or read-only after opening. No special
+ * locking needed during I/O...
+ */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ bool encrypted; /* if true, the media is encrypted */
+ bool sg; /* if true, the device is a /dev/sg* */
+ bool probed; /* if true, format was probed rather than specified */
+ bool force_share; /* if true, always allow all shared permissions */
+ bool implicit; /* if true, this filter node was automatically inserted */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+ /*
+ * long-running tasks intended to always use the same AioContext as this
+ * BDS may register themselves in this list to be notified of changes
+ * regarding this BDS's context
+ */
+ QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
+ bool walking_aio_notifiers; /* to make removal during iteration safe */
+
+ char filename[PATH_MAX];
+ /*
+ * If not empty, this image is a diff in relation to backing_file.
+ * Note that this is the name given in the image header and
+ * therefore may or may not be equal to .backing->bs->filename.
+ * If this field contains a relative path, it is to be resolved
+ * relatively to the overlay's location.
+ */
+ char backing_file[PATH_MAX];
+ /*
+ * The backing filename indicated by the image header. Contrary
+ * to backing_file, if we ever open this file, auto_backing_file
+ * is replaced by the resulting BDS's filename (i.e. after a
+ * bdrv_refresh_filename() run).
+ */
+ char auto_backing_file[PATH_MAX];
+ char backing_format[16]; /* if non-zero and backing_file exists */
+
+ QDict *full_open_options;
+ char exact_filename[PATH_MAX];
+
+ BdrvChild *backing;
+ BdrvChild *file;
+
+ /* I/O Limits */
+ BlockLimits bl;
+
+ /*
+ * Flags honored during pread
+ */
+ unsigned int supported_read_flags;
+ /*
+ * Flags honored during pwrite (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_WRITE_UNCHANGED).
+ * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
+ * writes will be issued as normal writes without the flag set.
+ * This is important to note for drivers that do not explicitly
+ * request a WRITE permission for their children and instead take
+ * the same permissions as their parent did (this is commonly what
+ * block filters do). Such drivers have to be aware that the
+ * parent may have taken a WRITE_UNCHANGED permission only and is
+ * issuing such requests. Drivers either must make sure that
+ * these requests do not result in plain WRITE accesses (usually
+ * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
+ * every incoming write request as-is, including potentially that
+ * flag), or they have to explicitly take the WRITE permission for
+ * their children.
+ */
+ unsigned int supported_write_flags;
+ /*
+ * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
+ */
+ unsigned int supported_zero_flags;
+ /*
+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
+ *
+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
+ * that any added space reads as all zeros. If this can't be guaranteed,
+ * the operation must fail.
+ */
+ unsigned int supported_truncate_flags;
+
+ /* the following member gives a name to every node on the bs graph. */
+ char node_name[32];
+ /* element of the list of named nodes building the graph */
+ QTAILQ_ENTRY(BlockDriverState) node_list;
+ /* element of the list of all BlockDriverStates (all_bdrv_states) */
+ QTAILQ_ENTRY(BlockDriverState) bs_list;
+ /* element of the list of monitor-owned BDS */
+ QTAILQ_ENTRY(BlockDriverState) monitor_list;
+ int refcnt;
+
+ /* operation blockers. Protected by BQL. */
+ QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
+
+ /*
+ * The node that this node inherited default options from (and a reopen on
+ * which can affect this node by changing these defaults). This is always a
+ * parent node of this node.
+ */
+ BlockDriverState *inherits_from;
+ QLIST_HEAD(, BdrvChild) children;
+ QLIST_HEAD(, BdrvChild) parents;
+
+ QDict *options;
+ QDict *explicit_options;
+ BlockdevDetectZeroesOptions detect_zeroes;
+
+ /* The error object in use for blocking operations on backing_hd */
+ Error *backing_blocker;
+
+ /* Protected by AioContext lock */
+
+ /*
+ * If we are reading a disk image, give its size in sectors.
+ * Generally read-only; it is written to by load_snapshot and
+ * save_snaphost, but the block layer is quiescent during those.
+ */
+ int64_t total_sectors;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+
+ /*
+ * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+ * Reading from the list can be done with either the BQL or the
+ * dirty_bitmap_mutex. Modifying a bitmap only requires
+ * dirty_bitmap_mutex.
+ */
+ QemuMutex dirty_bitmap_mutex;
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+
+ /* Offset after the highest byte written to */
+ Stat64 wr_highest_offset;
+
+ /*
+ * If true, copy read backing sectors into image. Can be >1 if more
+ * than one client has requested copy-on-read. Accessed with atomic
+ * ops.
+ */
+ int copy_on_read;
+
+ /*
+ * number of in-flight requests; overall and serialising.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ unsigned int serialising_in_flight;
+
+ /*
+ * counter for nested bdrv_io_plug.
+ * Accessed with atomic ops.
+ */
+ unsigned io_plugged;
+
+ /* do we need to tell the quest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* Accessed with atomic ops. */
+ int quiesce_counter;
+ int recursive_quiesce_counter;
+
+ unsigned int write_gen; /* Current data generation */
+
+ /* Protected by reqs_lock. */
+ CoMutex reqs_lock;
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+ CoQueue flush_queue; /* Serializing flush queue */
+ bool active_flush_req; /* Flush request in flight? */
+
+ /* Only read/written by whoever has set active_flush_req to true. */
+ unsigned int flushed_gen; /* Flushed write generation */
+
+ /* BdrvChild links to this node may never be frozen */
+ bool never_freeze;
+
+ /* Lock for block-status cache RCU writers */
+ CoMutex bsc_modify_lock;
+ /* Always non-NULL, but must only be dereferenced under an RCU read guard */
+ BdrvBlockStatusCache *block_status_cache;
+};
+
+struct BlockBackendRootState {
+ int open_flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+};
+
+typedef enum BlockMirrorBackingMode {
+ /*
+ * Reuse the existing backing chain from the source for the target.
+ * - sync=full: Set backing BDS to NULL.
+ * - sync=top: Use source's backing BDS.
+ * - sync=none: Use source as the backing BDS.
+ */
+ MIRROR_SOURCE_BACKING_CHAIN,
+
+ /* Open the target's backing chain completely anew */
+ MIRROR_OPEN_BACKING_CHAIN,
+
+ /* Do not change the target's backing BDS after job completion */
+ MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
+
+/*
+ * Essential block drivers which must always be statically linked into qemu, and
+ * which therefore can be accessed without using bdrv_find_format()
+ */
+extern BlockDriver bdrv_file;
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_qcow2;
+
+extern unsigned int bdrv_drain_all_count;
+extern QemuOptsList bdrv_create_opts_simple;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-commmon.h for more information about
+ * the Common API.
+ */
+
+static inline BlockDriverState *child_bs(BdrvChild *child)
+{
+ return child ? child->bs : NULL;
+}
+
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
+int get_tmp_filename(char *filename, int size);
+void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
+ QDict *options);
+
+
+int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ Error **errp);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+
+#endif /* BLOCK_INT_COMMON_H */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
new file mode 100644
index 0000000..0f21b05
--- /dev/null
+++ b/include/block/block_int-global-state.h
@@ -0,0 +1,329 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_GLOBAL_STATE_H
+#define BLOCK_INT_GLOBAL_STATE_H
+
+#include "block_int-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+/**
+ * stream_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @backing_file_str: The file name that will be written to @bs as the
+ * the new backing file if the job completes. Ignored if @base is %NULL.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above
+ * @bs. NULL means that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @backing_file_str in the written image and to @base in the live
+ * BlockDriverState.
+ */
+void stream_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, const char *backing_file_str,
+ BlockDriverState *bottom,
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error,
+ const char *filter_node_name,
+ Error **errp);
+
+/**
+ * commit_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device.
+ * @top: Top block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ */
+void commit_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, BlockDriverState *top,
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error, const char *backing_file_str,
+ const char *filter_node_name, Error **errp);
+/**
+ * commit_active_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @auto_complete: Auto complete the job.
+ * @errp: Error object.
+ *
+ */
+BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, int creation_flags,
+ int64_t speed, BlockdevOnError on_error,
+ const char *filter_node_name,
+ BlockCompletionFunc *cb, void *opaque,
+ bool auto_complete, Error **errp);
+/*
+ * mirror_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ * only be used when full mirroring is selected.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
+ * @zero_target: Whether the target should be explicitly zero-initialized
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @target until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
+ int creation_flags, int64_t speed,
+ uint32_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ bool zero_target,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp);
+
+/*
+ * backup_job_create:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
+ * @bitmap_mode: The bitmap synchronization policy to use.
+ * @perf: Performance options. All actual fields assumed to be present,
+ * all ".has_*" fields are ignored.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @txn: Transaction that this job is part of (may be NULL).
+ *
+ * Create a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode,
+ BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+ bool compress,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ int creation_flags,
+ BlockCompletionFunc *cb, void *opaque,
+ JobTxn *txn, Error **errp);
+
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp);
+void bdrv_root_unref_child(BdrvChild *child);
+
+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+ uint64_t *shared_perm);
+
+/**
+ * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
+ * bdrv_child_refresh_perms() instead and make the parent's
+ * .bdrv_child_perm() implementation return the correct values.
+ */
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+
+/**
+ * Calls bs->drv->bdrv_child_perm() and updates the child's permission
+ * masks with the result.
+ * Drivers should invoke this function whenever an event occurs that
+ * makes their .bdrv_child_perm() implementation return different
+ * values than before, but which will not result in the block layer
+ * automatically refreshing the permissions.
+ */
+int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
+
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+/*
+ * Default implementation for BlockDriver.bdrv_child_perm() that can
+ * be used by block filters and image formats, as long as they use the
+ * child_of_bds child class and set an appropriate BdrvChildRole.
+ */
+void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
+bool blk_dev_has_removable_media(BlockBackend *blk);
+void blk_dev_eject_request(BlockBackend *blk, bool force);
+bool blk_dev_is_medium_locked(BlockBackend *blk);
+
+void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
+
+void bdrv_set_monitor_owned(BlockDriverState *bs);
+
+void blockdev_close_all_bdrv_states(void);
+
+BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
+
+/**
+ * Simple implementation of bdrv_co_create_opts for protocol drivers
+ * which only support creation via opening a file
+ * (usually existing raw storage device)
+ */
+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp);
+
+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+ const char *name,
+ BlockDriverState **pbs,
+ Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
+ BlockDirtyBitmapMergeSourceList *bms,
+ HBitmap **backup, Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
+ bool release,
+ BlockDriverState **bitmap_bs,
+ Error **errp);
+
+
+BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
+
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe of change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+ void (*aio_context_attached)(AioContext *,
+ void *),
+ void (*aio_context_detached)(void *),
+ void *opaque);
+
+/**
+ * End all quiescent sections started by bdrv_drain_all_begin(). This is
+ * needed when deleting a BDS before bdrv_drain_all_end() is called.
+ *
+ * NOTE: this is an internal helper for bdrv_close() *only*. No one else
+ * should call it.
+ */
+void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
+
+/**
+ * Make sure that the function is running under both drain and BQL.
+ * The latter protects from concurrent writings
+ * from the GS API, while the former prevents concurrent reads
+ * from I/O.
+ */
+static inline void assert_bdrv_graph_writable(BlockDriverState *bs)
+{
+ /*
+ * TODO: this function is incomplete. Because the users of this
+ * assert lack the necessary drains, check only for BQL.
+ * Once the necessary drains are added,
+ * assert also for qatomic_read(&bs->quiesce_counter) > 0
+ */
+ assert(qemu_in_main_thread());
+}
+
+#endif /* BLOCK_INT_GLOBAL_STATE */
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
new file mode 100644
index 0000000..bb45420
--- /dev/null
+++ b/include/block/block_int-io.h
@@ -0,0 +1,194 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_IO_H
+#define BLOCK_INT_IO_H
+
+#include "block_int-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+int coroutine_fn bdrv_co_preadv_snapshot(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
+int coroutine_fn bdrv_co_snapshot_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+int coroutine_fn bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+
+static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
+ int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
+
+ return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
+}
+
+static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
+ int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
+
+ return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
+}
+
+bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+ uint64_t align);
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
+
+BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
+ const char *filename);
+
+/**
+ * bdrv_wakeup:
+ * @bs: The BlockDriverState for which an I/O operation has been completed.
+ *
+ * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
+ * synchronous I/O on a BlockDriverState that is attached to another
+ * I/O thread, the main thread lets the I/O thread's event loop run,
+ * waiting for the I/O operation to complete. A bdrv_wakeup will wake
+ * up the main thread if necessary.
+ *
+ * Manual calls to bdrv_wakeup are rarely necessary, because
+ * bdrv_dec_in_flight already calls it.
+ */
+void bdrv_wakeup(BlockDriverState *bs);
+
+const char *bdrv_get_parent_name(const BlockDriverState *bs);
+bool blk_dev_has_tray(BlockBackend *blk);
+bool blk_dev_is_tray_open(BlockBackend *blk);
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
+bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+ const BdrvDirtyBitmap *src,
+ HBitmap **backup, bool lock);
+
+void bdrv_inc_in_flight(BlockDriverState *bs);
+void bdrv_dec_in_flight(BlockDriverState *bs);
+
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+BdrvChild *bdrv_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_filter_child(BlockDriverState *bs);
+BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_primary_child(BlockDriverState *bs);
+BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
+BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
+
+static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_filter_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_filter_or_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_primary_child(bs));
+}
+
+/**
+ * Check whether the given offset is in the cached block-status data
+ * region.
+ *
+ * If it is, and @pnum is not NULL, *pnum is set to
+ * `bsc.data_end - offset`, i.e. how many bytes, starting from
+ * @offset, are data (according to the cache).
+ * Otherwise, *pnum is not touched.
+ */
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
+
+/**
+ * If [offset, offset + bytes) overlaps with the currently cached
+ * block-status region, invalidate the cache.
+ *
+ * (To be used by I/O paths that cause data regions to be zero or
+ * holes.)
+ */
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+/**
+ * Mark the range [offset, offset + bytes) as a data region.
+ */
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+
+#endif /* BLOCK_INT_IO_H */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 27008cf..7d50b6b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -24,1478 +24,9 @@
#ifndef BLOCK_INT_H
#define BLOCK_INT_H
-#include "block/accounting.h"
-#include "block/block.h"
-#include "block/aio-wait.h"
-#include "qemu/queue.h"
-#include "qemu/coroutine.h"
-#include "qemu/stats64.h"
-#include "qemu/timer.h"
-#include "qemu/hbitmap.h"
-#include "block/snapshot.h"
-#include "qemu/throttle.h"
-#include "qemu/rcu.h"
+#include "block_int-global-state.h"
+#include "block_int-io.h"
-#define BLOCK_FLAG_LAZY_REFCOUNTS 8
-
-#define BLOCK_OPT_SIZE "size"
-#define BLOCK_OPT_ENCRYPT "encryption"
-#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format"
-#define BLOCK_OPT_COMPAT6 "compat6"
-#define BLOCK_OPT_HWVERSION "hwversion"
-#define BLOCK_OPT_BACKING_FILE "backing_file"
-#define BLOCK_OPT_BACKING_FMT "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE "table_size"
-#define BLOCK_OPT_PREALLOC "preallocation"
-#define BLOCK_OPT_SUBFMT "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL "compat"
-#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
-#define BLOCK_OPT_REDUNDANCY "redundancy"
-#define BLOCK_OPT_NOCOW "nocow"
-#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint"
-#define BLOCK_OPT_OBJECT_SIZE "object_size"
-#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
-#define BLOCK_OPT_DATA_FILE "data_file"
-#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
-#define BLOCK_OPT_COMPRESSION_TYPE "compression_type"
-#define BLOCK_OPT_EXTL2 "extended_l2"
-
-#define BLOCK_PROBE_BUF_SIZE 512
-
-enum BdrvTrackedRequestType {
- BDRV_TRACKED_READ,
- BDRV_TRACKED_WRITE,
- BDRV_TRACKED_DISCARD,
- BDRV_TRACKED_TRUNCATE,
-};
-
-/*
- * That is not quite good that BdrvTrackedRequest structure is public,
- * as block/io.c is very careful about incoming offset/bytes being
- * correct. Be sure to assert bdrv_check_request() succeeded after any
- * modification of BdrvTrackedRequest object out of block/io.c
- */
-typedef struct BdrvTrackedRequest {
- BlockDriverState *bs;
- int64_t offset;
- int64_t bytes;
- enum BdrvTrackedRequestType type;
-
- bool serialising;
- int64_t overlap_offset;
- int64_t overlap_bytes;
-
- QLIST_ENTRY(BdrvTrackedRequest) list;
- Coroutine *co; /* owner, used for deadlock detection */
- CoQueue wait_queue; /* coroutines blocked on this request */
-
- struct BdrvTrackedRequest *waiting_for;
-} BdrvTrackedRequest;
-
-int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- Error **errp);
-int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
-
-struct BlockDriver {
- const char *format_name;
- int instance_size;
-
- /* set to true if the BlockDriver is a block filter. Block filters pass
- * certain callbacks that refer to data (see block.c) to their bs->file
- * or bs->backing (whichever one exists) if the driver doesn't implement
- * them. Drivers that do not wish to forward must implement them and return
- * -ENOTSUP.
- * Note that filters are not allowed to modify data.
- *
- * Filters generally cannot have more than a single filtered child,
- * because the data they present must at all times be the same as
- * that on their filtered child. That would be impossible to
- * achieve for multiple filtered children.
- * (And this filtered child must then be bs->file or bs->backing.)
- */
- bool is_filter;
- /*
- * Set to true if the BlockDriver is a format driver. Format nodes
- * generally do not expect their children to be other format nodes
- * (except for backing files), and so format probing is disabled
- * on those children.
- */
- bool is_format;
- /*
- * Return true if @to_replace can be replaced by a BDS with the
- * same data as @bs without it affecting @bs's behavior (that is,
- * without it being visible to @bs's parents).
- */
- bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
- int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
- int (*bdrv_probe_device)(const char *filename);
-
- /* Any driver implementing this callback is expected to be able to handle
- * NULL file names in its .bdrv_open() implementation */
- void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
- /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
- * this field set to true, except ones that are defined only by their
- * child's bs.
- * An example of the last type will be the quorum block driver.
- */
- bool bdrv_needs_filename;
-
- /*
- * Set if a driver can support backing files. This also implies the
- * following semantics:
- *
- * - Return status 0 of .bdrv_co_block_status means that corresponding
- * blocks are not allocated in this layer of backing-chain
- * - For such (unallocated) blocks, read will:
- * - fill buffer with zeros if there is no backing file
- * - read from the backing file otherwise, where the block layer
- * takes care of reading zeros beyond EOF if backing file is short
- */
- bool supports_backing;
-
- /* For handling image reopen for split or non-split files */
- int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
- void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
- void (*bdrv_join_options)(QDict *options, QDict *old_options);
-
- int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
- Error **errp);
-
- /* Protocol drivers should implement this instead of bdrv_open */
- int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
- Error **errp);
- void (*bdrv_close)(BlockDriverState *bs);
-
-
- int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
- Error **errp);
- int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
-
- int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
- BlockdevAmendOptions *opts,
- bool force,
- Error **errp);
-
- int (*bdrv_amend_options)(BlockDriverState *bs,
- QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque,
- bool force,
- Error **errp);
-
- int (*bdrv_make_empty)(BlockDriverState *bs);
-
- /*
- * Refreshes the bs->exact_filename field. If that is impossible,
- * bs->exact_filename has to be left empty.
- */
- void (*bdrv_refresh_filename)(BlockDriverState *bs);
-
- /*
- * Gathers the open options for all children into @target.
- * A simple format driver (without backing file support) might
- * implement this function like this:
- *
- * QINCREF(bs->file->bs->full_open_options);
- * qdict_put(target, "file", bs->file->bs->full_open_options);
- *
- * If not specified, the generic implementation will simply put
- * all children's options under their respective name.
- *
- * @backing_overridden is true when bs->backing seems not to be
- * the child that would result from opening bs->backing_file.
- * Therefore, if it is true, the backing child's options should be
- * gathered; otherwise, there is no need since the backing child
- * is the one implied by the image header.
- *
- * Note that ideally this function would not be needed. Every
- * block driver which implements it is probably doing something
- * shady regarding its runtime option structure.
- */
- void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
- bool backing_overridden);
-
- /*
- * Returns an allocated string which is the directory name of this BDS: It
- * will be used to make relative filenames absolute by prepending this
- * function's return value to them.
- */
- char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
-
- /* aio */
- BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
- int64_t offset, int bytes,
- BlockCompletionFunc *cb, void *opaque);
-
- int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-
- /**
- * @offset: position in bytes to read at
- * @bytes: number of bytes to read
- * @qiov: the buffers to fill with read data
- * @flags: currently unused, always 0
- *
- * @offset and @bytes will be a multiple of 'request_alignment',
- * but the length of individual @qiov elements does not have to
- * be a multiple.
- *
- * @bytes will always equal the total size of @qiov, and will be
- * no larger than 'max_transfer'.
- *
- * The buffer in @qiov may point directly to guest memory.
- */
- int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
- /**
- * @offset: position in bytes to write at
- * @bytes: number of bytes to write
- * @qiov: the buffers containing data to write
- * @flags: zero or more bits allowed by 'supported_write_flags'
- *
- * @offset and @bytes will be a multiple of 'request_alignment',
- * but the length of individual @qiov elements does not have to
- * be a multiple.
- *
- * @bytes will always equal the total size of @qiov, and will be
- * no larger than 'max_transfer'.
- *
- * The buffer in @qiov may point directly to guest memory.
- */
- int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
-
- /*
- * Efficiently zero a region of the disk image. Typically an image format
- * would use a compact metadata representation to implement this. This
- * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
- * will be called instead.
- */
- int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
-
- /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
- * and invoke bdrv_co_copy_range_from(child, ...), or invoke
- * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
- *
- * See the comment of bdrv_co_copy_range for the parameter and return value
- * semantics.
- */
- int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
- /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
- * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
- * operation if @bs is the leaf and @src has the same BlockDriver. Return
- * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
- *
- * See the comment of bdrv_co_copy_range for the parameter and return value
- * semantics.
- */
- int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
- /*
- * Building block for bdrv_block_status[_above] and
- * bdrv_is_allocated[_above]. The driver should answer only
- * according to the current layer, and should only need to set
- * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
- * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
- * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
- * block.h for the overall meaning of the bits. As a hint, the
- * flag want_zero is true if the caller cares more about precise
- * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
- * overall allocation (favor larger *pnum, perhaps by reporting
- * _DATA instead of _ZERO). The block layer guarantees input
- * clamped to bdrv_getlength() and aligned to request_alignment,
- * as well as non-NULL pnum, map, and file; in turn, the driver
- * must return an error or set pnum to an aligned non-zero value.
- *
- * Note that @bytes is just a hint on how big of a region the
- * caller wants to inspect. It is not a limit on *pnum.
- * Implementations are free to return larger values of *pnum if
- * doing so does not incur a performance penalty.
- *
- * block/io.c's bdrv_co_block_status() will utilize an unclamped
- * *pnum value for the block-status cache on protocol nodes, prior
- * to clamping *pnum for return to its caller.
- */
- int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-
- /*
- * This informs the driver that we are no longer interested in the result
- * of in-flight requests, so don't waste the time if possible.
- *
- * One example usage is to avoid waiting for an nbd target node reconnect
- * timeout during job-cancel with force=true.
- */
- void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
-
- /*
- * Invalidate any cached meta-data.
- */
- void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
- Error **errp);
- int (*bdrv_inactivate)(BlockDriverState *bs);
-
- /*
- * Flushes all data for all layers by calling bdrv_co_flush for underlying
- * layers, if needed. This function is needed for deterministic
- * synchronization of the flush finishing callback.
- */
- int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
-
- /* Delete a created file. */
- int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
- Error **errp);
-
- /*
- * Flushes all data that was already written to the OS all the way down to
- * the disk (for example file-posix.c calls fsync()).
- */
- int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
-
- /*
- * Flushes all internal caches to the OS. The data may still sit in a
- * writeback cache of the host OS, but it will survive a crash of the qemu
- * process.
- */
- int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
-
- /*
- * Drivers setting this field must be able to work with just a plain
- * filename with '<protocol_name>:' as a prefix, and no other options.
- * Options may be extracted from the filename by implementing
- * bdrv_parse_filename.
- */
- const char *protocol_name;
-
- /*
- * Truncate @bs to @offset bytes using the given @prealloc mode
- * when growing. Modes other than PREALLOC_MODE_OFF should be
- * rejected when shrinking @bs.
- *
- * If @exact is true, @bs must be resized to exactly @offset.
- * Otherwise, it is sufficient for @bs (if it is a host block
- * device and thus there is no way to resize it) to be at least
- * @offset bytes in length.
- *
- * If @exact is true and this function fails but would succeed
- * with @exact = false, it should return -ENOTSUP.
- */
- int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
- bool exact, PreallocMode prealloc,
- BdrvRequestFlags flags, Error **errp);
-
- int64_t (*bdrv_getlength)(BlockDriverState *bs);
- bool has_variable_length;
- int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
- BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
- Error **errp);
-
- int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
-
- int (*bdrv_snapshot_create)(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
- int (*bdrv_snapshot_goto)(BlockDriverState *bs,
- const char *snapshot_id);
- int (*bdrv_snapshot_delete)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
- int (*bdrv_snapshot_list)(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
- int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
- int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
- ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
- Error **errp);
- BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
-
- int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
- int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
-
- int (*bdrv_change_backing_file)(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-
- /* removable device specific */
- bool (*bdrv_is_inserted)(BlockDriverState *bs);
- void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
- void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
-
- /* to control generic scsi devices */
- BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque);
- int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf);
-
- /* List of options for creating images, terminated by name == NULL */
- QemuOptsList *create_opts;
-
- /* List of options for image amend */
- QemuOptsList *amend_opts;
-
- /*
- * If this driver supports reopening images this contains a
- * NULL-terminated list of the runtime options that can be
- * modified. If an option in this list is unspecified during
- * reopen then it _must_ be reset to its default value or return
- * an error.
- */
- const char *const *mutable_opts;
-
- /*
- * Returns 0 for completed check, -errno for internal errors.
- * The check results are stored in result.
- */
- int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix);
-
- void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
-
- /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
- int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
- const char *tag);
- int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
- const char *tag);
- int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
- bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
-
- void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
-
- /*
- * Returns 1 if newly created images are guaranteed to contain only
- * zeros, 0 otherwise.
- */
- int (*bdrv_has_zero_init)(BlockDriverState *bs);
-
- /* Remove fd handlers, timers, and other event loop callbacks so the event
- * loop is no longer in use. Called with no in-flight requests and in
- * depth-first traversal order with parents before child nodes.
- */
- void (*bdrv_detach_aio_context)(BlockDriverState *bs);
-
- /* Add fd handlers, timers, and other event loop callbacks so I/O requests
- * can be processed again. Called with no in-flight requests and in
- * depth-first traversal order with child nodes before parent nodes.
- */
- void (*bdrv_attach_aio_context)(BlockDriverState *bs,
- AioContext *new_context);
-
- /* io queue for linux-aio */
- void (*bdrv_io_plug)(BlockDriverState *bs);
- void (*bdrv_io_unplug)(BlockDriverState *bs);
-
- /**
- * Try to get @bs's logical and physical block size.
- * On success, store them in @bsz and return zero.
- * On failure, return negative errno.
- */
- int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
- /**
- * Try to get @bs's geometry (cyls, heads, sectors)
- * On success, store them in @geo and return 0.
- * On failure return -errno.
- * Only drivers that want to override guest geometry implement this
- * callback; see hd_geometry_guess().
- */
- int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
-
- /**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
- * drain operation to drain and stop any internal sources of requests in
- * the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
- *
- * They should be used by the driver to e.g. manage scheduled I/O
- * requests, or toggle an internal state. After the end of the drain new
- * requests will continue normally.
- */
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
-
- void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
- Error **errp);
- void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
- Error **errp);
-
- /**
- * Informs the block driver that a permission change is intended. The
- * driver checks whether the change is permissible and may take other
- * preparations for the change (e.g. get file system locks). This operation
- * is always followed either by a call to either .bdrv_set_perm or
- * .bdrv_abort_perm_update.
- *
- * Checks whether the requested set of cumulative permissions in @perm
- * can be granted for accessing @bs and whether no other users are using
- * permissions other than those given in @shared (both arguments take
- * BLK_PERM_* bitmasks).
- *
- * If both conditions are met, 0 is returned. Otherwise, -errno is returned
- * and errp is set to an error describing the conflict.
- */
- int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
- uint64_t shared, Error **errp);
-
- /**
- * Called to inform the driver that the set of cumulative set of used
- * permissions for @bs has changed to @perm, and the set of sharable
- * permission to @shared. The driver can use this to propagate changes to
- * its children (i.e. request permissions only if a parent actually needs
- * them).
- *
- * This function is only invoked after bdrv_check_perm(), so block drivers
- * may rely on preparations made in their .bdrv_check_perm implementation.
- */
- void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
-
- /*
- * Called to inform the driver that after a previous bdrv_check_perm()
- * call, the permission update is not performed and any preparations made
- * for it (e.g. taken file locks) need to be undone.
- *
- * This function can be called even for nodes that never saw a
- * bdrv_check_perm() call. It is a no-op then.
- */
- void (*bdrv_abort_perm_update)(BlockDriverState *bs);
-
- /**
- * Returns in @nperm and @nshared the permissions that the driver for @bs
- * needs on its child @c, based on the cumulative permissions requested by
- * the parents in @parent_perm and @parent_shared.
- *
- * If @c is NULL, return the permissions for attaching a new child for the
- * given @child_class and @role.
- *
- * If @reopen_queue is non-NULL, don't return the currently needed
- * permissions, but those that will be needed after applying the
- * @reopen_queue.
- */
- void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role,
- BlockReopenQueue *reopen_queue,
- uint64_t parent_perm, uint64_t parent_shared,
- uint64_t *nperm, uint64_t *nshared);
-
- bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
- bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
- int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- Error **errp);
-
- /**
- * Register/unregister a buffer for I/O. For example, when the driver is
- * interested to know the memory areas that will later be used in iovs, so
- * that it can do IOMMU mapping with VFIO etc., in order to get better
- * performance. In the case of VFIO drivers, this callback is used to do
- * DMA mapping for hot buffers.
- */
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
- QLIST_ENTRY(BlockDriver) list;
-
- /* Pointer to a NULL-terminated array of names of strong options
- * that can be specified for bdrv_open(). A strong option is one
- * that changes the data of a BDS.
- * If this pointer is NULL, the array is considered empty.
- * "filename" and "driver" are always considered strong. */
- const char *const *strong_runtime_opts;
-};
-
-static inline bool block_driver_can_compress(BlockDriver *drv)
-{
- return drv->bdrv_co_pwritev_compressed ||
- drv->bdrv_co_pwritev_compressed_part;
-}
-
-typedef struct BlockLimits {
- /* Alignment requirement, in bytes, for offset/length of I/O
- * requests. Must be a power of 2 less than INT_MAX; defaults to
- * 1 for drivers with modern byte interfaces, and to 512
- * otherwise. */
- uint32_t request_alignment;
-
- /*
- * Maximum number of bytes that can be discarded at once. Must be multiple
- * of pdiscard_alignment, but need not be power of 2. May be 0 if no
- * inherent 64-bit limit.
- */
- int64_t max_pdiscard;
-
- /* Optimal alignment for discard requests in bytes. A power of 2
- * is best but not mandatory. Must be a multiple of
- * bl.request_alignment, and must be less than max_pdiscard if
- * that is set. May be 0 if bl.request_alignment is good enough */
- uint32_t pdiscard_alignment;
-
- /*
- * Maximum number of bytes that can zeroized at once. Must be multiple of
- * pwrite_zeroes_alignment. 0 means no limit.
- */
- int64_t max_pwrite_zeroes;
-
- /* Optimal alignment for write zeroes requests in bytes. A power
- * of 2 is best but not mandatory. Must be a multiple of
- * bl.request_alignment, and must be less than max_pwrite_zeroes
- * if that is set. May be 0 if bl.request_alignment is good
- * enough */
- uint32_t pwrite_zeroes_alignment;
-
- /* Optimal transfer length in bytes. A power of 2 is best but not
- * mandatory. Must be a multiple of bl.request_alignment, or 0 if
- * no preferred size */
- uint32_t opt_transfer;
-
- /* Maximal transfer length in bytes. Need not be power of 2, but
- * must be multiple of opt_transfer and bl.request_alignment, or 0
- * for no 32-bit limit. For now, anything larger than INT_MAX is
- * clamped down. */
- uint32_t max_transfer;
-
- /* Maximal hardware transfer length in bytes. Applies whenever
- * transfers to the device bypass the kernel I/O scheduler, for
- * example with SG_IO. If larger than max_transfer or if zero,
- * blk_get_max_hw_transfer will fall back to max_transfer.
- */
- uint64_t max_hw_transfer;
-
- /* Maximal number of scatter/gather elements allowed by the hardware.
- * Applies whenever transfers to the device bypass the kernel I/O
- * scheduler, for example with SG_IO. If larger than max_iov
- * or if zero, blk_get_max_hw_iov will fall back to max_iov.
- */
- int max_hw_iov;
-
- /* memory alignment, in bytes so that no bounce buffer is needed */
- size_t min_mem_alignment;
-
- /* memory alignment, in bytes, for bounce buffer */
- size_t opt_mem_alignment;
-
- /* maximum number of iovec elements */
- int max_iov;
-} BlockLimits;
-
-typedef struct BdrvOpBlocker BdrvOpBlocker;
-
-typedef struct BdrvAioNotifier {
- void (*attached_aio_context)(AioContext *new_context, void *opaque);
- void (*detach_aio_context)(void *opaque);
-
- void *opaque;
- bool deleted;
-
- QLIST_ENTRY(BdrvAioNotifier) list;
-} BdrvAioNotifier;
-
-struct BdrvChildClass {
- /* If true, bdrv_replace_node() doesn't change the node this BdrvChild
- * points to. */
- bool stay_at_node;
-
- /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
- * will return it. This information is used for drain_all, where every node
- * will be drained separately, so the drain only needs to be propagated to
- * non-BDS parents. */
- bool parent_is_bds;
-
- void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
- int *child_flags, QDict *child_options,
- int parent_flags, QDict *parent_options);
-
- void (*change_media)(BdrvChild *child, bool load);
- void (*resize)(BdrvChild *child);
-
- /* Returns a name that is supposedly more useful for human users than the
- * node name for identifying the node in question (in particular, a BB
- * name), or NULL if the parent can't provide a better name. */
- const char *(*get_name)(BdrvChild *child);
-
- /* Returns a malloced string that describes the parent of the child for a
- * human reader. This could be a node-name, BlockBackend name, qdev ID or
- * QOM path of the device owning the BlockBackend, job type and ID etc. The
- * caller is responsible for freeing the memory. */
- char *(*get_parent_desc)(BdrvChild *child);
-
- /*
- * If this pair of functions is implemented, the parent doesn't issue new
- * requests after returning from .drained_begin() until .drained_end() is
- * called.
- *
- * These functions must not change the graph (and therefore also must not
- * call aio_poll(), which could change the graph indirectly).
- *
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
- * Note that this can be nested. If drained_begin() was called twice, new
- * I/O is allowed only after drained_end() was called twice, too.
- */
- void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
-
- /*
- * Returns whether the parent has pending requests for the child. This
- * callback is polled after .drained_begin() has been called until all
- * activity on the child has stopped.
- */
- bool (*drained_poll)(BdrvChild *child);
-
- /* Notifies the parent that the child has been activated/inactivated (e.g.
- * when migration is completing) and it can start/stop requesting
- * permissions and doing I/O on it. */
- void (*activate)(BdrvChild *child, Error **errp);
- int (*inactivate)(BdrvChild *child);
-
- void (*attach)(BdrvChild *child);
- void (*detach)(BdrvChild *child);
-
- /* Notifies the parent that the filename of its child has changed (e.g.
- * because the direct child was removed from the backing chain), so that it
- * can update its reference. */
- int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
- const char *filename, Error **errp);
-
- bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
- GSList **ignore, Error **errp);
- void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
-
- AioContext *(*get_parent_aio_context)(BdrvChild *child);
-};
-
-extern const BdrvChildClass child_of_bds;
-
-struct BdrvChild {
- BlockDriverState *bs;
- char *name;
- const BdrvChildClass *klass;
- BdrvChildRole role;
- void *opaque;
-
- /**
- * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
- */
- uint64_t perm;
-
- /**
- * Permissions that can still be granted to other users of @bs while this
- * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
- */
- uint64_t shared_perm;
-
- /*
- * This link is frozen: the child can neither be replaced nor
- * detached from the parent.
- */
- bool frozen;
-
- /*
- * How many times the parent of this child has been drained
- * (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count). It may differ while the
- * child is entering or leaving a drained section.
- */
- int parent_quiesce_counter;
-
- QLIST_ENTRY(BdrvChild) next;
- QLIST_ENTRY(BdrvChild) next_parent;
-};
-
-/*
- * Allows bdrv_co_block_status() to cache one data region for a
- * protocol node.
- *
- * @valid: Whether the cache is valid (should be accessed with atomic
- * functions so this can be reset by RCU readers)
- * @data_start: Offset where we know (or strongly assume) is data
- * @data_end: Offset where the data region ends (which is not necessarily
- * the start of a zeroed region)
- */
-typedef struct BdrvBlockStatusCache {
- struct rcu_head rcu;
-
- bool valid;
- int64_t data_start;
- int64_t data_end;
-} BdrvBlockStatusCache;
-
-struct BlockDriverState {
- /* Protected by big QEMU lock or read-only after opening. No special
- * locking needed during I/O...
- */
- int open_flags; /* flags used to open the file, re-used for re-open */
- bool encrypted; /* if true, the media is encrypted */
- bool sg; /* if true, the device is a /dev/sg* */
- bool probed; /* if true, format was probed rather than specified */
- bool force_share; /* if true, always allow all shared permissions */
- bool implicit; /* if true, this filter node was automatically inserted */
-
- BlockDriver *drv; /* NULL means no media */
- void *opaque;
-
- AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
- /* long-running tasks intended to always use the same AioContext as this
- * BDS may register themselves in this list to be notified of changes
- * regarding this BDS's context */
- QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
- bool walking_aio_notifiers; /* to make removal during iteration safe */
-
- char filename[PATH_MAX];
- /*
- * If not empty, this image is a diff in relation to backing_file.
- * Note that this is the name given in the image header and
- * therefore may or may not be equal to .backing->bs->filename.
- * If this field contains a relative path, it is to be resolved
- * relatively to the overlay's location.
- */
- char backing_file[PATH_MAX];
- /*
- * The backing filename indicated by the image header. Contrary
- * to backing_file, if we ever open this file, auto_backing_file
- * is replaced by the resulting BDS's filename (i.e. after a
- * bdrv_refresh_filename() run).
- */
- char auto_backing_file[PATH_MAX];
- char backing_format[16]; /* if non-zero and backing_file exists */
-
- QDict *full_open_options;
- char exact_filename[PATH_MAX];
-
- BdrvChild *backing;
- BdrvChild *file;
-
- /* I/O Limits */
- BlockLimits bl;
-
- /*
- * Flags honored during pread
- */
- unsigned int supported_read_flags;
- /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
- * BDRV_REQ_WRITE_UNCHANGED).
- * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
- * writes will be issued as normal writes without the flag set.
- * This is important to note for drivers that do not explicitly
- * request a WRITE permission for their children and instead take
- * the same permissions as their parent did (this is commonly what
- * block filters do). Such drivers have to be aware that the
- * parent may have taken a WRITE_UNCHANGED permission only and is
- * issuing such requests. Drivers either must make sure that
- * these requests do not result in plain WRITE accesses (usually
- * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
- * every incoming write request as-is, including potentially that
- * flag), or they have to explicitly take the WRITE permission for
- * their children. */
- unsigned int supported_write_flags;
- /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
- * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
- unsigned int supported_zero_flags;
- /*
- * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
- *
- * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
- * that any added space reads as all zeros. If this can't be guaranteed,
- * the operation must fail.
- */
- unsigned int supported_truncate_flags;
-
- /* the following member gives a name to every node on the bs graph. */
- char node_name[32];
- /* element of the list of named nodes building the graph */
- QTAILQ_ENTRY(BlockDriverState) node_list;
- /* element of the list of all BlockDriverStates (all_bdrv_states) */
- QTAILQ_ENTRY(BlockDriverState) bs_list;
- /* element of the list of monitor-owned BDS */
- QTAILQ_ENTRY(BlockDriverState) monitor_list;
- int refcnt;
-
- /* operation blockers */
- QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
-
- /* The node that this node inherited default options from (and a reopen on
- * which can affect this node by changing these defaults). This is always a
- * parent node of this node. */
- BlockDriverState *inherits_from;
- QLIST_HEAD(, BdrvChild) children;
- QLIST_HEAD(, BdrvChild) parents;
-
- QDict *options;
- QDict *explicit_options;
- BlockdevDetectZeroesOptions detect_zeroes;
-
- /* The error object in use for blocking operations on backing_hd */
- Error *backing_blocker;
-
- /* Protected by AioContext lock */
-
- /* If we are reading a disk image, give its size in sectors.
- * Generally read-only; it is written to by load_snapshot and
- * save_snaphost, but the block layer is quiescent during those.
- */
- int64_t total_sectors;
-
- /* threshold limit for writes, in bytes. "High water mark". */
- uint64_t write_threshold_offset;
-
- /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
- * Reading from the list can be done with either the BQL or the
- * dirty_bitmap_mutex. Modifying a bitmap only requires
- * dirty_bitmap_mutex. */
- QemuMutex dirty_bitmap_mutex;
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
-
- /* Offset after the highest byte written to */
- Stat64 wr_highest_offset;
-
- /* If true, copy read backing sectors into image. Can be >1 if more
- * than one client has requested copy-on-read. Accessed with atomic
- * ops.
- */
- int copy_on_read;
-
- /* number of in-flight requests; overall and serialising.
- * Accessed with atomic ops.
- */
- unsigned int in_flight;
- unsigned int serialising_in_flight;
-
- /* counter for nested bdrv_io_plug.
- * Accessed with atomic ops.
- */
- unsigned io_plugged;
-
- /* do we need to tell the quest if we have a volatile write cache? */
- int enable_write_cache;
-
- /* Accessed with atomic ops. */
- int quiesce_counter;
- int recursive_quiesce_counter;
-
- unsigned int write_gen; /* Current data generation */
-
- /* Protected by reqs_lock. */
- CoMutex reqs_lock;
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
- CoQueue flush_queue; /* Serializing flush queue */
- bool active_flush_req; /* Flush request in flight? */
-
- /* Only read/written by whoever has set active_flush_req to true. */
- unsigned int flushed_gen; /* Flushed write generation */
-
- /* BdrvChild links to this node may never be frozen */
- bool never_freeze;
-
- /* Lock for block-status cache RCU writers */
- CoMutex bsc_modify_lock;
- /* Always non-NULL, but must only be dereferenced under an RCU read guard */
- BdrvBlockStatusCache *block_status_cache;
-};
-
-struct BlockBackendRootState {
- int open_flags;
- BlockdevDetectZeroesOptions detect_zeroes;
-};
-
-typedef enum BlockMirrorBackingMode {
- /* Reuse the existing backing chain from the source for the target.
- * - sync=full: Set backing BDS to NULL.
- * - sync=top: Use source's backing BDS.
- * - sync=none: Use source as the backing BDS. */
- MIRROR_SOURCE_BACKING_CHAIN,
-
- /* Open the target's backing chain completely anew */
- MIRROR_OPEN_BACKING_CHAIN,
-
- /* Do not change the target's backing BDS after job completion */
- MIRROR_LEAVE_BACKING_CHAIN,
-} BlockMirrorBackingMode;
-
-
-/* Essential block drivers which must always be statically linked into qemu, and
- * which therefore can be accessed without using bdrv_find_format() */
-extern BlockDriver bdrv_file;
-extern BlockDriver bdrv_raw;
-extern BlockDriver bdrv_qcow2;
-
-int coroutine_fn bdrv_co_preadv(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-
-static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
- int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
- int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
-}
-
-extern unsigned int bdrv_drain_all_count;
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
-
-bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
- uint64_t align);
-BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
-
-int get_tmp_filename(char *filename, int size);
-BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
- const char *filename);
-
-void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
- QDict *options);
-
-/**
- * bdrv_add_aio_context_notifier:
- *
- * If a long-running job intends to be always run in the same AioContext as a
- * certain BDS, it may use this function to be notified of changes regarding the
- * association of the BDS to an AioContext.
- *
- * attached_aio_context() is called after the target BDS has been attached to a
- * new AioContext; detach_aio_context() is called before the target BDS is being
- * detached from its old AioContext.
- */
-void bdrv_add_aio_context_notifier(BlockDriverState *bs,
- void (*attached_aio_context)(AioContext *new_context, void *opaque),
- void (*detach_aio_context)(void *opaque), void *opaque);
-
-/**
- * bdrv_remove_aio_context_notifier:
- *
- * Unsubscribe of change notifications regarding the BDS's AioContext. The
- * parameters given here have to be the same as those given to
- * bdrv_add_aio_context_notifier().
- */
-void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
- void (*aio_context_attached)(AioContext *,
- void *),
- void (*aio_context_detached)(void *),
- void *opaque);
-
-/**
- * bdrv_wakeup:
- * @bs: The BlockDriverState for which an I/O operation has been completed.
- *
- * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
- * synchronous I/O on a BlockDriverState that is attached to another
- * I/O thread, the main thread lets the I/O thread's event loop run,
- * waiting for the I/O operation to complete. A bdrv_wakeup will wake
- * up the main thread if necessary.
- *
- * Manual calls to bdrv_wakeup are rarely necessary, because
- * bdrv_dec_in_flight already calls it.
- */
-void bdrv_wakeup(BlockDriverState *bs);
-
-#ifdef _WIN32
-int is_windows_drive(const char *filename);
-#endif
-
-/**
- * stream_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @backing_file_str: The file name that will be written to @bs as the
- * the new backing file if the job completes. Ignored if @base is %NULL.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the stream job inserts into the graph above
- * @bs. NULL means that a node name should be autogenerated.
- * @errp: Error object.
- *
- * Start a streaming operation on @bs. Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs. At the end of a successful
- * streaming job, the backing file of @bs will be changed to
- * @backing_file_str in the written image and to @base in the live
- * BlockDriverState.
- */
-void stream_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, const char *backing_file_str,
- BlockDriverState *bottom,
- int creation_flags, int64_t speed,
- BlockdevOnError on_error,
- const char *filter_node_name,
- Error **errp);
-
-/**
- * commit_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device.
- * @top: Top block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @backing_file_str: String to use as the backing file in @top's overlay
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @top. NULL means
- * that a node name should be autogenerated.
- * @errp: Error object.
- *
- */
-void commit_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, BlockDriverState *top,
- int creation_flags, int64_t speed,
- BlockdevOnError on_error, const char *backing_file_str,
- const char *filter_node_name, Error **errp);
-/**
- * commit_active_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @auto_complete: Auto complete the job.
- * @errp: Error object.
- *
- */
-BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, int creation_flags,
- int64_t speed, BlockdevOnError on_error,
- const char *filter_node_name,
- BlockCompletionFunc *cb, void *opaque,
- bool auto_complete, Error **errp);
-/*
- * mirror_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @replaces: Block graph node name to replace once the mirror is done. Can
- * only be used when full mirroring is selected.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @granularity: The chosen granularity for the dirty bitmap.
- * @buf_size: The amount of data that can be in flight at one time.
- * @mode: Whether to collapse all images in the chain to the target.
- * @backing_mode: How to establish the target's backing chain after completion.
- * @zero_target: Whether the target should be explicitly zero-initialized
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @unmap: Whether to unmap target where source sectors only contain zeroes.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the mirror job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @copy_mode: When to trigger writes to the target.
- * @errp: Error object.
- *
- * Start a mirroring operation on @bs. Clusters that are allocated
- * in @bs will be written to @target until the job is cancelled or
- * manually completed. At the end of a successful mirroring job,
- * @bs will be switched to read from @target.
- */
-void mirror_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, const char *replaces,
- int creation_flags, int64_t speed,
- uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
- bool zero_target,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name,
- MirrorCopyMode copy_mode, Error **errp);
-
-/*
- * backup_job_create:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @sync_mode: What parts of the disk image should be copied to the destination.
- * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
- * @bitmap_mode: The bitmap synchronization policy to use.
- * @perf: Performance options. All actual fields assumed to be present,
- * all ".has_*" fields are ignored.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @txn: Transaction that this job is part of (may be NULL).
- *
- * Create a backup operation on @bs. Clusters in @bs are written to @target
- * until the job is cancelled or manually completed.
- */
-BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, int64_t speed,
- MirrorSyncMode sync_mode,
- BdrvDirtyBitmap *sync_bitmap,
- BitmapSyncMode bitmap_mode,
- bool compress,
- const char *filter_node_name,
- BackupPerf *perf,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- int creation_flags,
- BlockCompletionFunc *cb, void *opaque,
- JobTxn *txn, Error **errp);
-
-BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- uint64_t perm, uint64_t shared_perm,
- void *opaque, Error **errp);
-void bdrv_root_unref_child(BdrvChild *child);
-
-void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
- uint64_t *shared_perm);
-
-/**
- * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
- * bdrv_child_refresh_perms() instead and make the parent's
- * .bdrv_child_perm() implementation return the correct values.
- */
-int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
- Error **errp);
-
-/**
- * Calls bs->drv->bdrv_child_perm() and updates the child's permission
- * masks with the result.
- * Drivers should invoke this function whenever an event occurs that
- * makes their .bdrv_child_perm() implementation return different
- * values than before, but which will not result in the block layer
- * automatically refreshing the permissions.
- */
-int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
-
-bool bdrv_recurse_can_replace(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
-/*
- * Default implementation for BlockDriver.bdrv_child_perm() that can
- * be used by block filters and image formats, as long as they use the
- * child_of_bds child class and set an appropriate BdrvChildRole.
- */
-void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role, BlockReopenQueue *reopen_queue,
- uint64_t perm, uint64_t shared,
- uint64_t *nperm, uint64_t *nshared);
-
-const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
-bool blk_dev_has_removable_media(BlockBackend *blk);
-bool blk_dev_has_tray(BlockBackend *blk);
-void blk_dev_eject_request(BlockBackend *blk, bool force);
-bool blk_dev_is_tray_open(BlockBackend *blk);
-bool blk_dev_is_medium_locked(BlockBackend *blk);
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
- const BdrvDirtyBitmap *src,
- HBitmap **backup, bool lock);
-
-void bdrv_inc_in_flight(BlockDriverState *bs);
-void bdrv_dec_in_flight(BlockDriverState *bs);
-
-void blockdev_close_all_bdrv_states(void);
-
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
-int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
-
-void bdrv_set_monitor_owned(BlockDriverState *bs);
-BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
-
-/**
- * Simple implementation of bdrv_co_create_opts for protocol drivers
- * which only support creation via opening a file
- * (usually existing raw storage device)
- */
-int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
-extern QemuOptsList bdrv_create_opts_simple;
-
-BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
- const char *name,
- BlockDriverState **pbs,
- Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
- BlockDirtyBitmapMergeSourceList *bms,
- HBitmap **backup, Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
- bool release,
- BlockDriverState **bitmap_bs,
- Error **errp);
-
-BdrvChild *bdrv_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_primary_child(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
-
-static inline BlockDriverState *child_bs(BdrvChild *child)
-{
- return child ? child->bs : NULL;
-}
-
-static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_filter_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_filter_or_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_primary_child(bs));
-}
-
-/**
- * End all quiescent sections started by bdrv_drain_all_begin(). This is
- * needed when deleting a BDS before bdrv_drain_all_end() is called.
- *
- * NOTE: this is an internal helper for bdrv_close() *only*. No one else
- * should call it.
- */
-void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
-
-/**
- * Check whether the given offset is in the cached block-status data
- * region.
- *
- * If it is, and @pnum is not NULL, *pnum is set to
- * `bsc.data_end - offset`, i.e. how many bytes, starting from
- * @offset, are data (according to the cache).
- * Otherwise, *pnum is not touched.
- */
-bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
-
-/**
- * If [offset, offset + bytes) overlaps with the currently cached
- * block-status region, invalidate the cache.
- *
- * (To be used by I/O paths that cause data regions to be zero or
- * holes.)
- */
-void bdrv_bsc_invalidate_range(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
-
-/**
- * Mark the range [offset, offset + bytes) as a data region.
- */
-void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif /* BLOCK_INT_H */
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 87fbb39..6525e16 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -74,6 +74,13 @@ typedef struct BlockJob {
GSList *nodes;
} BlockJob;
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* block_job_next:
* @job: A block job, or %NULL.
@@ -155,6 +162,21 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
*/
void block_job_iostatus_reset(BlockJob *job);
+/*
+ * block_job_get_aio_context:
+ *
+ * Returns aio context associated with a block job.
+ */
+AioContext *block_job_get_aio_context(BlockJob *job);
+
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
+ */
+
/**
* block_job_is_internal:
* @job: The job to determine if it is user-visible or not.
@@ -170,11 +192,4 @@ bool block_job_is_internal(BlockJob *job);
*/
const BlockJobDriver *block_job_driver(BlockJob *job);
-/*
- * block_job_get_aio_context:
- *
- * Returns aio context associated with a block job.
- */
-AioContext *block_job_get_aio_context(BlockJob *job);
-
#endif
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 6633d83..6bd9ae2 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -39,6 +39,13 @@ struct BlockJobDriver {
JobDriver job_driver;
/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ /*
* Returns whether the job has pending requests for the child or will
* submit new requests before the next pause point. This callback is polled
* in the context of draining a job node after requesting that the job be
@@ -47,6 +54,13 @@ struct BlockJobDriver {
bool (*drained_poll)(BlockJob *job);
/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
* If the callback is not NULL, it will be invoked before the job is
* resumed in a new AioContext. This is the place to move any resources
* besides job->blk to the new AioContext.
@@ -56,6 +70,13 @@ struct BlockJobDriver {
void (*set_speed)(BlockJob *job, int64_t speed);
};
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* block_job_create:
* @job_id: The id of the newly-created job, or %NULL to have one
@@ -98,6 +119,13 @@ void block_job_free(Job *job);
*/
void block_job_user_resume(Job *job);
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
/**
* block_job_ratelimit_get_delay:
*
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 40950ae..6528336 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -77,7 +77,7 @@ void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap,
bool persistent);
void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap);
void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy);
-void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
+bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
HBitmap **backup, Error **errp);
void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip);
bool bdrv_dirty_bitmap_get(BdrvDirtyBitmap *bitmap, int64_t offset);
@@ -115,6 +115,8 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, int64_t offset,
bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
int64_t start, int64_t end, int64_t max_dirty_count,
int64_t *dirty_start, int64_t *dirty_count);
+bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+ int64_t bytes, int64_t *count);
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
Error **errp);
diff --git a/include/block/nvme.h b/include/block/nvme.h
index cd068ac..3737351 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -695,7 +695,8 @@ typedef struct QEMU_PACKED NvmeRwCmd {
uint8_t flags;
uint16_t cid;
uint32_t nsid;
- uint64_t rsvd2;
+ uint32_t cdw2;
+ uint32_t cdw3;
uint64_t mptr;
NvmeCmdDptr dptr;
uint64_t slba;
@@ -731,7 +732,6 @@ enum {
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
NVME_RW_PRINFO_PRCHK_MASK = 7 << 10,
-
};
#define NVME_RW_PRINFO(control) ((control >> 10) & 0xf)
@@ -770,6 +770,7 @@ typedef struct QEMU_PACKED NvmeDsmRange {
enum {
NVME_COPY_FORMAT_0 = 0x0,
+ NVME_COPY_FORMAT_1 = 0x1,
};
typedef struct QEMU_PACKED NvmeCopyCmd {
@@ -777,7 +778,9 @@ typedef struct QEMU_PACKED NvmeCopyCmd {
uint8_t flags;
uint16_t cid;
uint32_t nsid;
- uint32_t rsvd2[4];
+ uint32_t cdw2;
+ uint32_t cdw3;
+ uint32_t rsvd2[2];
NvmeCmdDptr dptr;
uint64_t sdlba;
uint8_t nr;
@@ -789,7 +792,7 @@ typedef struct QEMU_PACKED NvmeCopyCmd {
uint16_t appmask;
} NvmeCopyCmd;
-typedef struct QEMU_PACKED NvmeCopySourceRange {
+typedef struct QEMU_PACKED NvmeCopySourceRangeFormat0 {
uint8_t rsvd0[8];
uint64_t slba;
uint16_t nlb;
@@ -797,7 +800,17 @@ typedef struct QEMU_PACKED NvmeCopySourceRange {
uint32_t reftag;
uint16_t apptag;
uint16_t appmask;
-} NvmeCopySourceRange;
+} NvmeCopySourceRangeFormat0;
+
+typedef struct QEMU_PACKED NvmeCopySourceRangeFormat1 {
+ uint8_t rsvd0[8];
+ uint64_t slba;
+ uint16_t nlb;
+ uint8_t rsvd18[8];
+ uint8_t sr[10];
+ uint16_t apptag;
+ uint16_t appmask;
+} NvmeCopySourceRangeFormat1;
enum NvmeAsyncEventRequest {
NVME_AER_TYPE_ERROR = 0,
@@ -908,6 +921,7 @@ enum NvmeStatusCodes {
NVME_CMP_FAILURE = 0x0285,
NVME_ACCESS_DENIED = 0x0286,
NVME_DULB = 0x0287,
+ NVME_E2E_STORAGE_TAG_ERROR = 0x0288,
NVME_MORE = 0x2000,
NVME_DNR = 0x4000,
NVME_NO_COMPLETE = 0xffff,
@@ -1111,6 +1125,10 @@ enum NvmeIdCtrlOaes {
NVME_OAES_NS_ATTR = 1 << 8,
};
+enum NvmeIdCtrlCtratt {
+ NVME_CTRATT_ELBAS = 1 << 15,
+};
+
enum NvmeIdCtrlOacs {
NVME_OACS_SECURITY = 1 << 0,
NVME_OACS_FORMAT = 1 << 1,
@@ -1131,7 +1149,8 @@ enum NvmeIdCtrlOncs {
};
enum NvmeIdCtrlOcfs {
- NVME_OCFS_COPY_FORMAT_0 = 1 << 0,
+ NVME_OCFS_COPY_FORMAT_0 = 1 << NVME_COPY_FORMAT_0,
+ NVME_OCFS_COPY_FORMAT_1 = 1 << NVME_COPY_FORMAT_1,
};
enum NvmeIdctrlVwc {
@@ -1216,6 +1235,7 @@ enum NvmeFeatureIds {
NVME_WRITE_ATOMICITY = 0xa,
NVME_ASYNCHRONOUS_EVENT_CONF = 0xb,
NVME_TIMESTAMP = 0xe,
+ NVME_HOST_BEHAVIOR_SUPPORT = 0x16,
NVME_COMMAND_SET_PROFILE = 0x19,
NVME_SOFTWARE_PROGRESS_MARKER = 0x80,
NVME_FID_MAX = 0x100,
@@ -1257,6 +1277,13 @@ typedef struct QEMU_PACKED NvmeRangeType {
uint8_t rsvd48[16];
} NvmeRangeType;
+typedef struct NvmeHostBehaviorSupport {
+ uint8_t acre;
+ uint8_t etdas;
+ uint8_t lbafee;
+ uint8_t rsvd3[509];
+} NvmeHostBehaviorSupport;
+
typedef struct QEMU_PACKED NvmeLBAF {
uint16_t ms;
uint8_t ds;
@@ -1270,6 +1297,7 @@ typedef struct QEMU_PACKED NvmeLBAFE {
} NvmeLBAFE;
#define NVME_NSID_BROADCAST 0xffffffff
+#define NVME_MAX_NLBAF 64
typedef struct QEMU_PACKED NvmeIdNs {
uint64_t nsze;
@@ -1304,11 +1332,20 @@ typedef struct QEMU_PACKED NvmeIdNs {
uint8_t rsvd81[23];
uint8_t nguid[16];
uint64_t eui64;
- NvmeLBAF lbaf[16];
- uint8_t rsvd192[192];
+ NvmeLBAF lbaf[NVME_MAX_NLBAF];
uint8_t vs[3712];
} NvmeIdNs;
+#define NVME_ID_NS_NVM_ELBAF_PIF(elbaf) (((elbaf) >> 7) & 0x3)
+
+typedef struct QEMU_PACKED NvmeIdNsNvm {
+ uint64_t lbstm;
+ uint8_t pic;
+ uint8_t rsvd9[3];
+ uint32_t elbaf[NVME_MAX_NLBAF];
+ uint8_t rsvd268[3828];
+} NvmeIdNsNvm;
+
typedef struct QEMU_PACKED NvmeIdNsDescr {
uint8_t nidt;
uint8_t nidl;
@@ -1410,10 +1447,23 @@ enum NvmeIdNsMc {
#define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK)
-typedef struct NvmeDifTuple {
- uint16_t guard;
- uint16_t apptag;
- uint32_t reftag;
+enum NvmePIFormat {
+ NVME_PI_GUARD_16 = 0,
+ NVME_PI_GUARD_64 = 2,
+};
+
+typedef union NvmeDifTuple {
+ struct {
+ uint16_t guard;
+ uint16_t apptag;
+ uint32_t reftag;
+ } g16;
+
+ struct {
+ uint64_t guard;
+ uint16_t apptag;
+ uint8_t sr[6];
+ } g64;
} NvmeDifTuple;
enum NvmeZoneAttr {
@@ -1510,7 +1560,8 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8);
QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
- QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRange) != 32);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat0) != 32);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat1) != 40);
QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64);
@@ -1520,6 +1571,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeHostBehaviorSupport) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
@@ -1530,10 +1582,11 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsNvm) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
- QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 16);
}
#endif
diff --git a/include/block/reqlist.h b/include/block/reqlist.h
new file mode 100644
index 0000000..5253497
--- /dev/null
+++ b/include/block/reqlist.h
@@ -0,0 +1,75 @@
+/*
+ * reqlist API
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Authors:
+ * Dietmar Maurer (dietmar@proxmox.com)
+ * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef REQLIST_H
+#define REQLIST_H
+
+#include "qemu/coroutine.h"
+
+/*
+ * The API is not thread-safe and shouldn't be. The struct is public to be part
+ * of other structures and protected by third-party locks, see
+ * block/block-copy.c for example.
+ */
+
+typedef struct BlockReq {
+ int64_t offset;
+ int64_t bytes;
+
+ CoQueue wait_queue; /* coroutines blocked on this req */
+ QLIST_ENTRY(BlockReq) list;
+} BlockReq;
+
+typedef QLIST_HEAD(, BlockReq) BlockReqList;
+
+/*
+ * Initialize new request and add it to the list. Caller must be sure that
+ * there are no conflicting requests in the list.
+ */
+void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
+ int64_t bytes);
+/* Search for request in the list intersecting with @offset/@bytes area. */
+BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset,
+ int64_t bytes);
+
+/*
+ * If there are no intersecting requests return false. Otherwise, wait for the
+ * first found intersecting request to finish and return true.
+ *
+ * @lock is passed to qemu_co_queue_wait()
+ * False return value proves that lock was released at no point.
+ */
+bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset,
+ int64_t bytes, CoMutex *lock);
+
+/*
+ * Wait for all intersecting requests. It just calls reqlist_wait_one() in a
+ * loop, caller is responsible to stop producing new requests in this region
+ * in parallel, otherwise reqlist_wait_all() may never return.
+ */
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,
+ int64_t bytes, CoMutex *lock);
+
+/*
+ * Shrink request and wake all waiting coroutines (maybe some of them are not
+ * intersecting with shrunk request).
+ */
+void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes);
+
+/*
+ * Remove request and wake all waiting coroutines. Do not release any memory.
+ */
+void coroutine_fn reqlist_remove_req(BlockReq *req);
+
+#endif /* REQLIST_H */
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 9403456..50ff924 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo {
uint64_t icount; /* record/replay step */
} QEMUSnapshotInfo;
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name);
bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
@@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
Error **errp);
-/* Group operations. All block drivers are involved.
+/*
+ * Group operations. All block drivers are involved.
* These functions will properly handle dataplane (take aio_context_acquire
- * when appropriate for appropriate block drivers */
+ * when appropriate for appropriate block drivers
+ */
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
Error **errp);
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 84caf5c..c0f0fab 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -433,10 +433,6 @@ int cpu_exec(CPUState *cpu);
void tcg_exec_realizefn(CPUState *cpu, Error **errp);
void tcg_exec_unrealizefn(CPUState *cpu);
-/* Returns: 0 on success, -1 on error */
-int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
- void *ptr, target_ulong len, bool is_write);
-
/**
* cpu_set_cpustate_pointers(cpu)
* @cpu: The cpu object
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index de5f444..7f7b594 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -7,6 +7,18 @@
#include "exec/hwaddr.h"
#endif
+/**
+ * vaddr:
+ * Type wide enough to contain any #target_ulong virtual address.
+ */
+typedef uint64_t vaddr;
+#define VADDR_PRId PRId64
+#define VADDR_PRIu PRIu64
+#define VADDR_PRIo PRIo64
+#define VADDR_PRIx PRIx64
+#define VADDR_PRIX PRIX64
+#define VADDR_MAX UINT64_MAX
+
/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even
* when intptr_t is 32-bit and we are aligning a long long.
*/
@@ -78,6 +90,28 @@ void qemu_ram_unset_migratable(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
size_t qemu_ram_pagesize_largest(void);
+/**
+ * cpu_address_space_init:
+ * @cpu: CPU to add this address space to
+ * @asidx: integer index of this address space
+ * @prefix: prefix to be used as name of address space
+ * @mr: the root memory region of address space
+ *
+ * Add the specified address space to the CPU's cpu_ases list.
+ * The address space added with @asidx 0 is the one used for the
+ * convenience pointer cpu->as.
+ * The target-specific code which registers ASes is responsible
+ * for defining what semantics address space 0, 1, 2, etc have.
+ *
+ * Before the first call to this function, the caller must set
+ * cpu->num_ases to the total number of address spaces it needs
+ * to support.
+ *
+ * Note that with KVM only one address space is supported.
+ */
+void cpu_address_space_init(CPUState *cpu, int asidx,
+ const char *prefix, MemoryRegion *mr);
+
void cpu_physical_memory_rw(hwaddr addr, void *buf,
hwaddr len, bool is_write);
static inline void cpu_physical_memory_read(hwaddr addr,
@@ -90,6 +124,7 @@ static inline void cpu_physical_memory_write(hwaddr addr,
{
cpu_physical_memory_rw(addr, (void *)buf, len, true);
}
+void cpu_reloading_memory_map(void);
void *cpu_physical_memory_map(hwaddr addr,
hwaddr *plen,
bool is_write);
@@ -116,6 +151,10 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
#endif
+/* Returns: 0 on success, -1 on error */
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+ void *ptr, size_t len, bool is_write);
+
/* vl.c */
extern int singlestep;
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index da987fe..6adacf8 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -64,6 +64,7 @@
#include "exec/memopidx.h"
#include "qemu/int128.h"
+#include "cpu.h"
#if defined(CONFIG_USER_ONLY)
/* sparc32plus has 64bit long but 32bit space address
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 227e10b..d2cb098 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -24,7 +24,6 @@
#ifdef CONFIG_TCG
#include "exec/cpu_ldst.h"
#endif
-#include "sysemu/cpu-timers.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
@@ -81,31 +80,6 @@ static inline bool cpu_loop_exit_requested(CPUState *cpu)
return (int32_t)qatomic_read(&cpu_neg(cpu)->icount_decr.u32) < 0;
}
-#if !defined(CONFIG_USER_ONLY)
-void cpu_reloading_memory_map(void);
-/**
- * cpu_address_space_init:
- * @cpu: CPU to add this address space to
- * @asidx: integer index of this address space
- * @prefix: prefix to be used as name of address space
- * @mr: the root memory region of address space
- *
- * Add the specified address space to the CPU's cpu_ases list.
- * The address space added with @asidx 0 is the one used for the
- * convenience pointer cpu->as.
- * The target-specific code which registers ASes is responsible
- * for defining what semantics address space 0, 1, 2, etc have.
- *
- * Before the first call to this function, the caller must set
- * cpu->num_ases to the total number of address spaces it needs
- * to support.
- *
- * Note that with KVM only one address space is supported.
- */
-void cpu_address_space_init(CPUState *cpu, int asidx,
- const char *prefix, MemoryRegion *mr);
-#endif
-
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
/* cputlb.c */
/**
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index a024a03..89edf94 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -45,17 +45,6 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...);
*/
void gdb_do_syscallv(gdb_syscall_complete_cb cb, const char *fmt, va_list va);
int use_gdb_syscalls(void);
-void gdb_set_stop_cpu(CPUState *cpu);
-
-/**
- * gdb_exit: exit gdb session, reporting inferior status
- * @code: exit code reported
- *
- * This closes the session and sends a final packet to GDB reporting
- * the exit status of the program. It also cleans up any connections
- * detritus before returning.
- */
-void gdb_exit(int code);
#ifdef CONFIG_USER_ONLY
/**
@@ -165,7 +154,7 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len)
#define ldtul_p(addr) ldl_p(addr)
#endif
-#endif
+#endif /* NEED_CPU_H */
/**
* gdbserver_start: start the gdb server
@@ -178,6 +167,18 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len)
int gdbserver_start(const char *port_or_device);
/**
+ * gdb_exit: exit gdb session, reporting inferior status
+ * @code: exit code reported
+ *
+ * This closes the session and sends a final packet to GDB reporting
+ * the exit status of the program. It also cleans up any connections
+ * detritus before returning.
+ */
+void gdb_exit(int code);
+
+void gdb_set_stop_cpu(CPUState *cpu);
+
+/**
* gdb_has_xml:
* This is an ugly hack to cope with both new and old gdb.
* If gdb sends qXfer:features:read then assume we're talking to a newish
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 7ad4ad1..7c5c02f 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -51,8 +51,6 @@
#pragma GCC poison TARGET_PAGE_BITS
#pragma GCC poison TARGET_PAGE_ALIGN
-#pragma GCC poison CPUArchState
-
#pragma GCC poison CPU_INTERRUPT_HARD
#pragma GCC poison CPU_INTERRUPT_EXITTB
#pragma GCC poison CPU_INTERRUPT_HALT
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index c1ea17d..7e76ee2 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -132,6 +132,7 @@ struct VirtMachineClass {
bool no_secure_gpio;
/* Machines < 6.2 have no support for describing cpu topology to guest */
bool no_cpu_topology;
+ bool no_tcg_lpa2;
};
struct VirtMachineState {
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 76ab3b8..0efc615 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -22,6 +22,7 @@
#include "hw/qdev-core.h"
#include "disas/dis-asm.h"
+#include "exec/cpu-common.h"
#include "exec/hwaddr.h"
#include "exec/memattrs.h"
#include "qapi/qapi-types-run-state.h"
@@ -36,18 +37,6 @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
void *opaque);
/**
- * vaddr:
- * Type wide enough to contain any #target_ulong virtual address.
- */
-typedef uint64_t vaddr;
-#define VADDR_PRId PRId64
-#define VADDR_PRIu PRIu64
-#define VADDR_PRIo PRIo64
-#define VADDR_PRIx PRIx64
-#define VADDR_PRIX PRIX64
-#define VADDR_MAX UINT64_MAX
-
-/**
* SECTION:cpu
* @section_id: QEMU-cpu
* @title: CPU Class
@@ -66,6 +55,24 @@ typedef struct CPUClass CPUClass;
DECLARE_CLASS_CHECKERS(CPUClass, CPU,
TYPE_CPU)
+/**
+ * OBJECT_DECLARE_CPU_TYPE:
+ * @CpuInstanceType: instance struct name
+ * @CpuClassType: class struct name
+ * @CPU_MODULE_OBJ_NAME: the CPU name in uppercase with underscore separators
+ *
+ * This macro is typically used in "cpu-qom.h" header file, and will:
+ *
+ * - create the typedefs for the CPU object and class structs
+ * - register the type for use with g_autoptr
+ * - provide three standard type cast functions
+ *
+ * The object struct and class struct need to be declared manually.
+ */
+#define OBJECT_DECLARE_CPU_TYPE(CpuInstanceType, CpuClassType, CPU_MODULE_OBJ_NAME) \
+ typedef struct ArchCPU CpuInstanceType; \
+ OBJECT_DECLARE_TYPE(ArchCPU, CpuClassType, CPU_MODULE_OBJ_NAME);
+
typedef enum MMUAccessType {
MMU_DATA_LOAD = 0,
MMU_DATA_STORE = 1,
@@ -351,7 +358,7 @@ struct CPUState {
AddressSpace *as;
MemoryRegion *memory;
- void *env_ptr; /* CPUArchState */
+ CPUArchState *env_ptr;
IcountDecr *icount_decr_ptr;
/* Accessed in parallel; all accesses must be atomic */
diff --git a/include/hw/intc/riscv_imsic.h b/include/hw/intc/riscv_imsic.h
new file mode 100644
index 0000000..58c2aaa
--- /dev/null
+++ b/include/hw/intc/riscv_imsic.h
@@ -0,0 +1,68 @@
+/*
+ * RISC-V IMSIC (Incoming Message Signal Interrupt Controller) interface
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IMSIC_H
+#define HW_RISCV_IMSIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_IMSIC "riscv.imsic"
+
+typedef struct RISCVIMSICState RISCVIMSICState;
+DECLARE_INSTANCE_CHECKER(RISCVIMSICState, RISCV_IMSIC, TYPE_RISCV_IMSIC)
+
+#define IMSIC_MMIO_PAGE_SHIFT 12
+#define IMSIC_MMIO_PAGE_SZ (1UL << IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_SIZE(__num_pages) ((__num_pages) * IMSIC_MMIO_PAGE_SZ)
+
+#define IMSIC_MMIO_HART_GUEST_MAX_BTIS 6
+#define IMSIC_MMIO_GROUP_MIN_SHIFT 24
+
+#define IMSIC_HART_NUM_GUESTS(__guest_bits) \
+ (1U << (__guest_bits))
+#define IMSIC_HART_SIZE(__guest_bits) \
+ (IMSIC_HART_NUM_GUESTS(__guest_bits) * IMSIC_MMIO_PAGE_SZ)
+#define IMSIC_GROUP_NUM_HARTS(__hart_bits) \
+ (1U << (__hart_bits))
+#define IMSIC_GROUP_SIZE(__hart_bits, __guest_bits) \
+ (IMSIC_GROUP_NUM_HARTS(__hart_bits) * IMSIC_HART_SIZE(__guest_bits))
+
+struct RISCVIMSICState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ qemu_irq *external_irqs;
+
+ /*< public >*/
+ MemoryRegion mmio;
+ uint32_t num_eistate;
+ uint32_t *eidelivery;
+ uint32_t *eithreshold;
+ uint32_t *eistate;
+
+ /* config */
+ bool mmode;
+ uint32_t hartid;
+ uint32_t num_pages;
+ uint32_t num_irqs;
+};
+
+DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
+ uint32_t num_pages, uint32_t num_ids);
+
+#endif
diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h
index eac35ef..00da9de 100644
--- a/include/hw/riscv/opentitan.h
+++ b/include/hw/riscv/opentitan.h
@@ -57,8 +57,10 @@ enum {
IBEX_DEV_FLASH,
IBEX_DEV_FLASH_VIRTUAL,
IBEX_DEV_UART,
+ IBEX_DEV_SPI_DEVICE,
+ IBEX_DEV_SPI_HOST0,
+ IBEX_DEV_SPI_HOST1,
IBEX_DEV_GPIO,
- IBEX_DEV_SPI,
IBEX_DEV_I2C,
IBEX_DEV_PATTGEN,
IBEX_DEV_TIMER,
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 6e9f61c..78b058e 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -24,26 +24,36 @@
#include "hw/block/flash.h"
#include "qom/object.h"
-#define VIRT_CPUS_MAX 32
-#define VIRT_SOCKETS_MAX 8
+#define VIRT_CPUS_MAX_BITS 9
+#define VIRT_CPUS_MAX (1 << VIRT_CPUS_MAX_BITS)
+#define VIRT_SOCKETS_MAX_BITS 2
+#define VIRT_SOCKETS_MAX (1 << VIRT_SOCKETS_MAX_BITS)
#define TYPE_RISCV_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
typedef struct RISCVVirtState RISCVVirtState;
DECLARE_INSTANCE_CHECKER(RISCVVirtState, RISCV_VIRT_MACHINE,
TYPE_RISCV_VIRT_MACHINE)
+typedef enum RISCVVirtAIAType {
+ VIRT_AIA_TYPE_NONE = 0,
+ VIRT_AIA_TYPE_APLIC,
+ VIRT_AIA_TYPE_APLIC_IMSIC,
+} RISCVVirtAIAType;
+
struct RISCVVirtState {
/*< private >*/
MachineState parent;
/*< public >*/
RISCVHartArrayState soc[VIRT_SOCKETS_MAX];
- DeviceState *plic[VIRT_SOCKETS_MAX];
+ DeviceState *irqchip[VIRT_SOCKETS_MAX];
PFlashCFI01 *flash[2];
FWCfgState *fw_cfg;
int fdt_size;
bool have_aclint;
+ RISCVVirtAIAType aia_type;
+ int aia_guests;
};
enum {
@@ -54,9 +64,13 @@ enum {
VIRT_CLINT,
VIRT_ACLINT_SSWI,
VIRT_PLIC,
+ VIRT_APLIC_M,
+ VIRT_APLIC_S,
VIRT_UART0,
VIRT_VIRTIO,
VIRT_FW_CFG,
+ VIRT_IMSIC_M,
+ VIRT_IMSIC_S,
VIRT_FLASH,
VIRT_DRAM,
VIRT_PCIE_MMIO,
@@ -73,8 +87,13 @@ enum {
VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */
};
-#define VIRT_PLIC_NUM_SOURCES 127
-#define VIRT_PLIC_NUM_PRIORITIES 7
+#define VIRT_IRQCHIP_IPI_MSI 1
+#define VIRT_IRQCHIP_NUM_MSIS 255
+#define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV
+#define VIRT_IRQCHIP_NUM_PRIO_BITS 3
+#define VIRT_IRQCHIP_MAX_GUESTS_BITS 3
+#define VIRT_IRQCHIP_MAX_GUESTS ((1U << VIRT_IRQCHIP_MAX_GUESTS_BITS) - 1U)
+
#define VIRT_PLIC_PRIORITY_BASE 0x04
#define VIRT_PLIC_PENDING_BASE 0x1000
#define VIRT_PLIC_ENABLE_BASE 0x2000
@@ -86,9 +105,15 @@ enum {
#define FDT_PCI_ADDR_CELLS 3
#define FDT_PCI_INT_CELLS 1
-#define FDT_PLIC_ADDR_CELLS 0
#define FDT_PLIC_INT_CELLS 1
-#define FDT_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + 1 + \
- FDT_PLIC_ADDR_CELLS + FDT_PLIC_INT_CELLS)
+#define FDT_APLIC_INT_CELLS 2
+#define FDT_IMSIC_INT_CELLS 0
+#define FDT_MAX_INT_CELLS 2
+#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+ 1 + FDT_MAX_INT_CELLS)
+#define FDT_PLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+ 1 + FDT_PLIC_INT_CELLS)
+#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+ 1 + FDT_APLIC_INT_CELLS)
#endif
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 68b2e3b..8c0d9ab 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -26,8 +26,6 @@
int qemu_main(int argc, char **argv, char **envp);
#endif
-void *qemu_oom_check(void *ptr);
-
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
QEMU_WARN_UNUSED_RESULT;
diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h
new file mode 100644
index 0000000..1558a82
--- /dev/null
+++ b/include/qemu/coroutine-tls.h
@@ -0,0 +1,165 @@
+/*
+ * QEMU Thread Local Storage for coroutines
+ *
+ * Copyright Red Hat
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * It is forbidden to access Thread Local Storage in coroutines because
+ * compiler optimizations may cause values to be cached across coroutine
+ * re-entry. Coroutines can run in more than one thread through the course of
+ * their life, leading bugs when stale TLS values from the wrong thread are
+ * used as a result of compiler optimization.
+ *
+ * An example is:
+ *
+ * ..code-block:: c
+ * :caption: A coroutine that may see the wrong TLS value
+ *
+ * static __thread AioContext *current_aio_context;
+ * ...
+ * static void coroutine_fn foo(void)
+ * {
+ * aio_notify(current_aio_context);
+ * qemu_coroutine_yield();
+ * aio_notify(current_aio_context); // <-- may be stale after yielding!
+ * }
+ *
+ * This header provides macros for safely defining variables in Thread Local
+ * Storage:
+ *
+ * ..code-block:: c
+ * :caption: A coroutine that safely uses TLS
+ *
+ * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
+ * ...
+ * static void coroutine_fn foo(void)
+ * {
+ * aio_notify(get_current_aio_context());
+ * qemu_coroutine_yield();
+ * aio_notify(get_current_aio_context()); // <-- safe
+ * }
+ */
+
+#ifndef QEMU_COROUTINE_TLS_H
+#define QEMU_COROUTINE_TLS_H
+
+/*
+ * To stop the compiler from caching TLS values we define accessor functions
+ * with __attribute__((noinline)) plus asm volatile("") to prevent
+ * optimizations that override noinline.
+ *
+ * The compiler can still analyze noinline code and make optimizations based on
+ * that knowledge, so an inline asm output operand is used to prevent
+ * optimizations that make assumptions about the address of the TLS variable.
+ *
+ * This is fragile and ultimately needs to be solved by a mechanism that is
+ * guaranteed to work by the compiler (e.g. stackless coroutines), but for now
+ * we use this approach to prevent issues.
+ */
+
+/**
+ * QEMU_DECLARE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Declare an extern variable in Thread Local Storage from a header file:
+ *
+ * .. code-block:: c
+ * :caption: Declaring an extern variable in Thread Local Storage
+ *
+ * QEMU_DECLARE_CO_TLS(int, my_count)
+ * ...
+ * int c = get_my_count();
+ * set_my_count(c + 1);
+ * *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Declaring a TLS variable using __thread
+ *
+ * extern __thread int my_count;
+ * ...
+ * int c = my_count;
+ * my_count = c + 1;
+ * *(&my_count) = 0;
+ */
+#define QEMU_DECLARE_CO_TLS(type, var) \
+ __attribute__((noinline)) type get_##var(void); \
+ __attribute__((noinline)) void set_##var(type v); \
+ __attribute__((noinline)) type *get_ptr_##var(void);
+
+/**
+ * QEMU_DEFINE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a variable in Thread Local Storage that was previously declared from
+ * a header file with QEMU_DECLARE_CO_TLS():
+ *
+ * .. code-block:: c
+ * :caption: Defining a variable in Thread Local Storage
+ *
+ * QEMU_DEFINE_CO_TLS(int, my_count)
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Defining a TLS variable using __thread
+ *
+ * __thread int my_count;
+ */
+#define QEMU_DEFINE_CO_TLS(type, var) \
+ static __thread type co_tls_##var; \
+ type get_##var(void) { asm volatile(""); return co_tls_##var; } \
+ void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \
+ type *get_ptr_##var(void) \
+ { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+/**
+ * QEMU_DEFINE_STATIC_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a static variable in Thread Local Storage:
+ *
+ * .. code-block:: c
+ * :caption: Defining a static variable in Thread Local Storage
+ *
+ * QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
+ * ...
+ * int c = get_my_count();
+ * set_my_count(c + 1);
+ * *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Defining a static TLS variable using __thread
+ *
+ * static __thread int my_count;
+ * ...
+ * int c = my_count;
+ * my_count = c + 1;
+ * *(&my_count) = 0;
+ */
+#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \
+ static __thread type co_tls_##var; \
+ static __attribute__((noinline, unused)) \
+ type get_##var(void) \
+ { asm volatile(""); return co_tls_##var; } \
+ static __attribute__((noinline, unused)) \
+ void set_##var(type v) \
+ { asm volatile(""); co_tls_##var = v; } \
+ static __attribute__((noinline, unused)) \
+ type *get_ptr_##var(void) \
+ { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+#endif /* QEMU_COROUTINE_TLS_H */
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 09fc245..7adb12d 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -45,12 +45,26 @@
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif
-#ifndef bit_AVX512F
-#define bit_AVX512F (1 << 16)
-#endif
#ifndef bit_BMI2
#define bit_BMI2 (1 << 8)
#endif
+#ifndef bit_AVX512F
+#define bit_AVX512F (1 << 16)
+#endif
+#ifndef bit_AVX512DQ
+#define bit_AVX512DQ (1 << 17)
+#endif
+#ifndef bit_AVX512BW
+#define bit_AVX512BW (1 << 30)
+#endif
+#ifndef bit_AVX512VL
+#define bit_AVX512VL (1u << 31)
+#endif
+
+/* Leaf 7, %ecx */
+#ifndef bit_AVX512VBMI2
+#define bit_AVX512VBMI2 (1 << 6)
+#endif
/* Leaf 0x80000001, %ecx */
#ifndef bit_LZCNT
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 5e71b6d..5bd986a 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -340,6 +340,18 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
int64_t max_dirty_count,
int64_t *dirty_start, int64_t *dirty_count);
+/*
+ * bdrv_dirty_bitmap_status:
+ * @hb: The HBitmap to operate on
+ * @start: The bit to start from
+ * @count: Number of bits to proceed
+ * @pnum: Out-parameter. How many bits has same value starting from @start
+ *
+ * Returns true if bitmap is dirty at @start, false otherwise.
+ */
+bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,
+ int64_t *pnum);
+
/**
* hbitmap_iter_next:
* @hbi: HBitmapIter to operate on.
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 6e67b69..c105b31 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -169,6 +169,12 @@ typedef struct Job {
* Callbacks and other information about a Job driver.
*/
struct JobDriver {
+
+ /*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards
+ */
+
/** Derived Job struct size */
size_t instance_size;
@@ -184,9 +190,18 @@ struct JobDriver {
* aborted. If it returns zero, the job moves into the WAITING state. If it
* is the last job to complete in its transaction, all jobs in the
* transaction move from WAITING to PENDING.
+ *
+ * This callback must be run in the job's context.
*/
int coroutine_fn (*run)(Job *job, Error **errp);
+ /*
+ * Functions run without regard to the BQL that may run in any
+ * arbitrary thread. These functions do not need to be thread-safe
+ * because the caller ensures that they are invoked from one
+ * thread at time.
+ */
+
/**
* If the callback is not NULL, it will be invoked when the job transitions
* into the paused state. Paused jobs must not perform any asynchronous
@@ -201,6 +216,13 @@ struct JobDriver {
*/
void coroutine_fn (*resume)(Job *job);
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* Called when the job is resumed by the user (i.e. user_paused becomes
* false). .user_resume is called before .resume.
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 8dbc6fc..7a4d6a0 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -242,10 +242,52 @@ AioContext *iohandler_get_aio_context(void);
* must always be taken outside other locks. This function helps
* functions take different paths depending on whether the current
* thread is running within the main loop mutex.
+ *
+ * This function should never be used in the block layer, because
+ * unit tests, block layer tools and qemu-storage-daemon do not
+ * have a BQL.
+ * Please instead refer to qemu_in_main_thread().
*/
bool qemu_mutex_iothread_locked(void);
/**
+ * qemu_in_main_thread: return whether it's possible to safely access
+ * the global state of the block layer.
+ *
+ * Global state of the block layer is not accessible from I/O threads
+ * or worker threads; only from threads that "own" the default
+ * AioContext that qemu_get_aio_context() returns. For tests, block
+ * layer tools and qemu-storage-daemon there is a designated thread that
+ * runs the event loop for qemu_get_aio_context(), and that is the
+ * main thread.
+ *
+ * For emulators, however, any thread that holds the BQL can act
+ * as the block layer main thread; this will be any of the actual
+ * main thread, the vCPU threads or the RCU thread.
+ *
+ * For clarity, do not use this function outside the block layer.
+ */
+bool qemu_in_main_thread(void);
+
+/* Mark and check that the function is part of the global state API. */
+#define GLOBAL_STATE_CODE() \
+ do { \
+ assert(qemu_in_main_thread()); \
+ } while (0)
+
+/* Mark and check that the function is part of the I/O API. */
+#define IO_CODE() \
+ do { \
+ /* nop */ \
+ } while (0)
+
+/* Mark and check that the function is part of the "I/O OR GS" API. */
+#define IO_OR_GS_CODE() \
+ do { \
+ /* nop */ \
+ } while (0)
+
+/**
* qemu_mutex_lock_iothread: Lock the main loop mutex.
*
* This function locks the main loop mutex. The mutex is taken by
diff --git a/include/qemu/memalign.h b/include/qemu/memalign.h
new file mode 100644
index 0000000..fa299f3
--- /dev/null
+++ b/include/qemu/memalign.h
@@ -0,0 +1,61 @@
+/*
+ * Allocation and free functions for aligned memory
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MEMALIGN_H
+#define QEMU_MEMALIGN_H
+
+/**
+ * qemu_try_memalign: Allocate aligned memory
+ * @alignment: required alignment, in bytes
+ * @size: size of allocation, in bytes
+ *
+ * Allocate memory on an aligned boundary (i.e. the returned
+ * address will be an exact multiple of @alignment).
+ * @alignment must be a power of 2, or the function will assert().
+ * On success, returns allocated memory; on failure, returns NULL.
+ *
+ * The memory allocated through this function must be freed via
+ * qemu_vfree() (and not via free()).
+ */
+void *qemu_try_memalign(size_t alignment, size_t size);
+/**
+ * qemu_memalign: Allocate aligned memory, without failing
+ * @alignment: required alignment, in bytes
+ * @size: size of allocation, in bytes
+ *
+ * Allocate memory in the same way as qemu_try_memalign(), but
+ * abort() with an error message if the memory allocation fails.
+ *
+ * The memory allocated through this function must be freed via
+ * qemu_vfree() (and not via free()).
+ */
+void *qemu_memalign(size_t alignment, size_t size);
+/**
+ * qemu_vfree: Free memory allocated through qemu_memalign
+ * @ptr: memory to free
+ *
+ * This function must be used to free memory allocated via qemu_memalign()
+ * or qemu_try_memalign(). (Using the wrong free function will cause
+ * subtle bugs on Windows hosts.)
+ */
+void qemu_vfree(void *ptr);
+/*
+ * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define
+ * g_autofree macro.
+ */
+static inline void qemu_cleanup_generic_vfree(void *p)
+{
+ void **pp = (void **)p;
+ qemu_vfree(*pp);
+}
+
+/*
+ * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free.
+ */
+#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree)))
+
+#endif
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 7bcce3b..c9ec783 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -379,28 +379,10 @@ extern "C" {
#endif
int qemu_daemon(int nochdir, int noclose);
-void *qemu_try_memalign(size_t alignment, size_t size);
-void *qemu_memalign(size_t alignment, size_t size);
void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
bool noreserve);
-void qemu_vfree(void *ptr);
void qemu_anon_ram_free(void *ptr, size_t size);
-/*
- * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define
- * g_autofree macro.
- */
-static inline void qemu_cleanup_generic_vfree(void *p)
-{
- void **pp = (void **)p;
- qemu_vfree(*pp);
-}
-
-/*
- * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free.
- */
-#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree)))
-
#ifdef _WIN32
#define HAVE_CHARDEV_SERIAL 1
#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
@@ -673,19 +655,6 @@ static inline int platform_does_not_support_system(const char *command)
}
#endif /* !HAVE_SYSTEM_FUNCTION */
-/**
- * Duplicate directory entry @dent.
- *
- * It is highly recommended to use this function instead of open coding
- * duplication of @c dirent objects, because the actual @c struct @c dirent
- * size may be bigger or shorter than @c sizeof(struct dirent) and correct
- * handling is platform specific (see gitlab issue #841).
- *
- * @dent - original directory entry to be duplicated
- * @returns duplicated directory entry which should be freed with g_free()
- */
-struct dirent *qemu_dirent_dup(struct dirent *dent);
-
#ifdef __cplusplus
}
#endif
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index e69efbd..b063c6f 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -29,6 +29,7 @@
#include "qemu/atomic.h"
#include "qemu/notify.h"
#include "qemu/sys_membarrier.h"
+#include "qemu/coroutine-tls.h"
#ifdef __cplusplus
extern "C" {
@@ -76,11 +77,11 @@ struct rcu_reader_data {
NotifierList force_rcu;
};
-extern __thread struct rcu_reader_data rcu_reader;
+QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)
static inline void rcu_read_lock(void)
{
- struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+ struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
unsigned ctr;
if (p_rcu_reader->depth++ > 0) {
@@ -96,7 +97,7 @@ static inline void rcu_read_lock(void)
static inline void rcu_read_unlock(void)
{
- struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+ struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
assert(p_rcu_reader->depth != 0);
if (--p_rcu_reader->depth > 0) {
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 5b302cb..42f4ceb 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -26,6 +26,7 @@ typedef struct AddressSpace AddressSpace;
typedef struct AioContext AioContext;
typedef struct Aml Aml;
typedef struct AnnounceTimer AnnounceTimer;
+typedef struct ArchCPU ArchCPU;
typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter;
typedef struct BlockBackend BlockBackend;
@@ -39,6 +40,7 @@ typedef struct CompatProperty CompatProperty;
typedef struct CoMutex CoMutex;
typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
typedef struct CPUAddressSpace CPUAddressSpace;
+typedef struct CPUArchState CPUArchState;
typedef struct CPUState CPUState;
typedef struct DeviceListener DeviceListener;
typedef struct DeviceState DeviceState;
diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h
index a83fe8e..f1d0f7b 100644
--- a/include/qemu/xattr.h
+++ b/include/qemu/xattr.h
@@ -22,7 +22,9 @@
#ifdef CONFIG_LIBATTR
# include <attr/xattr.h>
#else
-# define ENOATTR ENODATA
+# if !defined(ENOATTR)
+# define ENOATTR ENODATA
+# endif
# include <sys/xattr.h>
#endif
diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h
index 032f697..6013c94 100644
--- a/include/sysemu/accel-ops.h
+++ b/include/sysemu/accel-ops.h
@@ -28,8 +28,11 @@ struct AccelOpsClass {
/* initialization function called when accel is chosen */
void (*ops_init)(AccelOpsClass *ops);
+ bool (*cpus_are_resettable)(void);
+
void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */
void (*kick_vcpu_thread)(CPUState *cpu);
+ bool (*cpu_thread_is_idle)(CPUState *cpu);
void (*synchronize_post_reset)(CPUState *cpu);
void (*synchronize_post_init)(CPUState *cpu);
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 70c5795..79c2591 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -28,4 +28,6 @@ enum {
extern const uint32_t arch_type;
+void qemu_init_arch_modules(void);
+
#endif
diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h
new file mode 100644
index 0000000..2391679
--- /dev/null
+++ b/include/sysemu/block-backend-common.h
@@ -0,0 +1,102 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_COMMON_H
+#define BLOCK_BACKEND_COMMON_H
+
+#include "qemu/iov.h"
+#include "block/throttle-groups.h"
+
+/*
+ * TODO Have to include block/block.h for a bunch of block layer
+ * types. Unfortunately, this pulls in the whole BlockDriverState
+ * API, which we don't want used by many BlockBackend users. Some of
+ * the types belong here, and the rest should be split into a common
+ * header and one for the BlockDriverState API.
+ */
+#include "block/block.h"
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load, Error **errp);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+ /*
+ * Runs when the backend receives a drain request.
+ */
+ void (*drained_begin)(void *opaque);
+ /*
+ * Runs when the backend's last drain request ends.
+ */
+ void (*drained_end)(void *opaque);
+ /*
+ * Is the device still busy?
+ */
+ bool (*drained_poll)(void *opaque);
+} BlockDevOps;
+
+/*
+ * This struct is embedded in (the private) BlockBackend struct and contains
+ * fields that must be public. This is in particular for QLIST_ENTRY() and
+ * friends so that BlockBackends can be kept in lists outside block-backend.c
+ */
+typedef struct BlockBackendPublic {
+ ThrottleGroupMember throttle_group_member;
+} BlockBackendPublic;
+
+#endif /* BLOCK_BACKEND_COMMON_H */
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
new file mode 100644
index 0000000..2e93a74
--- /dev/null
+++ b/include/sysemu/block-backend-global-state.h
@@ -0,0 +1,116 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_GS_H
+#define BLOCK_BACKEND_GS_H
+
+#include "block-backend-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
+BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, Error **errp);
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+int blk_get_refcnt(BlockBackend *blk);
+void blk_ref(BlockBackend *blk);
+void blk_unref(BlockBackend *blk);
+void blk_remove_all_bs(void);
+BlockBackend *blk_by_name(const char *name);
+BlockBackend *blk_next(BlockBackend *blk);
+BlockBackend *blk_all_next(BlockBackend *blk);
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
+void monitor_remove_blk(BlockBackend *blk);
+
+BlockBackendPublic *blk_get_public(BlockBackend *blk);
+BlockBackend *blk_by_public(BlockBackendPublic *public);
+
+void blk_remove_bs(BlockBackend *blk);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
+bool bdrv_has_blk(BlockDriverState *bs);
+bool bdrv_is_root_node(BlockDriverState *bs);
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
+
+void blk_iostatus_enable(BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
+void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
+DeviceState *blk_get_attached_dev(BlockBackend *blk);
+BlockBackend *blk_by_dev(void *dev);
+BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
+void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
+
+void blk_activate(BlockBackend *blk, Error **errp);
+
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
+void blk_aio_cancel(BlockAIOCB *acb);
+int blk_commit_all(void);
+void blk_drain(BlockBackend *blk);
+void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+bool blk_supports_write_perm(BlockBackend *blk);
+bool blk_is_sg(BlockBackend *blk);
+void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
+int blk_get_flags(BlockBackend *blk);
+bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
+void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
+void blk_op_block_all(BlockBackend *blk, Error *reason);
+void blk_op_unblock_all(BlockBackend *blk, Error *reason);
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+ Error **errp);
+void blk_add_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+void blk_remove_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *,
+ void *),
+ void (*detach_aio_context)(void *),
+ void *opaque);
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
+bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
+int blk_get_open_flags_from_root_state(BlockBackend *blk);
+
+int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
+ int64_t pos, int size);
+int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
+int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
+int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
+
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
+void blk_io_limits_disable(BlockBackend *blk);
+void blk_io_limits_enable(BlockBackend *blk, const char *group);
+void blk_io_limits_update_group(BlockBackend *blk, const char *group);
+void blk_set_force_allow_inactivate(BlockBackend *blk);
+
+void blk_register_buf(BlockBackend *blk, void *host, size_t size);
+void blk_unregister_buf(BlockBackend *blk, void *host);
+
+const BdrvChild *blk_root(BlockBackend *blk);
+
+int blk_make_empty(BlockBackend *blk, Error **errp);
+
+#endif /* BLOCK_BACKEND_GS_H */
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
new file mode 100644
index 0000000..6517c39
--- /dev/null
+++ b/include/sysemu/block-backend-io.h
@@ -0,0 +1,161 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_IO_H
+#define BLOCK_BACKEND_IO_H
+
+#include "block-backend-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+const char *blk_name(const BlockBackend *blk);
+
+BlockDriverState *blk_bs(BlockBackend *blk);
+
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
+void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+
+char *blk_get_attached_dev_id(BlockBackend *blk);
+
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
+ BlockCompletionFunc *cb, void *opaque);
+void blk_aio_cancel_async(BlockAIOCB *acb);
+BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+
+void blk_inc_in_flight(BlockBackend *blk);
+void blk_dec_in_flight(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
+void blk_lock_medium(BlockBackend *blk, bool locked);
+void blk_eject(BlockBackend *blk, bool eject_flag);
+int64_t blk_getlength(BlockBackend *blk);
+void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
+int64_t blk_nb_sectors(BlockBackend *blk);
+void *blk_try_blockalign(BlockBackend *blk, size_t size);
+void *blk_blockalign(BlockBackend *blk, size_t size);
+bool blk_is_writable(BlockBackend *blk);
+bool blk_enable_write_cache(BlockBackend *blk);
+BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
+BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
+ int error);
+void blk_error_action(BlockBackend *blk, BlockErrorAction action,
+ bool is_read, int error);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
+int blk_get_max_iov(BlockBackend *blk);
+int blk_get_max_hw_iov(BlockBackend *blk);
+void blk_set_guest_block_size(BlockBackend *blk, int align);
+
+void blk_io_plug(BlockBackend *blk);
+void blk_io_unplug(BlockBackend *blk);
+AioContext *blk_get_aio_context(BlockBackend *blk);
+BlockAcctStats *blk_get_stats(BlockBackend *blk);
+void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+ BlockCompletionFunc *cb,
+ void *opaque, int ret);
+
+uint32_t blk_get_request_alignment(BlockBackend *blk);
+uint32_t blk_get_max_transfer(BlockBackend *blk);
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+ BlockBackend *blk_out, int64_t off_out,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+ int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
+static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
+ int64_t bytes, void *buf,
+ BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_preadv(blk, offset, bytes, &qiov, flags);
+}
+
+static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
+ int64_t bytes, void *buf,
+ BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
+}
+
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
+ int64_t bytes);
+
+int coroutine_fn blk_co_flush(BlockBackend *blk);
+int blk_flush(BlockBackend *blk);
+
+int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
+
+int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
+ int64_t bytes);
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+#endif /* BLOCK_BACKEND_IO_H */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index e5e1524..038be9f 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -13,272 +13,9 @@
#ifndef BLOCK_BACKEND_H
#define BLOCK_BACKEND_H
-#include "qemu/iov.h"
-#include "block/throttle-groups.h"
+#include "block-backend-global-state.h"
+#include "block-backend-io.h"
-/*
- * TODO Have to include block/block.h for a bunch of block layer
- * types. Unfortunately, this pulls in the whole BlockDriverState
- * API, which we don't want used by many BlockBackend users. Some of
- * the types belong here, and the rest should be split into a common
- * header and one for the BlockDriverState API.
- */
-#include "block/block.h"
-
-/* Callbacks for block device models */
-typedef struct BlockDevOps {
- /*
- * Runs when virtual media changed (monitor commands eject, change)
- * Argument load is true on load and false on eject.
- * Beware: doesn't run when a host device's physical media
- * changes. Sure would be useful if it did.
- * Device models with removable media must implement this callback.
- */
- void (*change_media_cb)(void *opaque, bool load, Error **errp);
- /*
- * Runs when an eject request is issued from the monitor, the tray
- * is closed, and the medium is locked.
- * Device models that do not implement is_medium_locked will not need
- * this callback. Device models that can lock the medium or tray might
- * want to implement the callback and unlock the tray when "force" is
- * true, even if they do not support eject requests.
- */
- void (*eject_request_cb)(void *opaque, bool force);
- /*
- * Is the virtual tray open?
- * Device models implement this only when the device has a tray.
- */
- bool (*is_tray_open)(void *opaque);
- /*
- * Is the virtual medium locked into the device?
- * Device models implement this only when device has such a lock.
- */
- bool (*is_medium_locked)(void *opaque);
- /*
- * Runs when the size changed (e.g. monitor command block_resize)
- */
- void (*resize_cb)(void *opaque);
- /*
- * Runs when the backend receives a drain request.
- */
- void (*drained_begin)(void *opaque);
- /*
- * Runs when the backend's last drain request ends.
- */
- void (*drained_end)(void *opaque);
- /*
- * Is the device still busy?
- */
- bool (*drained_poll)(void *opaque);
-} BlockDevOps;
-
-/* This struct is embedded in (the private) BlockBackend struct and contains
- * fields that must be public. This is in particular for QLIST_ENTRY() and
- * friends so that BlockBackends can be kept in lists outside block-backend.c
- * */
-typedef struct BlockBackendPublic {
- ThrottleGroupMember throttle_group_member;
-} BlockBackendPublic;
-
-BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
-BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
- uint64_t shared_perm, Error **errp);
-BlockBackend *blk_new_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
-int blk_get_refcnt(BlockBackend *blk);
-void blk_ref(BlockBackend *blk);
-void blk_unref(BlockBackend *blk);
-void blk_remove_all_bs(void);
-const char *blk_name(const BlockBackend *blk);
-BlockBackend *blk_by_name(const char *name);
-BlockBackend *blk_next(BlockBackend *blk);
-BlockBackend *blk_all_next(BlockBackend *blk);
-bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
-void monitor_remove_blk(BlockBackend *blk);
-
-BlockBackendPublic *blk_get_public(BlockBackend *blk);
-BlockBackend *blk_by_public(BlockBackendPublic *public);
-
-BlockDriverState *blk_bs(BlockBackend *blk);
-void blk_remove_bs(BlockBackend *blk);
-int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
-int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
-bool bdrv_has_blk(BlockDriverState *bs);
-bool bdrv_is_root_node(BlockDriverState *bs);
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
- Error **errp);
-void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
-
-void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
-void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
-void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
-void blk_iostatus_enable(BlockBackend *blk);
-bool blk_iostatus_is_enabled(const BlockBackend *blk);
-BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
-void blk_iostatus_disable(BlockBackend *blk);
-void blk_iostatus_reset(BlockBackend *blk);
-void blk_iostatus_set_err(BlockBackend *blk, int error);
-int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
-void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
-DeviceState *blk_get_attached_dev(BlockBackend *blk);
-char *blk_get_attached_dev_id(BlockBackend *blk);
-BlockBackend *blk_by_dev(void *dev);
-BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
-void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-
-static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
- int64_t bytes, void *buf,
- BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- assert(bytes <= SIZE_MAX);
-
- return blk_co_preadv(blk, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
- int64_t bytes, void *buf,
- BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- assert(bytes <= SIZE_MAX);
-
- return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
-}
-
-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
- BdrvRequestFlags flags);
-int64_t blk_getlength(BlockBackend *blk);
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
-int64_t blk_nb_sectors(BlockBackend *blk);
-BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
- QEMUIOVector *qiov, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
- QEMUIOVector *qiov, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
- BlockCompletionFunc *cb, void *opaque);
-void blk_aio_cancel(BlockAIOCB *acb);
-void blk_aio_cancel_async(BlockAIOCB *acb);
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
- int64_t bytes);
-int coroutine_fn blk_co_flush(BlockBackend *blk);
-int blk_flush(BlockBackend *blk);
-int blk_commit_all(void);
-void blk_inc_in_flight(BlockBackend *blk);
-void blk_dec_in_flight(BlockBackend *blk);
-void blk_drain(BlockBackend *blk);
-void blk_drain_all(void);
-void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error);
-BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
-BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
- int error);
-void blk_error_action(BlockBackend *blk, BlockErrorAction action,
- bool is_read, int error);
-bool blk_supports_write_perm(BlockBackend *blk);
-bool blk_is_writable(BlockBackend *blk);
-bool blk_is_sg(BlockBackend *blk);
-bool blk_enable_write_cache(BlockBackend *blk);
-void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
-void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-bool blk_is_inserted(BlockBackend *blk);
-bool blk_is_available(BlockBackend *blk);
-void blk_lock_medium(BlockBackend *blk, bool locked);
-void blk_eject(BlockBackend *blk, bool eject_flag);
-int blk_get_flags(BlockBackend *blk);
-uint32_t blk_get_request_alignment(BlockBackend *blk);
-uint32_t blk_get_max_transfer(BlockBackend *blk);
-uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
-int blk_get_max_iov(BlockBackend *blk);
-int blk_get_max_hw_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
-void *blk_try_blockalign(BlockBackend *blk, size_t size);
-void *blk_blockalign(BlockBackend *blk, size_t size);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
-void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
-void blk_op_block_all(BlockBackend *blk, Error *reason);
-void blk_op_unblock_all(BlockBackend *blk, Error *reason);
-AioContext *blk_get_aio_context(BlockBackend *blk);
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
- Error **errp);
-void blk_add_aio_context_notifier(BlockBackend *blk,
- void (*attached_aio_context)(AioContext *new_context, void *opaque),
- void (*detach_aio_context)(void *opaque), void *opaque);
-void blk_remove_aio_context_notifier(BlockBackend *blk,
- void (*attached_aio_context)(AioContext *,
- void *),
- void (*detach_aio_context)(void *),
- void *opaque);
-void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_io_plug(BlockBackend *blk);
-void blk_io_unplug(BlockBackend *blk);
-BlockAcctStats *blk_get_stats(BlockBackend *blk);
-BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
-void blk_update_root_state(BlockBackend *blk);
-bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
-int blk_get_open_flags_from_root_state(BlockBackend *blk);
-
-void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
- int64_t bytes);
-int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
- int64_t pos, int size);
-int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
-int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
-int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
-BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
- BlockCompletionFunc *cb,
- void *opaque, int ret);
-
-void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
-void blk_io_limits_disable(BlockBackend *blk);
-void blk_io_limits_enable(BlockBackend *blk, const char *group);
-void blk_io_limits_update_group(BlockBackend *blk, const char *group);
-void blk_set_force_allow_inactivate(BlockBackend *blk);
-
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
-void blk_unregister_buf(BlockBackend *blk, void *host);
-
-int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
- BlockBackend *blk_out, int64_t off_out,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
-const BdrvChild *blk_root(BlockBackend *blk);
-
-int blk_make_empty(BlockBackend *blk, Error **errp);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index f9fb54d..3211b16 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -13,9 +13,6 @@
#include "block/block.h"
#include "qemu/queue.h"
-void blockdev_mark_auto_del(BlockBackend *blk);
-void blockdev_auto_del(BlockBackend *blk);
-
typedef enum {
IF_DEFAULT = -1, /* for use with drive_add() only */
/*
@@ -38,6 +35,16 @@ struct DriveInfo {
QTAILQ_ENTRY(DriveInfo) next;
};
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+void blockdev_mark_auto_del(BlockBackend *blk);
+void blockdev_auto_del(BlockBackend *blk);
+
DriveInfo *blk_legacy_dinfo(BlockBackend *blk);
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo);
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
index 247f066..bf8f99a 100644
--- a/include/sysemu/hax.h
+++ b/include/sysemu/hax.h
@@ -25,17 +25,23 @@
int hax_sync_vcpus(void);
#ifdef NEED_CPU_H
+# ifdef CONFIG_HAX
+# define CONFIG_HAX_IS_POSSIBLE
+# endif
+#else /* !NEED_CPU_H */
+# define CONFIG_HAX_IS_POSSIBLE
+#endif
-#ifdef CONFIG_HAX
+#ifdef CONFIG_HAX_IS_POSSIBLE
-int hax_enabled(void);
+extern bool hax_allowed;
-#else /* CONFIG_HAX */
+#define hax_enabled() (hax_allowed)
-#define hax_enabled() (0)
+#else /* !CONFIG_HAX_IS_POSSIBLE */
-#endif /* CONFIG_HAX */
+#define hax_enabled() (0)
-#endif /* NEED_CPU_H */
+#endif /* CONFIG_HAX_IS_POSSIBLE */
#endif /* QEMU_HAX_H */
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index 01b5ebf..22903a5 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -23,9 +23,4 @@ void cpu_synchronize_post_reset(CPUState *cpu);
void cpu_synchronize_post_init(CPUState *cpu);
void cpu_synchronize_pre_loadvm(CPUState *cpu);
-static inline bool cpu_check_are_resettable(void)
-{
- return kvm_enabled() ? kvm_cpu_check_are_resettable() : true;
-}
-
#endif /* QEMU_HW_ACCEL_H */
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 6eb39a0..a5bec96 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -249,6 +249,9 @@ int kvm_has_intx_set_mask(void);
bool kvm_arm_supports_user_irq(void);
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
#ifdef NEED_CPU_H
#include "cpu.h"
@@ -261,9 +264,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
void kvm_remove_all_breakpoints(CPUState *cpu);
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap);
-int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
-int kvm_on_sigbus(int code, void *addr);
-
/* internal API */
int kvm_ioctl(KVMState *s, int type, ...);
diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h
index 4b20f1a..3bbeb1b 100644
--- a/include/sysemu/memory_mapping.h
+++ b/include/sysemu/memory_mapping.h
@@ -15,8 +15,7 @@
#define MEMORY_MAPPING_H
#include "qemu/queue.h"
-#include "exec/cpu-defs.h"
-#include "exec/memory.h"
+#include "exec/cpu-common.h"
typedef struct GuestPhysBlock {
/* visible to guest, reflects PCI hole, etc */
@@ -43,7 +42,7 @@ typedef struct GuestPhysBlockList {
/* The physical and virtual address in the memory mapping are contiguous. */
typedef struct MemoryMapping {
hwaddr phys_addr;
- target_ulong virt_addr;
+ vaddr virt_addr;
ram_addr_t length;
QTAILQ_ENTRY(MemoryMapping) next;
} MemoryMapping;
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 2edf336..dd64fb4 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -55,6 +55,7 @@ int os_mlock(void);
typedef struct timeval qemu_timeval;
#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
+int os_set_daemonize(bool d);
bool is_daemonized(void);
/**
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 43f569b..7707522 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -77,6 +77,14 @@ typedef struct {
} qemu_timeval;
int qemu_gettimeofday(qemu_timeval *tp);
+static inline int os_set_daemonize(bool d)
+{
+ if (d) {
+ return -ENOTSUP;
+ }
+ return 0;
+}
+
static inline bool is_daemonized(void)
{
return false;
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 675873e..dd44473 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -245,6 +245,9 @@ DEF(or_vec, 1, 2, 0, IMPLVEC)
DEF(xor_vec, 1, 2, 0, IMPLVEC)
DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
+DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
+DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 42f5b50..73869fd 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -43,7 +43,7 @@
#else
#define MAX_OPC_PARAM_PER_ARG 1
#endif
-#define MAX_OPC_PARAM_IARGS 6
+#define MAX_OPC_PARAM_IARGS 7
#define MAX_OPC_PARAM_OARGS 1
#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
@@ -183,6 +183,9 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0