aboutsummaryrefslogtreecommitdiff
path: root/include/block
diff options
context:
space:
mode:
Diffstat (limited to 'include/block')
-rw-r--r--include/block/aio.h14
-rw-r--r--include/block/aio_task.h2
-rw-r--r--include/block/block-common.h13
-rw-r--r--include/block/block-copy.h1
-rw-r--r--include/block/block-global-state.h69
-rw-r--r--include/block/block-io.h4
-rw-r--r--include/block/block_int-common.h67
-rw-r--r--include/block/block_int-global-state.h6
-rw-r--r--include/block/block_int-io.h4
-rw-r--r--include/block/blockjob.h4
-rw-r--r--include/block/export.h3
-rw-r--r--include/block/graph-lock.h13
-rw-r--r--include/block/nbd.h6
-rw-r--r--include/block/nvme.h106
-rw-r--r--include/block/qdict.h2
-rw-r--r--include/block/raw-aio.h19
-rw-r--r--include/block/snapshot.h6
-rw-r--r--include/block/thread-pool.h62
-rw-r--r--include/block/ufs.h13
19 files changed, 321 insertions, 93 deletions
diff --git a/include/block/aio.h b/include/block/aio.h
index 4ee8193..99ff484 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -20,6 +20,7 @@
#include "qemu/coroutine-core.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
+#include "qemu/lockcnt.h"
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "block/graph-lock.h"
@@ -53,7 +54,7 @@ typedef void QEMUBHFunc(void *opaque);
typedef bool AioPollFn(void *opaque);
typedef void IOHandler(void *opaque);
-struct ThreadPool;
+struct ThreadPoolAio;
struct LinuxAioState;
typedef struct LuringState LuringState;
@@ -122,6 +123,10 @@ struct BHListSlice {
typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
+typedef struct AioPolledEvent {
+ int64_t ns; /* current polling time in nanoseconds */
+} AioPolledEvent;
+
struct AioContext {
GSource source;
@@ -206,7 +211,7 @@ struct AioContext {
/* Thread pool for performing work and receiving completion callbacks.
* Has its own locking.
*/
- struct ThreadPool *thread_pool;
+ struct ThreadPoolAio *thread_pool;
#ifdef CONFIG_LINUX_AIO
struct LinuxAioState *linux_aio;
@@ -228,7 +233,6 @@ struct AioContext {
int poll_disable_cnt;
/* Polling mode parameters */
- int64_t poll_ns; /* current polling time in nanoseconds */
int64_t poll_max_ns; /* maximum polling time in nanoseconds */
int64_t poll_grow; /* polling time growth factor */
int64_t poll_shrink; /* polling time shrink factor */
@@ -499,8 +503,8 @@ void aio_set_event_notifier_poll(AioContext *ctx,
*/
GSource *aio_get_g_source(AioContext *ctx);
-/* Return the ThreadPool bound to this AioContext */
-struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+/* Return the ThreadPoolAio bound to this AioContext */
+struct ThreadPoolAio *aio_get_thread_pool(AioContext *ctx);
/* Setup the LinuxAioState bound to this AioContext */
struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
diff --git a/include/block/aio_task.h b/include/block/aio_task.h
index 18a9c41..c81d637 100644
--- a/include/block/aio_task.h
+++ b/include/block/aio_task.h
@@ -40,8 +40,6 @@ void aio_task_pool_free(AioTaskPool *);
/* error code of failed task or 0 if all is OK */
int aio_task_pool_status(AioTaskPool *pool);
-bool aio_task_pool_empty(AioTaskPool *pool);
-
/* User provides filled @task, however task->pool will be set automatically */
void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task);
diff --git a/include/block/block-common.h b/include/block/block-common.h
index 338fe5f..c8c626d 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -257,6 +257,7 @@ typedef enum {
#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
#define BDRV_OPT_DISCARD "discard"
#define BDRV_OPT_FORCE_SHARE "force-share"
+#define BDRV_OPT_ACTIVE "active"
#define BDRV_SECTOR_BITS 9
@@ -332,6 +333,17 @@ typedef enum {
#define BDRV_BLOCK_RECURSE 0x40
#define BDRV_BLOCK_COMPRESSED 0x80
+/*
+ * Block status hints: the bitwise-or of these flags emphasizes what
+ * the caller hopes to learn; drivers may answer faster by doing less
+ * work when the hint permits. BDRV_WANT_PRECISE requests all three.
+ */
+#define BDRV_WANT_ZERO BDRV_BLOCK_ZERO
+#define BDRV_WANT_OFFSET_VALID BDRV_BLOCK_OFFSET_VALID
+#define BDRV_WANT_ALLOCATED BDRV_BLOCK_ALLOCATED
+#define BDRV_WANT_PRECISE (BDRV_WANT_ZERO | BDRV_WANT_OFFSET_VALID | \
+ BDRV_WANT_ALLOCATED)
+
typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
typedef struct BDRVReopenState {
@@ -355,7 +367,6 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_CHANGE,
BLOCK_OP_TYPE_COMMIT_SOURCE,
BLOCK_OP_TYPE_COMMIT_TARGET,
- BLOCK_OP_TYPE_DATAPLANE,
BLOCK_OP_TYPE_DRIVE_DEL,
BLOCK_OP_TYPE_EJECT,
BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index bdc703b..dd5cc82 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
BlockDriverState *copy_bitmap_bs,
const BdrvDirtyBitmap *bitmap,
bool discard_source,
+ uint64_t min_cluster_size,
Error **errp);
/* Function should be called prior any actual copy request */
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index bd7cecd..62da83c 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -74,13 +74,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
int GRAPH_WRLOCK
bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp);
-int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
- Error **errp);
-BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
- int flags, Error **errp);
+int GRAPH_UNLOCKED
+bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, Error **errp);
+BlockDriverState * GRAPH_UNLOCKED
+bdrv_insert_node(BlockDriverState *bs, QDict *node_options, int flags,
+ Error **errp);
int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
-BdrvChild * no_coroutine_fn
+BdrvChild * no_coroutine_fn GRAPH_UNLOCKED
bdrv_open_child(const char *filename, QDict *options, const char *bdref_key,
BlockDriverState *parent, const BdrvChildClass *child_class,
BdrvChildRole child_role, bool allow_none, Error **errp);
@@ -90,9 +91,10 @@ bdrv_co_open_child(const char *filename, QDict *options, const char *bdref_key,
BlockDriverState *parent, const BdrvChildClass *child_class,
BdrvChildRole child_role, bool allow_none, Error **errp);
-int bdrv_open_file_child(const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState *parent, Error **errp);
+int GRAPH_UNLOCKED
+bdrv_open_file_child(const char *filename, QDict *options,
+ const char *bdref_key, BlockDriverState *parent,
+ Error **errp);
BlockDriverState * no_coroutine_fn
bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
@@ -100,11 +102,9 @@ bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
BlockDriverState * coroutine_fn no_co_wrapper
bdrv_co_open_blockdev_ref(BlockdevRef *ref, Error **errp);
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp);
int GRAPH_WRLOCK
-bdrv_set_backing_hd_drained(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp);
+bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
@@ -123,11 +123,12 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
Error **errp);
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, QDict *options,
- bool keep_old_opts);
+BlockReopenQueue * GRAPH_UNLOCKED
+bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs,
+ QDict *options, bool keep_old_opts);
void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int GRAPH_UNLOCKED
+bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
Error **errp);
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
@@ -143,9 +144,10 @@ int bdrv_commit(BlockDriverState *bs);
int GRAPH_RDLOCK bdrv_make_empty(BdrvChild *c, Error **errp);
void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
- const char *backing_file_str,
- bool backing_mask_protocol);
+int GRAPH_UNLOCKED
+bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+ const char *backing_file_str,
+ bool backing_mask_protocol);
BlockDriverState * GRAPH_RDLOCK
bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs);
@@ -175,21 +177,27 @@ BlockDriverState * GRAPH_RDLOCK
check_to_replace_node(BlockDriverState *parent_bs, const char *node_name,
Error **errp);
+
+bool GRAPH_RDLOCK bdrv_is_inactive(BlockDriverState *bs);
+
int no_coroutine_fn GRAPH_RDLOCK
bdrv_activate(BlockDriverState *bs, Error **errp);
int coroutine_fn no_co_wrapper_bdrv_rdlock
bdrv_co_activate(BlockDriverState *bs, Error **errp);
+int no_coroutine_fn GRAPH_RDLOCK
+bdrv_inactivate(BlockDriverState *bs, Error **errp);
+
void bdrv_activate_all(Error **errp);
-int bdrv_inactivate_all(void);
+int GRAPH_UNLOCKED bdrv_inactivate_all(void);
int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain_all_begin(void);
+void GRAPH_UNLOCKED bdrv_close_all(void);
+void GRAPH_UNLOCKED bdrv_drain_all_begin(void);
void bdrv_drain_all_begin_nopoll(void);
void bdrv_drain_all_end(void);
-void bdrv_drain_all(void);
+void GRAPH_UNLOCKED bdrv_drain_all(void);
void bdrv_aio_cancel(BlockAIOCB *acb);
@@ -268,11 +276,16 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
-int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
- BdrvChild *ignore_child, Error **errp);
+bool GRAPH_RDLOCK
+bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp);
+int GRAPH_UNLOCKED
+bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
+int GRAPH_RDLOCK
+bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index b49e053..4cf83fb 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -161,6 +161,8 @@ bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base,
int coroutine_fn GRAPH_RDLOCK
bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_is_all_zeroes(BlockDriverState *bs);
int GRAPH_RDLOCK
bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
@@ -429,7 +431,7 @@ bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
*
* This function can be recursive.
*/
-void bdrv_drained_begin(BlockDriverState *bs);
+void GRAPH_UNLOCKED bdrv_drained_begin(BlockDriverState *bs);
/**
* bdrv_do_drained_begin_quiesce:
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index ebb4e56..034c063 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -248,7 +248,7 @@ struct BlockDriver {
int GRAPH_UNLOCKED_PTR (*bdrv_open)(
BlockDriverState *bs, QDict *options, int flags, Error **errp);
- void (*bdrv_close)(BlockDriverState *bs);
+ void GRAPH_UNLOCKED_PTR (*bdrv_close)(BlockDriverState *bs);
int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)(
BlockdevCreateOptions *opts, Error **errp);
@@ -396,9 +396,23 @@ struct BlockDriver {
int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)(
BlockDriverState *bs, HDGeometry *geo);
+ /**
+ * Hot add a BDS's child. Used in combination with bdrv_del_child, so the
+ * user can take a child offline when it is broken and take a new child
+ * online.
+ *
+ * All block nodes must be drained.
+ */
void GRAPH_WRLOCK_PTR (*bdrv_add_child)(
BlockDriverState *parent, BlockDriverState *child, Error **errp);
+ /**
+ * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the
+ * user can take a child offline when it is broken and take a new child
+ * online.
+ *
+ * All block nodes must be drained.
+ */
void GRAPH_WRLOCK_PTR (*bdrv_del_child)(
BlockDriverState *parent, BdrvChild *child, Error **errp);
@@ -506,10 +520,6 @@ struct BlockDriver {
BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)(
BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)(
- BlockDriverState *bs, int64_t offset, int bytes,
- BlockCompletionFunc *cb, void *opaque);
-
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
@@ -608,15 +618,16 @@ struct BlockDriver {
* according to the current layer, and should only need to set
* BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
* and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
- * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
- * block.h for the overall meaning of the bits. As a hint, the
- * flag want_zero is true if the caller cares more about precise
- * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
- * overall allocation (favor larger *pnum, perhaps by reporting
- * _DATA instead of _ZERO). The block layer guarantees input
- * clamped to bdrv_getlength() and aligned to request_alignment,
- * as well as non-NULL pnum, map, and file; in turn, the driver
- * must return an error or set pnum to an aligned non-zero value.
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). The
+ * caller will synthesize BDRV_BLOCK_ALLOCATED based on the
+ * non-zero results. See block.h for the overall meaning of the
+ * bits. As a hint, the flags in @mode may include a bitwise-or
+ * of BDRV_WANT_ALLOCATED, BDRV_WANT_OFFSET_VALID, or
+ * BDRV_WANT_ZERO based on what the caller is looking for in the
+ * results. The block layer guarantees input clamped to
+ * bdrv_getlength() and aligned to request_alignment, as well as
+ * non-NULL pnum, map, and file; in turn, the driver must return
+ * an error or set pnum to an aligned non-zero value.
*
* Note that @bytes is just a hint on how big of a region the
* caller wants to inspect. It is not a limit on *pnum.
@@ -628,8 +639,8 @@ struct BlockDriver {
* to clamping *pnum for return to its caller.
*/
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)(
- BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ BlockDriverState *bs, unsigned int mode,
+ int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
/*
@@ -653,8 +664,8 @@ struct BlockDriver {
QEMUIOVector *qiov, size_t qiov_offset);
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)(
- BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map, BlockDriverState **file);
+ BlockDriverState *bs, unsigned int mode, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file);
int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)(
BlockDriverState *bs, int64_t offset, int64_t bytes);
@@ -986,9 +997,21 @@ struct BdrvChildClass {
bool backing_mask_protocol,
Error **errp);
- bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
+ /*
+ * Notifies the parent that the child is trying to change its AioContext.
+ * The parent may in turn change the AioContext of other nodes in the same
+ * transaction. Returns true if the change is possible and the transaction
+ * can be continued. Returns false and sets @errp if not and the transaction
+ * must be aborted.
+ *
+ * @visited will accumulate all visited BdrvChild objects. The caller is
+ * responsible for freeing the hash table afterwards.
+ *
+ * Must be called with the affected block nodes drained.
+ */
+ bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx,
+ GHashTable *visited,
+ Transaction *tran, Error **errp);
/*
* I/O API functions. These functions are thread-safe.
@@ -1230,7 +1253,7 @@ struct BlockDriverState {
/* do we need to tell the quest if we have a volatile write cache? */
int enable_write_cache;
- /* Accessed with atomic ops. */
+ /* Accessed only in the main thread. */
int quiesce_counter;
unsigned int write_gen; /* Current data generation */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index eb2d92a..e7c8f1a 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -139,7 +139,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
* @buf_size: The amount of data that can be in flight at one time.
* @mode: Whether to collapse all images in the chain to the target.
* @backing_mode: How to establish the target's backing chain after completion.
- * @zero_target: Whether the target should be explicitly zero-initialized
+ * @target_is_zero: Whether the target already is zero-initialized.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
* @unmap: Whether to unmap target where source sectors only contain zeroes.
@@ -159,7 +159,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
- bool zero_target,
+ bool target_is_zero,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, const char *filter_node_name,
@@ -179,6 +179,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
* all ".has_*" fields are ignored.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
+ * @on_cbw_error: The action to take upon error in copy-before-write operations.
* @creation_flags: Flags that control the behavior of the Job lifetime.
* See @BlockJobCreateFlags
* @cb: Completion function for the job.
@@ -198,6 +199,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BackupPerf *perf,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
+ OnCbwError on_cbw_error,
int creation_flags,
BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp);
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 4a7cf2b..4f94eb3 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -38,8 +38,8 @@
int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status(
- BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map, BlockDriverState **file);
+ BlockDriverState *bs, unsigned int mode, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file);
int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
int64_t offset, int64_t bytes);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 7061ab7..85284cb 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id);
* Add @bs to the list of BlockDriverState that are involved in
* @job. This means that all operations will be blocked on @bs while
* @job exists.
+ *
+ * All block nodes must be drained.
*/
int GRAPH_WRLOCK
block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
@@ -149,7 +151,7 @@ block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
* Remove all BlockDriverStates from the list of nodes that are involved in the
* job. This removes the blockers added with block_job_add_bdrv().
*/
-void block_job_remove_all_bdrv(BlockJob *job);
+void GRAPH_UNLOCKED block_job_remove_all_bdrv(BlockJob *job);
/**
* block_job_has_bdrv:
diff --git a/include/block/export.h b/include/block/export.h
index f2fe0f8..4bd9531 100644
--- a/include/block/export.h
+++ b/include/block/export.h
@@ -29,6 +29,9 @@ typedef struct BlockExportDriver {
*/
size_t instance_size;
+ /* True if the export type supports running on an inactive node */
+ bool supports_inactive;
+
/* Creates and starts a new block export */
int (*create)(BlockExport *, BlockExportOptions *, Error **);
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index dc8d949..95bf5ed 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -20,8 +20,6 @@
#ifndef GRAPH_LOCK_H
#define GRAPH_LOCK_H
-#include "qemu/clang-tsa.h"
-
/**
* Graph Lock API
* This API provides a rwlock used to protect block layer
@@ -115,9 +113,20 @@ void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA
bdrv_graph_wrlock(void);
/*
+ * bdrv_graph_wrlock_drained:
+ * Similar to bdrv_graph_wrlock, but will begin a drained section before
+ * locking.
+ */
+void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA
+bdrv_graph_wrlock_drained(void);
+
+/*
* bdrv_graph_wrunlock:
* Write finished, reset global has_writer to 0 and restart
* all readers that are waiting.
+ *
+ * Also ends the drained section if bdrv_graph_wrlock_drained() was used to lock
+ * the graph.
*/
void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA
bdrv_graph_wrunlock(void);
diff --git a/include/block/nbd.h b/include/block/nbd.h
index d4f8b21..92987c7 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -428,9 +428,9 @@ void nbd_client_put(NBDClient *client);
void nbd_server_is_qemu_nbd(int max_connections);
bool nbd_server_is_running(void);
int nbd_server_max_connections(void);
-void nbd_server_start(SocketAddress *addr, const char *tls_creds,
- const char *tls_authz, uint32_t max_connections,
- Error **errp);
+void nbd_server_start(SocketAddress *addr, uint32_t handshake_max_secs,
+ const char *tls_creds, const char *tls_authz,
+ uint32_t max_connections, Error **errp);
void nbd_server_start_options(NbdServerOptions *arg, Error **errp);
/* nbd_read
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 5298bc4..358e516 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -142,9 +142,9 @@ enum NvmeCapMask {
((cap) |= (uint64_t)((val) & CAP_CMBS_MASK) << CAP_CMBS_SHIFT)
enum NvmeCapCss {
- NVME_CAP_CSS_NVM = 1 << 0,
- NVME_CAP_CSS_CSI_SUPP = 1 << 6,
- NVME_CAP_CSS_ADMIN_ONLY = 1 << 7,
+ NVME_CAP_CSS_NCSS = 1 << 0,
+ NVME_CAP_CSS_IOCSS = 1 << 6,
+ NVME_CAP_CSS_NOIOCSS = 1 << 7,
};
enum NvmeCcShift {
@@ -177,7 +177,7 @@ enum NvmeCcMask {
enum NvmeCcCss {
NVME_CC_CSS_NVM = 0x0,
- NVME_CC_CSS_CSI = 0x6,
+ NVME_CC_CSS_ALL = 0x6,
NVME_CC_CSS_ADMIN_ONLY = 0x7,
};
@@ -906,8 +906,7 @@ enum NvmeStatusCodes {
NVME_SGL_DESCR_TYPE_INVALID = 0x0011,
NVME_INVALID_USE_OF_CMB = 0x0012,
NVME_INVALID_PRP_OFFSET = 0x0013,
- NVME_CMD_SET_CMB_REJECTED = 0x002b,
- NVME_INVALID_CMD_SET = 0x002c,
+ NVME_COMMAND_INTERRUPTED = 0x0021,
NVME_FDP_DISABLED = 0x0029,
NVME_INVALID_PHID_LIST = 0x002a,
NVME_LBA_RANGE = 0x0080,
@@ -940,6 +939,10 @@ enum NvmeStatusCodes {
NVME_INVALID_SEC_CTRL_STATE = 0x0120,
NVME_INVALID_NUM_RESOURCES = 0x0121,
NVME_INVALID_RESOURCE_ID = 0x0122,
+ NVME_IOCS_NOT_SUPPORTED = 0x0129,
+ NVME_IOCS_NOT_ENABLED = 0x012a,
+ NVME_IOCS_COMBINATION_REJECTED = 0x012b,
+ NVME_INVALID_IOCS = 0x012c,
NVME_CONFLICTING_ATTRS = 0x0180,
NVME_INVALID_PROT_INFO = 0x0181,
NVME_WRITE_TO_RO = 0x0182,
@@ -1015,6 +1018,40 @@ typedef struct QEMU_PACKED NvmeSmartLog {
uint8_t reserved2[320];
} NvmeSmartLog;
+typedef struct QEMU_PACKED NvmeSmartLogExtended {
+ uint64_t physical_media_units_written[2];
+ uint64_t physical_media_units_read[2];
+ uint64_t bad_user_blocks;
+ uint64_t bad_system_nand_blocks;
+ uint64_t xor_recovery_count;
+ uint64_t uncorrectable_read_error_count;
+ uint64_t soft_ecc_error_count;
+ uint64_t end2end_correction_counts;
+ uint8_t system_data_percent_used;
+ uint8_t refresh_counts[7];
+ uint64_t user_data_erase_counts;
+ uint16_t thermal_throttling_stat_and_count;
+ uint16_t dssd_spec_version[3];
+ uint64_t pcie_correctable_error_count;
+ uint32_t incomplete_shutdowns;
+ uint32_t rsvd116;
+ uint8_t percent_free_blocks;
+ uint8_t rsvd121[7];
+ uint16_t capacity_health;
+ uint8_t nvme_errata_ver;
+ uint8_t rsvd131[5];
+ uint64_t unaligned_io;
+ uint64_t security_ver_num;
+ uint64_t total_nuse;
+ uint64_t plp_start_count[2];
+ uint64_t endurance_estimate[2];
+ uint64_t pcie_retraining_count;
+ uint64_t power_state_change_count;
+ uint8_t rsvd208[286];
+ uint16_t log_page_version;
+ uint64_t log_page_guid[2];
+} NvmeSmartLogExtended;
+
#define NVME_SMART_WARN_MAX 6
enum NvmeSmartWarn {
NVME_SMART_SPARE = 1 << 0,
@@ -1052,6 +1089,12 @@ enum NvmeLogIdentifier {
NVME_LOG_FDP_RUH_USAGE = 0x21,
NVME_LOG_FDP_STATS = 0x22,
NVME_LOG_FDP_EVENTS = 0x23,
+ NVME_LOG_VENDOR_START = 0xc0,
+ NVME_LOG_VENDOR_END = 0xff,
+};
+
+enum NvmeOcpLogIdentifier {
+ NVME_OCP_EXTENDED_SMART_INFO = 0xc0,
};
typedef struct QEMU_PACKED NvmePSD {
@@ -1077,6 +1120,7 @@ enum NvmeIdCns {
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07,
+ NVME_ID_CNS_CS_IND_NS = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
@@ -1087,6 +1131,7 @@ enum NvmeIdCns {
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,
+ NVME_ID_CNS_CS_IND_NS_ALLOCATED = 0x1f,
};
typedef struct QEMU_PACKED NvmeIdCtrl {
@@ -1165,6 +1210,8 @@ typedef struct NvmeIdCtrlZoned {
uint8_t rsvd1[4095];
} NvmeIdCtrlZoned;
+#define NVME_ID_CTRL_NVM_DMRL_MAX 255
+
typedef struct NvmeIdCtrlNvm {
uint8_t vsl;
uint8_t wzsl;
@@ -1182,6 +1229,7 @@ enum NvmeIdCtrlOaes {
enum NvmeIdCtrlCtratt {
NVME_CTRATT_ENDGRPS = 1 << 4,
NVME_CTRATT_ELBAS = 1 << 15,
+ NVME_CTRATT_MEM = 1 << 16,
NVME_CTRATT_FDPS = 1 << 19,
};
@@ -1189,9 +1237,10 @@ enum NvmeIdCtrlOacs {
NVME_OACS_SECURITY = 1 << 0,
NVME_OACS_FORMAT = 1 << 1,
NVME_OACS_FW = 1 << 2,
- NVME_OACS_NS_MGMT = 1 << 3,
+ NVME_OACS_NMS = 1 << 3,
NVME_OACS_DIRECTIVES = 1 << 5,
- NVME_OACS_DBBUF = 1 << 8,
+ NVME_OACS_VMS = 1 << 7,
+ NVME_OACS_DBCS = 1 << 8,
};
enum NvmeIdCtrlOncs {
@@ -1285,6 +1334,8 @@ enum NvmeNsAttachmentOperation {
#define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff)
#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000)
+#define NVME_ID_CTRL_CTRATT_MEM(ctratt) ((ctratt) & NVME_CTRATT_MEM)
+
enum NvmeFeatureIds {
NVME_ARBITRATION = 0x1,
NVME_POWER_MANAGEMENT = 0x2,
@@ -1413,9 +1464,28 @@ typedef struct QEMU_PACKED NvmeIdNsNvm {
uint8_t pic;
uint8_t rsvd9[3];
uint32_t elbaf[NVME_MAX_NLBAF];
- uint8_t rsvd268[3828];
+ uint32_t npdgl;
+ uint32_t nprg;
+ uint32_t npra;
+ uint32_t nors;
+ uint32_t npdal;
+ uint8_t rsvd288[3808];
} NvmeIdNsNvm;
+typedef struct QEMU_PACKED NvmeIdNsInd {
+ uint8_t nsfeat;
+ uint8_t nmic;
+ uint8_t rescap;
+ uint8_t fpi;
+ uint32_t anagrpid;
+ uint8_t nsattr;
+ uint8_t rsvd9;
+ uint16_t nvmsetid;
+ uint16_t endgrpid;
+ uint8_t nstat;
+ uint8_t rsvd15[4081];
+} NvmeIdNsInd;
+
typedef struct QEMU_PACKED NvmeIdNsDescr {
uint8_t nidt;
uint8_t nidl;
@@ -1436,8 +1506,10 @@ enum NvmeNsIdentifierType {
NVME_NIDT_CSI = 0x04,
};
-enum NvmeIdNsNmic {
- NVME_NMIC_NS_SHARED = 1 << 0,
+enum NvmeIdNsIndependent {
+ NVME_ID_NS_IND_NMIC_SHRNS = 1 << 0,
+ NVME_ID_NS_IND_NMIC_DISNS = 1 << 1,
+ NVME_ID_NS_IND_NSTAT_NRDY = 1 << 0,
};
enum NvmeCsi {
@@ -1515,6 +1587,16 @@ enum NvmeIdNsMc {
NVME_ID_NS_MC_SEPARATE = 1 << 1,
};
+enum NvmeIdNsNsfeat {
+ NVME_ID_NS_NSFEAT_THINP = 1 << 0,
+ NVME_ID_NS_NSFEAT_NSABPNS = 1 << 1,
+ NVME_ID_NS_NSFEAT_DAE = 1 << 2,
+ NVME_ID_NS_NSFEAT_UIDREUSE = 1 << 3,
+ NVME_ID_NS_NSFEAT_OPTPERF_ALL = 3 << 4,
+ NVME_ID_NS_NSFEAT_MAM = 1 << 6,
+ NVME_ID_NS_NSFEAT_OPTRPERF = 1 << 7,
+};
+
#define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK)
enum NvmePIFormat {
@@ -1863,6 +1945,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLogExtended) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096);
@@ -1870,6 +1953,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsInd) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsNvm) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
diff --git a/include/block/qdict.h b/include/block/qdict.h
index b4c28d9..53c4df4 100644
--- a/include/block/qdict.h
+++ b/include/block/qdict.h
@@ -10,7 +10,7 @@
#ifndef BLOCK_QDICT_H
#define BLOCK_QDICT_H
-#include "qapi/qmp/qdict.h"
+#include "qobject/qdict.h"
QObject *qdict_crumple(const QDict *src, Error **errp);
void qdict_flatten(QDict *qdict);
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index 6267068..6570244 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -17,6 +17,7 @@
#define QEMU_RAW_AIO_H
#include "block/aio.h"
+#include "block/block-common.h"
#include "qemu/iov.h"
/* AIO request types */
@@ -58,11 +59,18 @@ void laio_cleanup(LinuxAioState *s);
/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
- int type, uint64_t dev_max_batch);
+ int type, BdrvRequestFlags flags,
+ uint64_t dev_max_batch);
bool laio_has_fdsync(int);
+bool laio_has_fua(void);
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
+#else
+static inline bool laio_has_fua(void)
+{
+ return false;
+}
#endif
/* io_uring.c - Linux io_uring implementation */
#ifdef CONFIG_LINUX_IO_URING
@@ -71,9 +79,16 @@ void luring_cleanup(LuringState *s);
/* luring_co_submit: submit I/O requests in the thread's current AioContext. */
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
- QEMUIOVector *qiov, int type);
+ QEMUIOVector *qiov, int type,
+ BdrvRequestFlags flags);
void luring_detach_aio_context(LuringState *s, AioContext *old_context);
void luring_attach_aio_context(LuringState *s, AioContext *new_context);
+bool luring_has_fua(void);
+#else
+static inline bool luring_has_fua(void)
+{
+ return false;
+}
#endif
#ifdef _WIN32
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 304cc6e..2316a43 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -90,9 +90,9 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
Error **errp);
-int bdrv_all_delete_snapshot(const char *name,
- bool has_devices, strList *devices,
- Error **errp);
+int GRAPH_UNLOCKED
+bdrv_all_delete_snapshot(const char *name, bool has_devices, strList *devices,
+ Error **errp);
int bdrv_all_goto_snapshot(const char *name,
bool has_devices, strList *devices,
Error **errp);
diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
index 948ff5f..dd48cf0 100644
--- a/include/block/thread-pool.h
+++ b/include/block/thread-pool.h
@@ -24,20 +24,70 @@
typedef int ThreadPoolFunc(void *opaque);
-typedef struct ThreadPool ThreadPool;
+typedef struct ThreadPoolAio ThreadPoolAio;
-ThreadPool *thread_pool_new(struct AioContext *ctx);
-void thread_pool_free(ThreadPool *pool);
+ThreadPoolAio *thread_pool_new_aio(struct AioContext *ctx);
+void thread_pool_free_aio(ThreadPoolAio *pool);
/*
- * thread_pool_submit* API: submit I/O requests in the thread's
+ * thread_pool_submit_{aio,co} API: submit I/O requests in the thread's
* current AioContext.
*/
BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
BlockCompletionFunc *cb, void *opaque);
int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
-void thread_pool_submit(ThreadPoolFunc *func, void *arg);
+void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx);
+
+/* ------------------------------------------- */
+/* Generic thread pool types and methods below */
+typedef struct ThreadPool ThreadPool;
+
+/* Create a new thread pool. Never returns NULL. */
+ThreadPool *thread_pool_new(void);
+
+/*
+ * Free the thread pool.
+ * Waits for all the previously submitted work to complete before performing
+ * the actual freeing operation.
+ */
+void thread_pool_free(ThreadPool *pool);
+
+/*
+ * Submit a new work (task) for the pool.
+ *
+ * @opaque_destroy is an optional GDestroyNotify for the @opaque argument
+ * to the work function at @func.
+ */
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func,
+ void *opaque, GDestroyNotify opaque_destroy);
+
+/*
+ * Submit a new work (task) for the pool, making sure it starts getting
+ * processed immediately, launching a new thread for it if necessary.
+ *
+ * @opaque_destroy is an optional GDestroyNotify for the @opaque argument
+ * to the work function at @func.
+ */
+void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func,
+ void *opaque, GDestroyNotify opaque_destroy);
+
+/*
+ * Wait for all previously submitted work to complete before returning.
+ *
+ * Can be used as a barrier between two sets of tasks executed on a thread
+ * pool without destroying it or in a performance sensitive path where the
+ * caller just wants to wait for all tasks to complete while deferring the
+ * pool free operation for later, less performance sensitive time.
+ */
+void thread_pool_wait(ThreadPool *pool);
-void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
+/* Set the maximum number of threads in the pool. */
+bool thread_pool_set_max_threads(ThreadPool *pool, int max_threads);
+
+/*
+ * Adjust the maximum number of threads in the pool to give each task its
+ * own thread (exactly one thread per task).
+ */
+bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool);
#endif
diff --git a/include/block/ufs.h b/include/block/ufs.h
index 57f5ea3..a3ee62b 100644
--- a/include/block/ufs.h
+++ b/include/block/ufs.h
@@ -461,7 +461,7 @@ typedef struct Attributes {
uint8_t psa_state;
uint32_t psa_data_size;
uint8_t ref_clk_gating_wait_time;
- uint8_t device_case_rough_temperaure;
+ uint8_t device_case_rough_temperature;
uint8_t device_too_high_temp_boundary;
uint8_t device_too_low_temp_boundary;
uint8_t throttling_status;
@@ -1073,6 +1073,11 @@ enum health_desc_param {
UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_B = 0x4,
};
+enum {
+ UFS_DEV_HIGH_TEMP_NOTIF = BIT(4),
+ UFS_DEV_LOW_TEMP_NOTIF = BIT(5),
+};
+
/* WriteBooster buffer mode */
enum {
UFS_WB_BUF_MODE_LU_DEDICATED = 0x0,
@@ -1091,6 +1096,12 @@ enum ufs_lu_wp_type {
UFS_LU_PERM_WP = 0x02,
};
+/* Exception event mask values */
+enum {
+ MASK_EE_TOO_HIGH_TEMP = BIT(3),
+ MASK_EE_TOO_LOW_TEMP = BIT(4),
+};
+
/* UTP QUERY Transaction Specific Fields OpCode */
enum query_opcode {
UFS_UPIU_QUERY_OPCODE_NOP = 0x0,