diff options
Diffstat (limited to 'include/block')
| -rw-r--r-- | include/block/aio.h | 14 | ||||
| -rw-r--r-- | include/block/aio_task.h | 2 | ||||
| -rw-r--r-- | include/block/block-common.h | 13 | ||||
| -rw-r--r-- | include/block/block-copy.h | 1 | ||||
| -rw-r--r-- | include/block/block-global-state.h | 69 | ||||
| -rw-r--r-- | include/block/block-io.h | 4 | ||||
| -rw-r--r-- | include/block/block_int-common.h | 67 | ||||
| -rw-r--r-- | include/block/block_int-global-state.h | 6 | ||||
| -rw-r--r-- | include/block/block_int-io.h | 4 | ||||
| -rw-r--r-- | include/block/blockjob.h | 4 | ||||
| -rw-r--r-- | include/block/export.h | 3 | ||||
| -rw-r--r-- | include/block/graph-lock.h | 13 | ||||
| -rw-r--r-- | include/block/nbd.h | 6 | ||||
| -rw-r--r-- | include/block/nvme.h | 106 | ||||
| -rw-r--r-- | include/block/qdict.h | 2 | ||||
| -rw-r--r-- | include/block/raw-aio.h | 19 | ||||
| -rw-r--r-- | include/block/snapshot.h | 6 | ||||
| -rw-r--r-- | include/block/thread-pool.h | 62 | ||||
| -rw-r--r-- | include/block/ufs.h | 13 |
19 files changed, 321 insertions, 93 deletions
diff --git a/include/block/aio.h b/include/block/aio.h index 4ee8193..99ff484 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -20,6 +20,7 @@ #include "qemu/coroutine-core.h" #include "qemu/queue.h" #include "qemu/event_notifier.h" +#include "qemu/lockcnt.h" #include "qemu/thread.h" #include "qemu/timer.h" #include "block/graph-lock.h" @@ -53,7 +54,7 @@ typedef void QEMUBHFunc(void *opaque); typedef bool AioPollFn(void *opaque); typedef void IOHandler(void *opaque); -struct ThreadPool; +struct ThreadPoolAio; struct LinuxAioState; typedef struct LuringState LuringState; @@ -122,6 +123,10 @@ struct BHListSlice { typedef QSLIST_HEAD(, AioHandler) AioHandlerSList; +typedef struct AioPolledEvent { + int64_t ns; /* current polling time in nanoseconds */ +} AioPolledEvent; + struct AioContext { GSource source; @@ -206,7 +211,7 @@ struct AioContext { /* Thread pool for performing work and receiving completion callbacks. * Has its own locking. */ - struct ThreadPool *thread_pool; + struct ThreadPoolAio *thread_pool; #ifdef CONFIG_LINUX_AIO struct LinuxAioState *linux_aio; @@ -228,7 +233,6 @@ struct AioContext { int poll_disable_cnt; /* Polling mode parameters */ - int64_t poll_ns; /* current polling time in nanoseconds */ int64_t poll_max_ns; /* maximum polling time in nanoseconds */ int64_t poll_grow; /* polling time growth factor */ int64_t poll_shrink; /* polling time shrink factor */ @@ -499,8 +503,8 @@ void aio_set_event_notifier_poll(AioContext *ctx, */ GSource *aio_get_g_source(AioContext *ctx); -/* Return the ThreadPool bound to this AioContext */ -struct ThreadPool *aio_get_thread_pool(AioContext *ctx); +/* Return the ThreadPoolAio bound to this AioContext */ +struct ThreadPoolAio *aio_get_thread_pool(AioContext *ctx); /* Setup the LinuxAioState bound to this AioContext */ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); diff --git a/include/block/aio_task.h b/include/block/aio_task.h index 18a9c41..c81d637 100644 --- a/include/block/aio_task.h +++ b/include/block/aio_task.h @@ -40,8 +40,6 @@ void aio_task_pool_free(AioTaskPool *); /* error code of failed task or 0 if all is OK */ int aio_task_pool_status(AioTaskPool *pool); -bool aio_task_pool_empty(AioTaskPool *pool); - /* User provides filled @task, however task->pool will be set automatically */ void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task); diff --git a/include/block/block-common.h b/include/block/block-common.h index 338fe5f..c8c626d 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -257,6 +257,7 @@ typedef enum { #define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" #define BDRV_OPT_DISCARD "discard" #define BDRV_OPT_FORCE_SHARE "force-share" +#define BDRV_OPT_ACTIVE "active" #define BDRV_SECTOR_BITS 9 @@ -332,6 +333,17 @@ typedef enum { #define BDRV_BLOCK_RECURSE 0x40 #define BDRV_BLOCK_COMPRESSED 0x80 +/* + * Block status hints: the bitwise-or of these flags emphasize what + * the caller hopes to learn, and some drivers may be able to give + * faster answers by doing less work when the hint permits. + */ +#define BDRV_WANT_ZERO BDRV_BLOCK_ZERO +#define BDRV_WANT_OFFSET_VALID BDRV_BLOCK_OFFSET_VALID +#define BDRV_WANT_ALLOCATED BDRV_BLOCK_ALLOCATED +#define BDRV_WANT_PRECISE (BDRV_WANT_ZERO | BDRV_WANT_OFFSET_VALID | \ + BDRV_WANT_OFFSET_VALID) + typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef struct BDRVReopenState { @@ -355,7 +367,6 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_CHANGE, BLOCK_OP_TYPE_COMMIT_SOURCE, BLOCK_OP_TYPE_COMMIT_TARGET, - BLOCK_OP_TYPE_DATAPLANE, BLOCK_OP_TYPE_DRIVE_DEL, BLOCK_OP_TYPE_EJECT, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, diff --git a/include/block/block-copy.h b/include/block/block-copy.h index bdc703b..dd5cc82 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, BlockDriverState *copy_bitmap_bs, const BdrvDirtyBitmap *bitmap, bool discard_source, + uint64_t min_cluster_size, Error **errp); /* Function should be called prior any actual copy request */ diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index bd7cecd..62da83c 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -74,13 +74,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, int GRAPH_WRLOCK bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp); -int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, - Error **errp); -BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, - int flags, Error **errp); +int GRAPH_UNLOCKED +bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, Error **errp); +BlockDriverState * GRAPH_UNLOCKED +bdrv_insert_node(BlockDriverState *bs, QDict *node_options, int flags, + Error **errp); int bdrv_drop_filter(BlockDriverState *bs, Error **errp); -BdrvChild * no_coroutine_fn +BdrvChild * no_coroutine_fn GRAPH_UNLOCKED bdrv_open_child(const char *filename, QDict *options, const char *bdref_key, BlockDriverState *parent, const BdrvChildClass *child_class, BdrvChildRole child_role, bool allow_none, Error **errp); @@ -90,9 +91,10 @@ bdrv_co_open_child(const char *filename, QDict *options, const char *bdref_key, BlockDriverState *parent, const BdrvChildClass *child_class, BdrvChildRole child_role, bool allow_none, Error **errp); -int bdrv_open_file_child(const char *filename, - QDict *options, const char *bdref_key, - BlockDriverState *parent, Error **errp); +int GRAPH_UNLOCKED +bdrv_open_file_child(const char *filename, QDict *options, + const char *bdref_key, BlockDriverState *parent, + Error **errp); BlockDriverState * no_coroutine_fn bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); @@ -100,11 +102,9 @@ bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); BlockDriverState * coroutine_fn no_co_wrapper bdrv_co_open_blockdev_ref(BlockdevRef *ref, Error **errp); -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); int GRAPH_WRLOCK -bdrv_set_backing_hd_drained(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); +bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); @@ -123,11 +123,12 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, Error **errp); BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp); -BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, QDict *options, - bool keep_old_opts); +BlockReopenQueue * GRAPH_UNLOCKED +bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, + QDict *options, bool keep_old_opts); void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue); -int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int GRAPH_UNLOCKED +bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, Error **errp); int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, @@ -143,9 +144,10 @@ int bdrv_commit(BlockDriverState *bs); int GRAPH_RDLOCK bdrv_make_empty(BdrvChild *c, Error **errp); void bdrv_register(BlockDriver *bdrv); -int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - const char *backing_file_str, - bool backing_mask_protocol); +int GRAPH_UNLOCKED +bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + const char *backing_file_str, + bool backing_mask_protocol); BlockDriverState * GRAPH_RDLOCK bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs); @@ -175,21 +177,27 @@ BlockDriverState * GRAPH_RDLOCK check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); + +bool GRAPH_RDLOCK bdrv_is_inactive(BlockDriverState *bs); + int no_coroutine_fn GRAPH_RDLOCK bdrv_activate(BlockDriverState *bs, Error **errp); int coroutine_fn no_co_wrapper_bdrv_rdlock bdrv_co_activate(BlockDriverState *bs, Error **errp); +int no_coroutine_fn GRAPH_RDLOCK +bdrv_inactivate(BlockDriverState *bs, Error **errp); + void bdrv_activate_all(Error **errp); -int bdrv_inactivate_all(void); +int GRAPH_UNLOCKED bdrv_inactivate_all(void); int bdrv_flush_all(void); -void bdrv_close_all(void); -void bdrv_drain_all_begin(void); +void GRAPH_UNLOCKED bdrv_close_all(void); +void GRAPH_UNLOCKED bdrv_drain_all_begin(void); void bdrv_drain_all_begin_nopoll(void); void bdrv_drain_all_end(void); -void bdrv_drain_all(void); +void GRAPH_UNLOCKED bdrv_drain_all(void); void bdrv_aio_cancel(BlockAIOCB *acb); @@ -268,11 +276,16 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); int bdrv_debug_resume(BlockDriverState *bs, const char *tag); bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); -bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); -int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); +bool GRAPH_RDLOCK +bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); +int GRAPH_UNLOCKED +bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +int GRAPH_RDLOCK +bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); diff --git a/include/block/block-io.h b/include/block/block-io.h index b49e053..4cf83fb 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -161,6 +161,8 @@ bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base, int coroutine_fn GRAPH_RDLOCK bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_is_all_zeroes(BlockDriverState *bs); int GRAPH_RDLOCK bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, @@ -429,7 +431,7 @@ bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, * * This function can be recursive. */ -void bdrv_drained_begin(BlockDriverState *bs); +void GRAPH_UNLOCKED bdrv_drained_begin(BlockDriverState *bs); /** * bdrv_do_drained_begin_quiesce: diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index ebb4e56..034c063 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -248,7 +248,7 @@ struct BlockDriver { int GRAPH_UNLOCKED_PTR (*bdrv_open)( BlockDriverState *bs, QDict *options, int flags, Error **errp); - void (*bdrv_close)(BlockDriverState *bs); + void GRAPH_UNLOCKED_PTR (*bdrv_close)(BlockDriverState *bs); int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create)( BlockdevCreateOptions *opts, Error **errp); @@ -396,9 +396,23 @@ struct BlockDriver { int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)( BlockDriverState *bs, HDGeometry *geo); + /** + * Hot add a BDS's child. Used in combination with bdrv_del_child, so the + * user can take a child offline when it is broken and take a new child + * online. + * + * All block nodes must be drained. + */ void GRAPH_WRLOCK_PTR (*bdrv_add_child)( BlockDriverState *parent, BlockDriverState *child, Error **errp); + /** + * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the + * user can take a child offline when it is broken and take a new child + * online. + * + * All block nodes must be drained. + */ void GRAPH_WRLOCK_PTR (*bdrv_del_child)( BlockDriverState *parent, BdrvChild *child, Error **errp); @@ -506,10 +520,6 @@ struct BlockDriver { BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_flush)( BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); - BlockAIOCB * GRAPH_RDLOCK_PTR (*bdrv_aio_pdiscard)( - BlockDriverState *bs, int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_readv)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); @@ -608,15 +618,16 @@ struct BlockDriver { * according to the current layer, and should only need to set * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing - * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See - * block.h for the overall meaning of the bits. As a hint, the - * flag want_zero is true if the caller cares more about precise - * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for - * overall allocation (favor larger *pnum, perhaps by reporting - * _DATA instead of _ZERO). The block layer guarantees input - * clamped to bdrv_getlength() and aligned to request_alignment, - * as well as non-NULL pnum, map, and file; in turn, the driver - * must return an error or set pnum to an aligned non-zero value. + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). The + * caller will synthesize BDRV_BLOCK_ALLOCATED based on the + * non-zero results. See block.h for the overall meaning of the + * bits. As a hint, the flags in @mode may include a bitwise-or + * of BDRV_WANT_ALLOCATED, BDRV_WANT_OFFSET_VALID, or + * BDRV_WANT_ZERO based on what the caller is looking for in the + * results. The block layer guarantees input clamped to + * bdrv_getlength() and aligned to request_alignment, as well as + * non-NULL pnum, map, and file; in turn, the driver must return + * an error or set pnum to an aligned non-zero value. * * Note that @bytes is just a hint on how big of a region the * caller wants to inspect. It is not a limit on *pnum. @@ -628,8 +639,8 @@ struct BlockDriver { * to clamping *pnum for return to its caller. */ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_block_status)( - BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + BlockDriverState *bs, unsigned int mode, + int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); /* @@ -653,8 +664,8 @@ struct BlockDriver { QEMUIOVector *qiov, size_t qiov_offset); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_snapshot_block_status)( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file); + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_pdiscard_snapshot)( BlockDriverState *bs, int64_t offset, int64_t bytes); @@ -986,9 +997,21 @@ struct BdrvChildClass { bool backing_mask_protocol, Error **errp); - bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); + /* + * Notifies the parent that the child is trying to change its AioContext. + * The parent may in turn change the AioContext of other nodes in the same + * transaction. Returns true if the change is possible and the transaction + * can be continued. Returns false and sets @errp if not and the transaction + * must be aborted. + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ + bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, + GHashTable *visited, + Transaction *tran, Error **errp); /* * I/O API functions. These functions are thread-safe. @@ -1230,7 +1253,7 @@ struct BlockDriverState { /* do we need to tell the quest if we have a volatile write cache? */ int enable_write_cache; - /* Accessed with atomic ops. */ + /* Accessed only in the main thread. */ int quiesce_counter; unsigned int write_gen; /* Current data generation */ diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index eb2d92a..e7c8f1a 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -139,7 +139,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, * @buf_size: The amount of data that can be in flight at one time. * @mode: Whether to collapse all images in the chain to the target. * @backing_mode: How to establish the target's backing chain after completion. - * @zero_target: Whether the target should be explicitly zero-initialized + * @target_is_zero: Whether the target already is zero-initialized. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. @@ -159,7 +159,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, int creation_flags, int64_t speed, uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, + bool target_is_zero, BlockdevOnError on_source_error, BlockdevOnError on_target_error, bool unmap, const char *filter_node_name, @@ -179,6 +179,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, * all ".has_*" fields are ignored. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. + * @on_cbw_error: The action to take upon error in copy-before-write operations. * @creation_flags: Flags that control the behavior of the Job lifetime. * See @BlockJobCreateFlags * @cb: Completion function for the job. @@ -198,6 +199,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BackupPerf *perf, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + OnCbwError on_cbw_error, int creation_flags, BlockCompletionFunc *cb, void *opaque, JobTxn *txn, Error **errp); diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 4a7cf2b..4f94eb3 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -38,8 +38,8 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); int coroutine_fn GRAPH_RDLOCK bdrv_co_snapshot_block_status( - BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, - int64_t *pnum, int64_t *map, BlockDriverState **file); + BlockDriverState *bs, unsigned int mode, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 7061ab7..85284cb 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id); * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. + * + * All block nodes must be drained. */ int GRAPH_WRLOCK block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, @@ -149,7 +151,7 @@ block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, * Remove all BlockDriverStates from the list of nodes that are involved in the * job. This removes the blockers added with block_job_add_bdrv(). */ -void block_job_remove_all_bdrv(BlockJob *job); +void GRAPH_UNLOCKED block_job_remove_all_bdrv(BlockJob *job); /** * block_job_has_bdrv: diff --git a/include/block/export.h b/include/block/export.h index f2fe0f8..4bd9531 100644 --- a/include/block/export.h +++ b/include/block/export.h @@ -29,6 +29,9 @@ typedef struct BlockExportDriver { */ size_t instance_size; + /* True if the export type supports running on an inactive node */ + bool supports_inactive; + /* Creates and starts a new block export */ int (*create)(BlockExport *, BlockExportOptions *, Error **); diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index dc8d949..95bf5ed 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -20,8 +20,6 @@ #ifndef GRAPH_LOCK_H #define GRAPH_LOCK_H -#include "qemu/clang-tsa.h" - /** * Graph Lock API * This API provides a rwlock used to protect block layer @@ -115,9 +113,20 @@ void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA bdrv_graph_wrlock(void); /* + * bdrv_graph_wrlock_drained: + * Similar to bdrv_graph_wrlock, but will begin a drained section before + * locking. + */ +void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA +bdrv_graph_wrlock_drained(void); + +/* * bdrv_graph_wrunlock: * Write finished, reset global has_writer to 0 and restart * all readers that are waiting. + * + * Also ends the drained section if bdrv_graph_wrlock_drained() was used to lock + * the graph. */ void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA bdrv_graph_wrunlock(void); diff --git a/include/block/nbd.h b/include/block/nbd.h index d4f8b21..92987c7 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -428,9 +428,9 @@ void nbd_client_put(NBDClient *client); void nbd_server_is_qemu_nbd(int max_connections); bool nbd_server_is_running(void); int nbd_server_max_connections(void); -void nbd_server_start(SocketAddress *addr, const char *tls_creds, - const char *tls_authz, uint32_t max_connections, - Error **errp); +void nbd_server_start(SocketAddress *addr, uint32_t handshake_max_secs, + const char *tls_creds, const char *tls_authz, + uint32_t max_connections, Error **errp); void nbd_server_start_options(NbdServerOptions *arg, Error **errp); /* nbd_read diff --git a/include/block/nvme.h b/include/block/nvme.h index 5298bc4..358e516 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -142,9 +142,9 @@ enum NvmeCapMask { ((cap) |= (uint64_t)((val) & CAP_CMBS_MASK) << CAP_CMBS_SHIFT) enum NvmeCapCss { - NVME_CAP_CSS_NVM = 1 << 0, - NVME_CAP_CSS_CSI_SUPP = 1 << 6, - NVME_CAP_CSS_ADMIN_ONLY = 1 << 7, + NVME_CAP_CSS_NCSS = 1 << 0, + NVME_CAP_CSS_IOCSS = 1 << 6, + NVME_CAP_CSS_NOIOCSS = 1 << 7, }; enum NvmeCcShift { @@ -177,7 +177,7 @@ enum NvmeCcMask { enum NvmeCcCss { NVME_CC_CSS_NVM = 0x0, - NVME_CC_CSS_CSI = 0x6, + NVME_CC_CSS_ALL = 0x6, NVME_CC_CSS_ADMIN_ONLY = 0x7, }; @@ -906,8 +906,7 @@ enum NvmeStatusCodes { NVME_SGL_DESCR_TYPE_INVALID = 0x0011, NVME_INVALID_USE_OF_CMB = 0x0012, NVME_INVALID_PRP_OFFSET = 0x0013, - NVME_CMD_SET_CMB_REJECTED = 0x002b, - NVME_INVALID_CMD_SET = 0x002c, + NVME_COMMAND_INTERRUPTED = 0x0021, NVME_FDP_DISABLED = 0x0029, NVME_INVALID_PHID_LIST = 0x002a, NVME_LBA_RANGE = 0x0080, @@ -940,6 +939,10 @@ enum NvmeStatusCodes { NVME_INVALID_SEC_CTRL_STATE = 0x0120, NVME_INVALID_NUM_RESOURCES = 0x0121, NVME_INVALID_RESOURCE_ID = 0x0122, + NVME_IOCS_NOT_SUPPORTED = 0x0129, + NVME_IOCS_NOT_ENABLED = 0x012a, + NVME_IOCS_COMBINATION_REJECTED = 0x012b, + NVME_INVALID_IOCS = 0x012c, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, @@ -1015,6 +1018,40 @@ typedef struct QEMU_PACKED NvmeSmartLog { uint8_t reserved2[320]; } NvmeSmartLog; +typedef struct QEMU_PACKED NvmeSmartLogExtended { + uint64_t physical_media_units_written[2]; + uint64_t physical_media_units_read[2]; + uint64_t bad_user_blocks; + uint64_t bad_system_nand_blocks; + uint64_t xor_recovery_count; + uint64_t uncorrectable_read_error_count; + uint64_t soft_ecc_error_count; + uint64_t end2end_correction_counts; + uint8_t system_data_percent_used; + uint8_t refresh_counts[7]; + uint64_t user_data_erase_counts; + uint16_t thermal_throttling_stat_and_count; + uint16_t dssd_spec_version[3]; + uint64_t pcie_correctable_error_count; + uint32_t incomplete_shutdowns; + uint32_t rsvd116; + uint8_t percent_free_blocks; + uint8_t rsvd121[7]; + uint16_t capacity_health; + uint8_t nvme_errata_ver; + uint8_t rsvd131[5]; + uint64_t unaligned_io; + uint64_t security_ver_num; + uint64_t total_nuse; + uint64_t plp_start_count[2]; + uint64_t endurance_estimate[2]; + uint64_t pcie_retraining_count; + uint64_t power_state_change_count; + uint8_t rsvd208[286]; + uint16_t log_page_version; + uint64_t log_page_guid[2]; +} NvmeSmartLogExtended; + #define NVME_SMART_WARN_MAX 6 enum NvmeSmartWarn { NVME_SMART_SPARE = 1 << 0, @@ -1052,6 +1089,12 @@ enum NvmeLogIdentifier { NVME_LOG_FDP_RUH_USAGE = 0x21, NVME_LOG_FDP_STATS = 0x22, NVME_LOG_FDP_EVENTS = 0x23, + NVME_LOG_VENDOR_START = 0xc0, + NVME_LOG_VENDOR_END = 0xff, +}; + +enum NvmeOcpLogIdentifier { + NVME_OCP_EXTENDED_SMART_INFO = 0xc0, }; typedef struct QEMU_PACKED NvmePSD { @@ -1077,6 +1120,7 @@ enum NvmeIdCns { NVME_ID_CNS_CS_NS = 0x05, NVME_ID_CNS_CS_CTRL = 0x06, NVME_ID_CNS_CS_NS_ACTIVE_LIST = 0x07, + NVME_ID_CNS_CS_IND_NS = 0x08, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, @@ -1087,6 +1131,7 @@ enum NvmeIdCns { NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET = 0x1c, + NVME_ID_CNS_CS_IND_NS_ALLOCATED = 0x1f, }; typedef struct QEMU_PACKED NvmeIdCtrl { @@ -1165,6 +1210,8 @@ typedef struct NvmeIdCtrlZoned { uint8_t rsvd1[4095]; } NvmeIdCtrlZoned; +#define NVME_ID_CTRL_NVM_DMRL_MAX 255 + typedef struct NvmeIdCtrlNvm { uint8_t vsl; uint8_t wzsl; @@ -1182,6 +1229,7 @@ enum NvmeIdCtrlOaes { enum NvmeIdCtrlCtratt { NVME_CTRATT_ENDGRPS = 1 << 4, NVME_CTRATT_ELBAS = 1 << 15, + NVME_CTRATT_MEM = 1 << 16, NVME_CTRATT_FDPS = 1 << 19, }; @@ -1189,9 +1237,10 @@ enum NvmeIdCtrlOacs { NVME_OACS_SECURITY = 1 << 0, NVME_OACS_FORMAT = 1 << 1, NVME_OACS_FW = 1 << 2, - NVME_OACS_NS_MGMT = 1 << 3, + NVME_OACS_NMS = 1 << 3, NVME_OACS_DIRECTIVES = 1 << 5, - NVME_OACS_DBBUF = 1 << 8, + NVME_OACS_VMS = 1 << 7, + NVME_OACS_DBCS = 1 << 8, }; enum NvmeIdCtrlOncs { @@ -1285,6 +1334,8 @@ enum NvmeNsAttachmentOperation { #define NVME_ERR_REC_TLER(err_rec) (err_rec & 0xffff) #define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x10000) +#define NVME_ID_CTRL_CTRATT_MEM(ctratt) (ctratt & NVME_CTRATT_MEM) + enum NvmeFeatureIds { NVME_ARBITRATION = 0x1, NVME_POWER_MANAGEMENT = 0x2, @@ -1413,9 +1464,28 @@ typedef struct QEMU_PACKED NvmeIdNsNvm { uint8_t pic; uint8_t rsvd9[3]; uint32_t elbaf[NVME_MAX_NLBAF]; - uint8_t rsvd268[3828]; + uint32_t npdgl; + uint32_t nprg; + uint32_t npra; + uint32_t nors; + uint32_t npdal; + uint8_t rsvd288[3808]; } NvmeIdNsNvm; +typedef struct QEMU_PACKED NvmeIdNsInd { + uint8_t nsfeat; + uint8_t nmic; + uint8_t rescap; + uint8_t fpi; + uint32_t anagrpid; + uint8_t nsattr; + uint8_t rsvd9; + uint16_t nvmsetid; + uint16_t endgrpid; + uint8_t nstat; + uint8_t rsvd15[4081]; +} NvmeIdNsInd; + typedef struct QEMU_PACKED NvmeIdNsDescr { uint8_t nidt; uint8_t nidl; @@ -1436,8 +1506,10 @@ enum NvmeNsIdentifierType { NVME_NIDT_CSI = 0x04, }; -enum NvmeIdNsNmic { - NVME_NMIC_NS_SHARED = 1 << 0, +enum NvmeIdNsIndependent { + NVME_ID_NS_IND_NMIC_SHRNS = 1 << 0, + NVME_ID_NS_IND_NMIC_DISNS = 1 << 1, + NVME_ID_NS_IND_NSTAT_NRDY = 1 << 0, }; enum NvmeCsi { @@ -1515,6 +1587,16 @@ enum NvmeIdNsMc { NVME_ID_NS_MC_SEPARATE = 1 << 1, }; +enum NvmeIdNsNsfeat { + NVME_ID_NS_NSFEAT_THINP = 1 << 0, + NVME_ID_NS_NSFEAT_NSABPNS = 1 << 1, + NVME_ID_NS_NSFEAT_DAE = 1 << 2, + NVME_ID_NS_NSFEAT_UIDREUSE = 1 << 3, + NVME_ID_NS_NSFEAT_OPTPERF_ALL = 3 << 4, + NVME_ID_NS_NSFEAT_MAM = 1 << 6, + NVME_ID_NS_NSFEAT_OPTRPERF = 1 << 7, +}; + #define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) enum NvmePIFormat { @@ -1863,6 +1945,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); + QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLogExtended) != 512); QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrlZoned) != 4096); @@ -1870,6 +1953,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4); QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsInd) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsNvm) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16); diff --git a/include/block/qdict.h b/include/block/qdict.h index b4c28d9..53c4df4 100644 --- a/include/block/qdict.h +++ b/include/block/qdict.h @@ -10,7 +10,7 @@ #ifndef BLOCK_QDICT_H #define BLOCK_QDICT_H -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" QObject *qdict_crumple(const QDict *src, Error **errp); void qdict_flatten(QDict *qdict); diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 6267068..6570244 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -17,6 +17,7 @@ #define QEMU_RAW_AIO_H #include "block/aio.h" +#include "block/block-common.h" #include "qemu/iov.h" /* AIO request types */ @@ -58,11 +59,18 @@ void laio_cleanup(LinuxAioState *s); /* laio_co_submit: submit I/O requests in the thread's current AioContext. */ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, - int type, uint64_t dev_max_batch); + int type, BdrvRequestFlags flags, + uint64_t dev_max_batch); bool laio_has_fdsync(int); +bool laio_has_fua(void); void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); +#else +static inline bool laio_has_fua(void) +{ + return false; +} #endif /* io_uring.c - Linux io_uring implementation */ #ifdef CONFIG_LINUX_IO_URING @@ -71,9 +79,16 @@ void luring_cleanup(LuringState *s); /* luring_co_submit: submit I/O requests in the thread's current AioContext. */ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset, - QEMUIOVector *qiov, int type); + QEMUIOVector *qiov, int type, + BdrvRequestFlags flags); void luring_detach_aio_context(LuringState *s, AioContext *old_context); void luring_attach_aio_context(LuringState *s, AioContext *new_context); +bool luring_has_fua(void); +#else +static inline bool luring_has_fua(void) +{ + return false; +} #endif #ifdef _WIN32 diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 304cc6e..2316a43 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -90,9 +90,9 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, bool bdrv_all_can_snapshot(bool has_devices, strList *devices, Error **errp); -int bdrv_all_delete_snapshot(const char *name, - bool has_devices, strList *devices, - Error **errp); +int GRAPH_UNLOCKED +bdrv_all_delete_snapshot(const char *name, bool has_devices, strList *devices, + Error **errp); int bdrv_all_goto_snapshot(const char *name, bool has_devices, strList *devices, Error **errp); diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h index 948ff5f..dd48cf0 100644 --- a/include/block/thread-pool.h +++ b/include/block/thread-pool.h @@ -24,20 +24,70 @@ typedef int ThreadPoolFunc(void *opaque); -typedef struct ThreadPool ThreadPool; +typedef struct ThreadPoolAio ThreadPoolAio; -ThreadPool *thread_pool_new(struct AioContext *ctx); -void thread_pool_free(ThreadPool *pool); +ThreadPoolAio *thread_pool_new_aio(struct AioContext *ctx); +void thread_pool_free_aio(ThreadPoolAio *pool); /* - * thread_pool_submit* API: submit I/O requests in the thread's + * thread_pool_submit_{aio,co} API: submit I/O requests in the thread's * current AioContext. */ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, BlockCompletionFunc *cb, void *opaque); int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg); -void thread_pool_submit(ThreadPoolFunc *func, void *arg); +void thread_pool_update_params(ThreadPoolAio *pool, struct AioContext *ctx); + +/* ------------------------------------------- */ +/* Generic thread pool types and methods below */ +typedef struct ThreadPool ThreadPool; + +/* Create a new thread pool. Never returns NULL. */ +ThreadPool *thread_pool_new(void); + +/* + * Free the thread pool. + * Waits for all the previously submitted work to complete before performing + * the actual freeing operation. + */ +void thread_pool_free(ThreadPool *pool); + +/* + * Submit a new work (task) for the pool. + * + * @opaque_destroy is an optional GDestroyNotify for the @opaque argument + * to the work function at @func. + */ +void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, + void *opaque, GDestroyNotify opaque_destroy); + +/* + * Submit a new work (task) for the pool, making sure it starts getting + * processed immediately, launching a new thread for it if necessary. + * + * @opaque_destroy is an optional GDestroyNotify for the @opaque argument + * to the work function at @func. + */ +void thread_pool_submit_immediate(ThreadPool *pool, ThreadPoolFunc *func, + void *opaque, GDestroyNotify opaque_destroy); + +/* + * Wait for all previously submitted work to complete before returning. + * + * Can be used as a barrier between two sets of tasks executed on a thread + * pool without destroying it or in a performance sensitive path where the + * caller just wants to wait for all tasks to complete while deferring the + * pool free operation for later, less performance sensitive time. + */ +void thread_pool_wait(ThreadPool *pool); -void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); +/* Set the maximum number of threads in the pool. */ +bool thread_pool_set_max_threads(ThreadPool *pool, int max_threads); + +/* + * Adjust the maximum number of threads in the pool to give each task its + * own thread (exactly one thread per task). + */ +bool thread_pool_adjust_max_threads_to_work(ThreadPool *pool); #endif diff --git a/include/block/ufs.h b/include/block/ufs.h index 57f5ea3..a3ee62b 100644 --- a/include/block/ufs.h +++ b/include/block/ufs.h @@ -461,7 +461,7 @@ typedef struct Attributes { uint8_t psa_state; uint32_t psa_data_size; uint8_t ref_clk_gating_wait_time; - uint8_t device_case_rough_temperaure; + uint8_t device_case_rough_temperature; uint8_t device_too_high_temp_boundary; uint8_t device_too_low_temp_boundary; uint8_t throttling_status; @@ -1073,6 +1073,11 @@ enum health_desc_param { UFS_HEALTH_DESC_PARAM_LIFE_TIME_EST_B = 0x4, }; +enum { + UFS_DEV_HIGH_TEMP_NOTIF = BIT(4), + UFS_DEV_LOW_TEMP_NOTIF = BIT(5), +}; + /* WriteBooster buffer mode */ enum { UFS_WB_BUF_MODE_LU_DEDICATED = 0x0, @@ -1091,6 +1096,12 @@ enum ufs_lu_wp_type { UFS_LU_PERM_WP = 0x02, }; +/* Exception event mask values */ +enum { + MASK_EE_TOO_HIGH_TEMP = BIT(3), + MASK_EE_TOO_LOW_TEMP = BIT(4), +}; + /* UTP QUERY Transaction Specific Fields OpCode */ enum query_opcode { UFS_UPIU_QUERY_OPCODE_NOP = 0x0, |
