diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/block-gen.h | 49 | ||||
-rw-r--r-- | block/coroutines.h | 65 | ||||
-rw-r--r-- | block/io.c | 337 | ||||
-rw-r--r-- | block/meson.build | 8 | ||||
-rw-r--r-- | block/nvme.c | 73 |
5 files changed, 213 insertions, 319 deletions
diff --git a/block/block-gen.h b/block/block-gen.h new file mode 100644 index 0000000..f80cf48 --- /dev/null +++ b/block/block-gen.h @@ -0,0 +1,49 @@ +/* + * Block coroutine wrapping core, used by auto-generated block/block-gen.c + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2020 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_BLOCK_GEN_H +#define BLOCK_BLOCK_GEN_H + +#include "block/block_int.h" + +/* Base structure for argument packing structures */ +typedef struct BdrvPollCo { + BlockDriverState *bs; + bool in_progress; + int ret; + Coroutine *co; /* Keep pointer here for debugging */ +} BdrvPollCo; + +static inline int bdrv_poll_co(BdrvPollCo *s) +{ + assert(!qemu_in_coroutine()); + + bdrv_coroutine_enter(s->bs, s->co); + BDRV_POLL_WHILE(s->bs, s->in_progress); + + return s->ret; +} + +#endif /* BLOCK_BLOCK_GEN_H */ diff --git a/block/coroutines.h b/block/coroutines.h new file mode 100644 index 0000000..f69179f --- /dev/null +++ b/block/coroutines.h @@ -0,0 +1,65 @@ +/* + * Block layer I/O functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_COROUTINES_INT_H +#define BLOCK_COROUTINES_INT_H + +#include "block/block_int.h" + +int coroutine_fn bdrv_co_check(BlockDriverState *bs, + BdrvCheckResult *res, BdrvCheckMode fix); +int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); + +int generated_co_wrapper +bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); +int generated_co_wrapper +bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + +int coroutine_fn +bdrv_co_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); +int generated_co_wrapper +bdrv_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); + +int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); +int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); + +#endif /* BLOCK_COROUTINES_INT_H */ @@ -29,6 +29,7 @@ #include "block/blockjob.h" #include "block/blockjob_int.h" #include "block/block_int.h" +#include "block/coroutines.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -889,89 +890,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, return 0; } -typedef int coroutine_fn BdrvRequestEntry(void *opaque); -typedef struct BdrvRunCo { - BdrvRequestEntry *entry; - void *opaque; - int ret; - bool done; - Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */ -} BdrvRunCo; - -static void coroutine_fn bdrv_run_co_entry(void *opaque) -{ - BdrvRunCo *arg = opaque; - - arg->ret = arg->entry(arg->opaque); - arg->done = true; - aio_wait_kick(); -} - -static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry, - void *opaque) -{ - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - return entry(opaque); - } else { - BdrvRunCo s = { .entry = entry, .opaque = opaque }; - - s.co = qemu_coroutine_create(bdrv_run_co_entry, &s); - bdrv_coroutine_enter(bs, s.co); - - BDRV_POLL_WHILE(bs, !s.done); - - return s.ret; - } -} - -typedef struct RwCo { - BdrvChild *child; - int64_t offset; - QEMUIOVector *qiov; - bool is_write; - BdrvRequestFlags flags; -} RwCo; - -static int coroutine_fn bdrv_rw_co_entry(void *opaque) -{ - RwCo *rwco = opaque; - - if (!rwco->is_write) { - return bdrv_co_preadv(rwco->child, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); - } else { - return bdrv_co_pwritev(rwco->child, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); - } -} - -/* - * Process a vectored synchronous request using coroutines - */ -static int bdrv_prwv_co(BdrvChild *child, int64_t offset, - QEMUIOVector *qiov, bool is_write, - BdrvRequestFlags flags) -{ - RwCo rwco = { - .child = child, - .offset = offset, - .qiov = qiov, - .is_write = is_write, - .flags = flags, - }; - - return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco); -} - int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags) { - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); - - return bdrv_prwv_co(child, offset, &qiov, true, + return bdrv_pwritev(child, offset, bytes, NULL, BDRV_REQ_ZERO_WRITE | flags); } @@ -1016,41 +938,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) } } -/* return < 0 if error. See bdrv_pwrite() for the return codes */ -int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) -{ - int ret; - - ret = bdrv_prwv_co(child, offset, qiov, false, 0); - if (ret < 0) { - return ret; - } - - return qiov->size; -} - /* See bdrv_pwrite() for the return codes */ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes) { + int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - return bdrv_preadv(child, offset, &qiov); -} - -int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) -{ - int ret; + ret = bdrv_preadv(child, offset, bytes, &qiov, 0); - ret = bdrv_prwv_co(child, offset, qiov, true, 0); - if (ret < 0) { - return ret; - } - - return qiov->size; + return ret < 0 ? ret : bytes; } /* Return no. of bytes on success or < 0 on error. Important errors are: @@ -1061,13 +961,16 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov) */ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes) { + int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); if (bytes < 0) { return -EINVAL; } - return bdrv_pwritev(child, offset, &qiov); + ret = bdrv_pwritev(child, offset, bytes, &qiov, 0); + + return ret < 0 ? ret : bytes; } /* @@ -2243,18 +2146,6 @@ int bdrv_flush_all(void) return result; } - -typedef struct BdrvCoBlockStatusData { - BlockDriverState *bs; - BlockDriverState *base; - bool want_zero; - int64_t offset; - int64_t bytes; - int64_t *pnum; - int64_t *map; - BlockDriverState **file; -} BdrvCoBlockStatusData; - /* * Returns the allocation status of the specified sectors. * Drivers not implementing the functionality are assumed to not support @@ -2449,14 +2340,15 @@ early_out: return ret; } -static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool want_zero, - int64_t offset, - int64_t bytes, - int64_t *pnum, - int64_t *map, - BlockDriverState **file) +int coroutine_fn +bdrv_co_common_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file) { BlockDriverState *p; int ret = 0; @@ -2489,43 +2381,6 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, return ret; } -/* Coroutine wrapper for bdrv_block_status_above() */ -static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque) -{ - BdrvCoBlockStatusData *data = opaque; - - return bdrv_co_block_status_above(data->bs, data->base, - data->want_zero, - data->offset, data->bytes, - data->pnum, data->map, data->file); -} - -/* - * Synchronous wrapper around bdrv_co_block_status_above(). - * - * See bdrv_co_block_status_above() for details. - */ -static int bdrv_common_block_status_above(BlockDriverState *bs, - BlockDriverState *base, - bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, - int64_t *map, - BlockDriverState **file) -{ - BdrvCoBlockStatusData data = { - .bs = bs, - .base = base, - .want_zero = want_zero, - .offset = offset, - .bytes = bytes, - .pnum = pnum, - .map = map, - .file = file, - }; - - return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data); -} - int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) @@ -2619,96 +2474,70 @@ int bdrv_is_allocated_above(BlockDriverState *top, return 0; } -typedef struct BdrvVmstateCo { - BlockDriverState *bs; - QEMUIOVector *qiov; - int64_t pos; - bool is_read; -} BdrvVmstateCo; - -static int coroutine_fn -bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, - bool is_read) +int coroutine_fn +bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret = -ENOTSUP; + if (!drv) { + return -ENOMEDIUM; + } + bdrv_inc_in_flight(bs); - if (!drv) { - ret = -ENOMEDIUM; - } else if (drv->bdrv_load_vmstate) { - if (is_read) { - ret = drv->bdrv_load_vmstate(bs, qiov, pos); - } else { - ret = drv->bdrv_save_vmstate(bs, qiov, pos); - } + if (drv->bdrv_load_vmstate) { + ret = drv->bdrv_load_vmstate(bs, qiov, pos); } else if (child_bs) { - ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read); + ret = bdrv_co_readv_vmstate(child_bs, qiov, pos); } bdrv_dec_in_flight(bs); + return ret; } -static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) +int coroutine_fn +bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BdrvVmstateCo *co = opaque; + BlockDriver *drv = bs->drv; + BlockDriverState *child_bs = bdrv_primary_bs(bs); + int ret = -ENOTSUP; - return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); -} + if (!drv) { + return -ENOMEDIUM; + } -static inline int -bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, - bool is_read) -{ - BdrvVmstateCo data = { - .bs = bs, - .qiov = qiov, - .pos = pos, - .is_read = is_read, - }; + bdrv_inc_in_flight(bs); - return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data); + if (drv->bdrv_save_vmstate) { + ret = drv->bdrv_save_vmstate(bs, qiov, pos); + } else if (child_bs) { + ret = bdrv_co_writev_vmstate(child_bs, qiov, pos); + } + + bdrv_dec_in_flight(bs); + + return ret; } int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); - int ret; + int ret = bdrv_writev_vmstate(bs, &qiov, pos); - ret = bdrv_writev_vmstate(bs, &qiov, pos); - if (ret < 0) { - return ret; - } - - return size; -} - -int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) -{ - return bdrv_rw_vmstate(bs, qiov, pos, false); + return ret < 0 ? ret : size; } int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); - int ret; + int ret = bdrv_readv_vmstate(bs, &qiov, pos); - ret = bdrv_readv_vmstate(bs, &qiov, pos); - if (ret < 0) { - return ret; - } - - return size; -} - -int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) -{ - return bdrv_rw_vmstate(bs, qiov, pos, true); + return ret < 0 ? ret : size; } /**************************************************************/ @@ -2748,11 +2577,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb) /**************************************************************/ /* Coroutine block device emulation */ -static int coroutine_fn bdrv_flush_co_entry(void *opaque) -{ - return bdrv_co_flush(opaque); -} - int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { BdrvChild *primary_child = bdrv_primary_child(bs); @@ -2876,24 +2700,6 @@ early_exit: return ret; } -int bdrv_flush(BlockDriverState *bs) -{ - return bdrv_run_co(bs, bdrv_flush_co_entry, bs); -} - -typedef struct DiscardCo { - BdrvChild *child; - int64_t offset; - int64_t bytes; -} DiscardCo; - -static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque) -{ - DiscardCo *rwco = opaque; - - return bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes); -} - int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { @@ -3008,17 +2814,6 @@ out: return ret; } -int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) -{ - DiscardCo rwco = { - .child = child, - .offset = offset, - .bytes = bytes, - }; - - return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco); -} - int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; @@ -3420,35 +3215,3 @@ out: return ret; } - -typedef struct TruncateCo { - BdrvChild *child; - int64_t offset; - bool exact; - PreallocMode prealloc; - BdrvRequestFlags flags; - Error **errp; -} TruncateCo; - -static int coroutine_fn bdrv_truncate_co_entry(void *opaque) -{ - TruncateCo *tco = opaque; - - return bdrv_co_truncate(tco->child, tco->offset, tco->exact, - tco->prealloc, tco->flags, tco->errp); -} - -int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) -{ - TruncateCo tco = { - .child = child, - .offset = offset, - .exact = exact, - .prealloc = prealloc, - .flags = flags, - .errp = errp, - }; - - return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco); -} diff --git a/block/meson.build b/block/meson.build index 0b38dc3..78e8b25 100644 --- a/block/meson.build +++ b/block/meson.build @@ -107,6 +107,14 @@ module_block_h = custom_target('module_block.h', command: [module_block_py, '@OUTPUT0@', modsrc]) block_ss.add(module_block_h) +wrapper_py = find_program('../scripts/block-coroutine-wrapper.py') +block_gen_c = custom_target('block-gen.c', + output: 'block-gen.c', + input: files('../include/block/block.h', + 'coroutines.h'), + command: [wrapper_py, '@OUTPUT@', '@INPUT@']) +block_ss.add(block_gen_c) + block_ss.add(files('stream.c')) softmmu_ss.add(files('qapi-sysemu.c')) diff --git a/block/nvme.c b/block/nvme.c index f4f27b6..b48f6f2 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -31,7 +31,7 @@ #define NVME_SQ_ENTRY_BYTES 64 #define NVME_CQ_ENTRY_BYTES 16 #define NVME_QUEUE_SIZE 128 -#define NVME_BAR_SIZE 8192 +#define NVME_DOORBELL_SIZE 4096 /* * We have to leave one slot empty as that is the full queue case where @@ -81,15 +81,6 @@ typedef struct { QEMUBH *completion_bh; } NVMeQueuePair; -/* Memory mapped registers */ -typedef volatile struct { - NvmeBar ctrl; - struct { - uint32_t sq_tail; - uint32_t cq_head; - } doorbells[]; -} NVMeRegs; - #define INDEX_ADMIN 0 #define INDEX_IO(n) (1 + n) @@ -102,7 +93,11 @@ enum { struct BDRVNVMeState { AioContext *aio_context; QEMUVFIOState *vfio; - NVMeRegs *regs; + /* Memory mapped registers */ + volatile struct { + uint32_t sq_tail; + uint32_t cq_head; + } *doorbells; /* The submission/completion queue pairs. * [0]: admin queue. * [1..]: io queues. @@ -247,14 +242,14 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, error_propagate(errp, local_err); goto fail; } - q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail; + q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail; nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err); if (local_err) { error_propagate(errp, local_err); goto fail; } - q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head; + q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head; return q; fail: @@ -694,6 +689,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, uint64_t timeout_ms; uint64_t deadline, now; Error *local_err = NULL; + volatile NvmeBar *regs = NULL; qemu_co_mutex_init(&s->dma_map_lock); qemu_co_queue_init(&s->dma_flush_queue); @@ -712,32 +708,32 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, goto out; } - s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp); - if (!s->regs) { + regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar), + PROT_READ | PROT_WRITE, errp); + if (!regs) { ret = -EINVAL; goto out; } - /* Perform initialize sequence as described in NVMe spec "7.6.1 * Initialization". */ - cap = le64_to_cpu(s->regs->ctrl.cap); - if (!(cap & (1ULL << 37))) { + cap = le64_to_cpu(regs->cap); + if (!NVME_CAP_CSS(cap)) { error_setg(errp, "Device doesn't support NVMe command set"); ret = -EINVAL; goto out; } - s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF))); - s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t); + s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap)); + s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t); bs->bl.opt_mem_alignment = s->page_size; - timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000); + timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000); /* Reset device to get a clean state. */ - s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE); + regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE); /* Wait for CSTS.RDY = 0. */ deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS; - while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) { + while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to reset (%" PRId64 " ms)", @@ -747,6 +743,13 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } } + s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar), + NVME_DOORBELL_SIZE, PROT_WRITE, errp); + if (!s->doorbells) { + ret = -EINVAL; + goto out; + } + /* Set up admin queue. */ s->queues = g_new(NVMeQueuePair *, 1); s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0, @@ -758,18 +761,19 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } s->nr_queues = 1; QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); - s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE); - s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); - s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); + regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) | + (NVME_QUEUE_SIZE << AQA_ASQS_SHIFT)); + regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova); + regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova); /* After setting up all control registers we can enable device now. */ - s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) | - (ctz32(NVME_SQ_ENTRY_BYTES) << 16) | - 0x1); + regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) | + (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) | + CC_EN_MASK); /* Wait for CSTS.RDY = 1. */ now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - deadline = now + timeout_ms * 1000000; - while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) { + deadline = now + timeout_ms * SCALE_MS; + while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { error_setg(errp, "Timeout while waiting for device to start (%" PRId64 " ms)", @@ -800,6 +804,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, ret = -EIO; } out: + if (regs) { + qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar)); + } + /* Cleaning up is done in nvme_file_open() upon error. */ return ret; } @@ -872,7 +880,8 @@ static void nvme_close(BlockDriverState *bs) &s->irq_notifier[MSIX_SHARED_IRQ_IDX], false, NULL, NULL); event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); - qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE); + qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells, + sizeof(NvmeBar), NVME_DOORBELL_SIZE); qemu_vfio_close(s->vfio); g_free(s->device); |