diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-07-22 18:42:29 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-07-22 18:42:29 +0100 |
commit | ecb199b177b3d94c1dfba2e7ba4595d368f780f7 (patch) | |
tree | d177e7167cf62b61f3aa97d6d7fb7b72225eccf6 | |
parent | 23da9e297b4120ca9702cabec91599a44255fe96 (diff) | |
parent | 43eaaaef0e18817bf78d8f135993f8579cad2cc6 (diff) | |
download | qemu-ecb199b177b3d94c1dfba2e7ba4595d368f780f7.zip qemu-ecb199b177b3d94c1dfba2e7ba4595d368f780f7.tar.gz qemu-ecb199b177b3d94c1dfba2e7ba4595d368f780f7.tar.bz2 |
Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2019-07-22' into staging
Block patches for 4.1.0-rc2:
- NVMe block driver fixes
- Drain/AioContext fixes
# gpg: Signature made Mon 22 Jul 2019 17:44:45 BST
# gpg: using RSA key 91BEB60A30DB3E8857D11829F407DB0061D5CF40
# gpg: issuer "mreitz@redhat.com"
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>" [full]
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40
* remotes/maxreitz/tags/pull-block-2019-07-22:
block: Only the main loop can change AioContexts
block: Dec. drained_end_counter before bdrv_wakeup
block/nvme: don't touch the completion entries
block/nvme: support larger than 512 bytes sector devices
block/nvme: fix doorbell stride
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block.c | 13 | ||||
-rw-r--r-- | block/io.c | 5 | ||||
-rw-r--r-- | block/nvme.c | 52 | ||||
-rw-r--r-- | include/block/block.h | 8 |
4 files changed, 55 insertions, 23 deletions
@@ -5914,6 +5914,8 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, * Changes the AioContext used for fd handlers, timers, and BHs by this * BlockDriverState and all its children and parents. * + * Must be called from the main AioContext. + * * The caller must own the AioContext lock for the old AioContext of bs, but it * must not own the AioContext lock for new_context (unless new_context is the * same as the current context of bs). @@ -5925,9 +5927,10 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, AioContext *new_context, GSList **ignore) { AioContext *old_context = bdrv_get_aio_context(bs); - AioContext *current_context = qemu_get_current_aio_context(); BdrvChild *child; + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + if (old_context == new_context) { return; } @@ -5953,7 +5956,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, bdrv_detach_aio_context(bs); /* Acquire the new context, if necessary */ - if (current_context != new_context) { + if (qemu_get_aio_context() != new_context) { aio_context_acquire(new_context); } @@ -5965,16 +5968,16 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, * subtree that have not yet been moved to the new AioContext. * Release the old one so bdrv_drained_end() can poll them. */ - if (current_context != old_context) { + if (qemu_get_aio_context() != old_context) { aio_context_release(old_context); } bdrv_drained_end(bs); - if (current_context != old_context) { + if (qemu_get_aio_context() != old_context) { aio_context_acquire(old_context); } - if (current_context != new_context) { + if (qemu_get_aio_context() != new_context) { aio_context_release(new_context); } } @@ -217,13 +217,12 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) bs->drv->bdrv_co_drain_end(bs); } - /* Set data->done before reading bs->wakeup. 
*/ + /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ atomic_mb_set(&data->done, true); - bdrv_dec_in_flight(bs); - if (!data->begin) { atomic_dec(data->drained_end_counter); } + bdrv_dec_in_flight(bs); g_free(data); } diff --git a/block/nvme.c b/block/nvme.c index 9896b7f..c28755c 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -102,8 +102,11 @@ typedef struct { size_t doorbell_scale; bool write_cache_supported; EventNotifier irq_notifier; + uint64_t nsze; /* Namespace size reported by identify command */ int nsid; /* The namespace id to read/write data. */ + size_t blkshift; + uint64_t max_transfer; bool plugged; @@ -217,7 +220,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs, error_propagate(errp, local_err); goto fail; } - q->cq.doorbell = &s->regs->doorbells[idx * 2 * s->doorbell_scale + 1]; + q->cq.doorbell = &s->regs->doorbells[(idx * 2 + 1) * s->doorbell_scale]; return q; fail: @@ -315,7 +318,7 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q) while (q->inflight) { int16_t cid; c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES]; - if (!c->cid || (le16_to_cpu(c->status) & 0x1) == q->cq_phase) { + if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) { break; } q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE; @@ -339,10 +342,7 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q) qemu_mutex_unlock(&q->lock); req.cb(req.opaque, nvme_translate_error(c)); qemu_mutex_lock(&q->lock); - c->cid = cpu_to_le16(0); q->inflight--; - /* Flip Phase Tag bit. 
*/ - c->status = cpu_to_le16(le16_to_cpu(c->status) ^ 0x1); progress = true; } if (progress) { @@ -418,8 +418,9 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) BDRVNVMeState *s = bs->opaque; NvmeIdCtrl *idctrl; NvmeIdNs *idns; + NvmeLBAF *lbaf; uint8_t *resp; - int r; + int r, hwsect_size; uint64_t iova; NvmeCmd cmd = { .opcode = NVME_ADM_CMD_IDENTIFY, @@ -466,7 +467,22 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) } s->nsze = le64_to_cpu(idns->nsze); + lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)]; + + if (lbaf->ms) { + error_setg(errp, "Namespaces with metadata are not yet supported"); + goto out; + } + + hwsect_size = 1 << lbaf->ds; + + if (hwsect_size < BDRV_SECTOR_SIZE || hwsect_size > s->page_size) { + error_setg(errp, "Namespace has unsupported block size (%d)", + hwsect_size); + goto out; + } + s->blkshift = lbaf->ds; out: qemu_vfio_dma_unmap(s->vfio, resp); qemu_vfree(resp); @@ -785,8 +801,22 @@ fail: static int64_t nvme_getlength(BlockDriverState *bs) { BDRVNVMeState *s = bs->opaque; + return s->nsze << s->blkshift; +} - return s->nsze << BDRV_SECTOR_BITS; +static int64_t nvme_get_blocksize(BlockDriverState *bs) +{ + BDRVNVMeState *s = bs->opaque; + assert(s->blkshift >= BDRV_SECTOR_BITS); + return 1 << s->blkshift; +} + +static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + int64_t blocksize = nvme_get_blocksize(bs); + bsz->phys = blocksize; + bsz->log = blocksize; + return 0; } /* Called with s->dma_map_lock */ @@ -917,13 +947,14 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs, BDRVNVMeState *s = bs->opaque; NVMeQueuePair *ioq = s->queues[1]; NVMeRequest *req; - uint32_t cdw12 = (((bytes >> BDRV_SECTOR_BITS) - 1) & 0xFFFF) | + + uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0xFFFF) | (flags & BDRV_REQ_FUA ? 1 << 30 : 0); NvmeCmd cmd = { .opcode = is_write ? 
NVME_CMD_WRITE : NVME_CMD_READ, .nsid = cpu_to_le32(s->nsid), - .cdw10 = cpu_to_le32((offset >> BDRV_SECTOR_BITS) & 0xFFFFFFFF), - .cdw11 = cpu_to_le32(((offset >> BDRV_SECTOR_BITS) >> 32) & 0xFFFFFFFF), + .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF), + .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF), .cdw12 = cpu_to_le32(cdw12), }; NVMeCoData data = { @@ -1154,6 +1185,7 @@ static BlockDriver bdrv_nvme = { .bdrv_file_open = nvme_file_open, .bdrv_close = nvme_close, .bdrv_getlength = nvme_getlength, + .bdrv_probe_blocksizes = nvme_probe_blocksizes, .bdrv_co_preadv = nvme_co_preadv, .bdrv_co_pwritev = nvme_co_pwritev, diff --git a/include/block/block.h b/include/block/block.h index 60f0047..50a07c1 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -667,11 +667,9 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); * * This polls @bs's AioContext until all scheduled sub-drained_ends * have settled. On one hand, that may result in graph changes. On - * the other, this requires that all involved nodes (@bs and all of - * its parents) are in the same AioContext, and that the caller has - * acquired it. - * If there are any nodes that are in different contexts from @bs, - * these contexts must not be acquired. + * the other, this requires that the caller either runs in the main + * loop; or that all involved nodes (@bs and all of its parents) are + * in the caller's AioContext. */ void bdrv_drained_end(BlockDriverState *bs); |