diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2013-01-15 16:49:18 -0600 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2013-01-15 16:49:18 -0600 |
commit | 5e72179b8f16e05a33fea5f63856aa16dbb29048 (patch) | |
tree | 94f06b03ae4f28470f3876997d1f867b886bd5dd /hw | |
parent | cf7c3f0cb5a7129f57fa9e69d410d6a05031988c (diff) | |
parent | 7e7b7cba16faa7b721b822fa9ed8bebafa35700f (diff) | |
download | qemu-5e72179b8f16e05a33fea5f63856aa16dbb29048.zip qemu-5e72179b8f16e05a33fea5f63856aa16dbb29048.tar.gz qemu-5e72179b8f16e05a33fea5f63856aa16dbb29048.tar.bz2 |
Merge remote-tracking branch 'sstabellini/xen-2013-01-14' into staging

* sstabellini/xen-2013-01-14:
  xen_disk: implement BLKIF_OP_FLUSH_DISKCACHE, remove BLKIF_OP_WRITE_BARRIER
  xen_disk: add persistent grant support to xen_disk backend
  xen_disk: fix memory leak

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/xen_disk.c | 208 |
1 files changed, 184 insertions, 24 deletions
diff --git a/hw/xen_disk.c b/hw/xen_disk.c index a6a64a2..7fea871 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -51,6 +51,13 @@ static int max_requests = 32; #define BLOCK_SIZE 512 #define IOCB_COUNT (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2) +struct PersistentGrant { + void *page; + struct XenBlkDev *blkdev; +}; + +typedef struct PersistentGrant PersistentGrant; + struct ioreq { blkif_request_t req; int16_t status; @@ -68,6 +75,7 @@ struct ioreq { int prot; void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; void *pages; + int num_unmap; /* aio status */ int aio_inflight; @@ -104,6 +112,12 @@ struct XenBlkDev { int requests_inflight; int requests_finished; + /* Persistent grants extension */ + gboolean feature_persistent; + GTree *persistent_gnts; + unsigned int persistent_gnt_count; + unsigned int max_grants; + /* qemu block driver */ DriveInfo *dinfo; BlockDriverState *bs; @@ -112,6 +126,54 @@ struct XenBlkDev { /* ------------------------------------------------------------- */ +static void ioreq_reset(struct ioreq *ioreq) +{ + memset(&ioreq->req, 0, sizeof(ioreq->req)); + ioreq->status = 0; + ioreq->start = 0; + ioreq->presync = 0; + ioreq->postsync = 0; + ioreq->mapped = 0; + + memset(ioreq->domids, 0, sizeof(ioreq->domids)); + memset(ioreq->refs, 0, sizeof(ioreq->refs)); + ioreq->prot = 0; + memset(ioreq->page, 0, sizeof(ioreq->page)); + ioreq->pages = NULL; + + ioreq->aio_inflight = 0; + ioreq->aio_errors = 0; + + ioreq->blkdev = NULL; + memset(&ioreq->list, 0, sizeof(ioreq->list)); + memset(&ioreq->acct, 0, sizeof(ioreq->acct)); + + qemu_iovec_reset(&ioreq->v); +} + +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + uint ua = GPOINTER_TO_UINT(a); + uint ub = GPOINTER_TO_UINT(b); + return (ua > ub) - (ua < ub); +} + +static void destroy_grant(gpointer pgnt) +{ + PersistentGrant *grant = pgnt; + XenGnttab gnt = grant->blkdev->xendev.gnttabdev; + + if (xc_gnttab_munmap(gnt, grant->page, 1) != 0) { + xen_be_printf(&grant->blkdev->xendev, 0, 
+ "xc_gnttab_munmap failed: %s\n", + strerror(errno)); + } + grant->blkdev->persistent_gnt_count--; + xen_be_printf(&grant->blkdev->xendev, 3, + "unmapped grant %p\n", grant->page); + g_free(grant); +} + static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) { struct ioreq *ioreq = NULL; @@ -129,7 +191,6 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) /* get one from freelist */ ioreq = QLIST_FIRST(&blkdev->freelist); QLIST_REMOVE(ioreq, list); - qemu_iovec_reset(&ioreq->v); } QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list); blkdev->requests_inflight++; @@ -153,7 +214,7 @@ static void ioreq_release(struct ioreq *ioreq, bool finish) struct XenBlkDev *blkdev = ioreq->blkdev; QLIST_REMOVE(ioreq, list); - memset(ioreq, 0, sizeof(*ioreq)); + ioreq_reset(ioreq); ioreq->blkdev = blkdev; QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list); if (finish) { @@ -182,12 +243,11 @@ static int ioreq_parse(struct ioreq *ioreq) case BLKIF_OP_READ: ioreq->prot = PROT_WRITE; /* to memory */ break; - case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + ioreq->presync = 1; if (!ioreq->req.nr_segments) { - ioreq->presync = 1; return 0; } - ioreq->presync = ioreq->postsync = 1; /* fall through */ case BLKIF_OP_WRITE: ioreq->prot = PROT_READ; /* from memory */ @@ -241,21 +301,21 @@ static void ioreq_unmap(struct ioreq *ioreq) XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; int i; - if (ioreq->v.niov == 0 || ioreq->mapped == 0) { + if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { return; } if (batch_maps) { if (!ioreq->pages) { return; } - if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) { + if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) { xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", strerror(errno)); } - ioreq->blkdev->cnt_map -= ioreq->v.niov; + ioreq->blkdev->cnt_map -= ioreq->num_unmap; ioreq->pages = NULL; } else { - for (i = 0; i < ioreq->v.niov; i++) { + for (i = 0; i < ioreq->num_unmap; i++) { 
if (!ioreq->page[i]) { continue; } @@ -273,41 +333,120 @@ static void ioreq_unmap(struct ioreq *ioreq) static int ioreq_map(struct ioreq *ioreq) { XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; - int i; + uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i, j, new_maps = 0; + PersistentGrant *grant; + /* domids and refs variables will contain the information necessary + * to map the grants that are needed to fulfill this request. + * + * After mapping the needed grants, the page array will contain the + * memory address of each granted page in the order specified in ioreq + * (disregarding if it's a persistent grant or not). + */ if (ioreq->v.niov == 0 || ioreq->mapped == 1) { return 0; } - if (batch_maps) { + if (ioreq->blkdev->feature_persistent) { + for (i = 0; i < ioreq->v.niov; i++) { + grant = g_tree_lookup(ioreq->blkdev->persistent_gnts, + GUINT_TO_POINTER(ioreq->refs[i])); + + if (grant != NULL) { + page[i] = grant->page; + xen_be_printf(&ioreq->blkdev->xendev, 3, + "using persistent-grant %" PRIu32 "\n", + ioreq->refs[i]); + } else { + /* Add the grant to the list of grants that + * should be mapped + */ + domids[new_maps] = ioreq->domids[i]; + refs[new_maps] = ioreq->refs[i]; + page[i] = NULL; + new_maps++; + } + } + /* Set the protection to RW, since grants may be reused later + * with a different protection than the one needed for this request + */ + ioreq->prot = PROT_WRITE | PROT_READ; + } else { + /* All grants in the request should be mapped */ + memcpy(refs, ioreq->refs, sizeof(refs)); + memcpy(domids, ioreq->domids, sizeof(domids)); + memset(page, 0, sizeof(page)); + new_maps = ioreq->v.niov; + } + + if (batch_maps && new_maps) { ioreq->pages = xc_gnttab_map_grant_refs - (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot); + (gnt, new_maps, domids, refs, ioreq->prot); if (ioreq->pages == NULL) { xen_be_printf(&ioreq->blkdev->xendev, 0, 
"can't map %d grant refs (%s, %d maps)\n", - ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map); + new_maps, strerror(errno), ioreq->blkdev->cnt_map); return -1; } - for (i = 0; i < ioreq->v.niov; i++) { - ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE + - (uintptr_t)ioreq->v.iov[i].iov_base; + for (i = 0, j = 0; i < ioreq->v.niov; i++) { + if (page[i] == NULL) { + page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE; + } } - ioreq->blkdev->cnt_map += ioreq->v.niov; - } else { - for (i = 0; i < ioreq->v.niov; i++) { + ioreq->blkdev->cnt_map += new_maps; + } else if (new_maps) { + for (i = 0; i < new_maps; i++) { ioreq->page[i] = xc_gnttab_map_grant_ref - (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot); + (gnt, domids[i], refs[i], ioreq->prot); if (ioreq->page[i] == NULL) { xen_be_printf(&ioreq->blkdev->xendev, 0, "can't map grant ref %d (%s, %d maps)\n", - ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map); + refs[i], strerror(errno), ioreq->blkdev->cnt_map); ioreq_unmap(ioreq); return -1; } - ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base; ioreq->blkdev->cnt_map++; } + for (i = 0, j = 0; i < ioreq->v.niov; i++) { + if (page[i] == NULL) { + page[i] = ioreq->page[j++]; + } + } + } + if (ioreq->blkdev->feature_persistent) { + while ((ioreq->blkdev->persistent_gnt_count < ioreq->blkdev->max_grants) + && new_maps) { + /* Go through the list of newly mapped grants and add as many + * as possible to the list of persistently mapped grants. + * + * Since we start at the end of ioreq->page(s), we only need + * to decrease new_maps to prevent this granted pages from + * being unmapped in ioreq_unmap. 
+ */ + grant = g_malloc0(sizeof(*grant)); + new_maps--; + if (batch_maps) { + grant->page = ioreq->pages + (new_maps) * XC_PAGE_SIZE; + } else { + grant->page = ioreq->page[new_maps]; + } + grant->blkdev = ioreq->blkdev; + xen_be_printf(&ioreq->blkdev->xendev, 3, + "adding grant %" PRIu32 " page: %p\n", + refs[new_maps], grant->page); + g_tree_insert(ioreq->blkdev->persistent_gnts, + GUINT_TO_POINTER(refs[new_maps]), + grant); + ioreq->blkdev->persistent_gnt_count++; + } + } + for (i = 0; i < ioreq->v.niov; i++) { + ioreq->v.iov[i].iov_base += (uintptr_t)page[i]; } ioreq->mapped = 1; + ioreq->num_unmap = new_maps; return 0; } @@ -369,7 +508,7 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) qemu_aio_complete, ioreq); break; case BLKIF_OP_WRITE: - case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: if (!ioreq->req.nr_segments) { break; } @@ -654,7 +793,8 @@ static int blk_init(struct XenDevice *xendev) blkdev->file_size, blkdev->file_size >> 20); /* fill info */ - xenstore_write_be_int(&blkdev->xendev, "feature-barrier", 1); + xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1); + xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1); xenstore_write_be_int(&blkdev->xendev, "info", info); xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); xenstore_write_be_int(&blkdev->xendev, "sectors", @@ -678,6 +818,7 @@ out_error: static int blk_connect(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); + int pers; if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) { return -1; @@ -686,6 +827,11 @@ static int blk_connect(struct XenDevice *xendev) &blkdev->xendev.remote_port) == -1) { return -1; } + if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) { + blkdev->feature_persistent = FALSE; + } else { + blkdev->feature_persistent = !!pers; + } blkdev->protocol = BLKIF_PROTOCOL_NATIVE; if (blkdev->xendev.protocol) { @@ 
-729,6 +875,15 @@ static int blk_connect(struct XenDevice *xendev) } } + if (blkdev->feature_persistent) { + /* Init persistent grants */ + blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST; + blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp, + NULL, NULL, + (GDestroyNotify)destroy_grant); + blkdev->persistent_gnt_count = 0; + } + xen_be_bind_evtchn(&blkdev->xendev); xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " @@ -769,6 +924,11 @@ static int blk_free(struct XenDevice *xendev) blk_disconnect(xendev); } + /* Free persistent grants */ + if (blkdev->feature_persistent) { + g_tree_destroy(blkdev->persistent_gnts); + } + while (!QLIST_EMPTY(&blkdev->freelist)) { ioreq = QLIST_FIRST(&blkdev->freelist); QLIST_REMOVE(ioreq, list); |