diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2014-08-15 14:49:50 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2014-08-15 14:49:50 +0100 |
commit | f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9 (patch) | |
tree | 94e077ff9f6c344af73d34bd66468b8852ce2c37 /block | |
parent | f083201667fddd51055c2ac67f67221e82298a35 (diff) | |
parent | 908bcd540f489f7adf2d804347905b0025d808d3 (diff) | |
download | qemu-f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9.zip qemu-f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9.tar.gz qemu-f2fb1da9412ae7b4cb512cfbd86c0185f191e2f9.tar.bz2 |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block patches
# gpg: Signature made Fri 15 Aug 2014 14:07:42 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
* remotes/kevin/tags/for-upstream: (59 commits)
block: Catch !bs->drv in bdrv_check()
iotests: Add test for image header overlap
qcow2: Catch !*host_offset for data allocation
qcow2: Return useful error code in refcount_init()
mirror: Handle failure for potentially large allocations
vpc: Handle failure for potentially large allocations
vmdk: Handle failure for potentially large allocations
vhdx: Handle failure for potentially large allocations
vdi: Handle failure for potentially large allocations
rbd: Handle failure for potentially large allocations
raw-win32: Handle failure for potentially large allocations
raw-posix: Handle failure for potentially large allocations
qed: Handle failure for potentially large allocations
qcow2: Handle failure for potentially large allocations
qcow1: Handle failure for potentially large allocations
parallels: Handle failure for potentially large allocations
nfs: Handle failure for potentially large allocations
iscsi: Handle failure for potentially large allocations
dmg: Handle failure for potentially large allocations
curl: Handle failure for potentially large allocations
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile.objs | 2 | ||||
-rw-r--r-- | block/archipelago.c | 1069 | ||||
-rw-r--r-- | block/bochs.c | 6 | ||||
-rw-r--r-- | block/cloop.c | 23 | ||||
-rw-r--r-- | block/curl.c | 8 | ||||
-rw-r--r-- | block/dmg.c | 19 | ||||
-rw-r--r-- | block/iscsi.c | 5 | ||||
-rw-r--r-- | block/mirror.c | 7 | ||||
-rw-r--r-- | block/nfs.c | 6 | ||||
-rw-r--r-- | block/parallels.c | 6 | ||||
-rw-r--r-- | block/qapi.c | 42 | ||||
-rw-r--r-- | block/qcow.c | 33 | ||||
-rw-r--r-- | block/qcow2-cache.c | 13 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 47 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 54 | ||||
-rw-r--r-- | block/qcow2-snapshot.c | 23 | ||||
-rw-r--r-- | block/qcow2.c | 45 | ||||
-rw-r--r-- | block/qed-check.c | 7 | ||||
-rw-r--r-- | block/qed.c | 6 | ||||
-rw-r--r-- | block/raw-posix.c | 6 | ||||
-rw-r--r-- | block/rbd.c | 7 | ||||
-rw-r--r-- | block/vdi.c | 112 | ||||
-rw-r--r-- | block/vhdx-endian.c | 11 | ||||
-rw-r--r-- | block/vhdx-log.c | 55 | ||||
-rw-r--r-- | block/vhdx.c | 98 | ||||
-rw-r--r-- | block/vhdx.h | 1 | ||||
-rw-r--r-- | block/vmdk.c | 239 | ||||
-rw-r--r-- | block/vpc.c | 112 | ||||
-rw-r--r-- | block/win32-aio.c | 6 |
29 files changed, 1733 insertions, 335 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index fd88c03..858d2b3 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -17,6 +17,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o +block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o endif @@ -35,5 +36,6 @@ gluster.o-cflags := $(GLUSTERFS_CFLAGS) gluster.o-libs := $(GLUSTERFS_LIBS) ssh.o-cflags := $(LIBSSH2_CFLAGS) ssh.o-libs := $(LIBSSH2_LIBS) +archipelago.o-libs := $(ARCHIPELAGO_LIBS) qcow.o-libs := -lz linux-aio.o-libs := -laio diff --git a/block/archipelago.c b/block/archipelago.c new file mode 100644 index 0000000..6629d03 --- /dev/null +++ b/block/archipelago.c @@ -0,0 +1,1069 @@ +/* + * QEMU Block driver for Archipelago + * + * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* + * VM Image on Archipelago volume is specified like this: + * + * file.driver=archipelago,file.volume=<volumename> + * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>] + * [,file.segment=<segment_name>]] + * + * or + * + * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][: + * segment=<segment_name>]] + * + * 'archipelago' is the protocol. + * + * 'mport' is the port number on which mapperd is listening. This is optional + * and if not specified, QEMU will make Archipelago to use the default port. + * + * 'vport' is the port number on which vlmcd is listening. This is optional + * and if not specified, QEMU will make Archipelago to use the default port. + * + * 'segment' is the name of the shared memory segment Archipelago stack + * is using. This is optional and if not specified, QEMU will make Archipelago + * to use the default value, 'archipelago'. 
+ * + * Examples: + * + * file.driver=archipelago,file.volume=my_vm_volume + * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123 + * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123, + * file.vport=1234 + * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123, + * file.vport=1234,file.segment=my_segment + * + * or + * + * file=archipelago:my_vm_volume + * file=archipelago:my_vm_volume/mport=123 + * file=archipelago:my_vm_volume/mport=123:vport=1234 + * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment + * + */ + +#include "qemu-common.h" +#include "block/block_int.h" +#include "qemu/error-report.h" +#include "qemu/thread.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qjson.h" + +#include <inttypes.h> +#include <xseg/xseg.h> +#include <xseg/protocol.h> + +#define ARCHIP_FD_READ 0 +#define ARCHIP_FD_WRITE 1 +#define MAX_REQUEST_SIZE 524288 + +#define ARCHIPELAGO_OPT_VOLUME "volume" +#define ARCHIPELAGO_OPT_SEGMENT "segment" +#define ARCHIPELAGO_OPT_MPORT "mport" +#define ARCHIPELAGO_OPT_VPORT "vport" +#define ARCHIPELAGO_DFL_MPORT 1001 +#define ARCHIPELAGO_DFL_VPORT 501 + +#define archipelagolog(fmt, ...) 
\ + do { \ + fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \ + } while (0) + +typedef enum { + ARCHIP_OP_READ, + ARCHIP_OP_WRITE, + ARCHIP_OP_FLUSH, + ARCHIP_OP_VOLINFO, +} ARCHIPCmd; + +typedef struct ArchipelagoAIOCB { + BlockDriverAIOCB common; + QEMUBH *bh; + struct BDRVArchipelagoState *s; + QEMUIOVector *qiov; + ARCHIPCmd cmd; + bool cancelled; + int status; + int64_t size; + int64_t ret; +} ArchipelagoAIOCB; + +typedef struct BDRVArchipelagoState { + ArchipelagoAIOCB *event_acb; + char *volname; + char *segment_name; + uint64_t size; + /* Archipelago specific */ + struct xseg *xseg; + struct xseg_port *port; + xport srcport; + xport sport; + xport mportno; + xport vportno; + QemuMutex archip_mutex; + QemuCond archip_cond; + bool is_signaled; + /* Request handler specific */ + QemuThread request_th; + QemuCond request_cond; + QemuMutex request_mutex; + bool th_is_signaled; + bool stopping; +} BDRVArchipelagoState; + +typedef struct ArchipelagoSegmentedRequest { + size_t count; + size_t total; + int ref; + int failed; +} ArchipelagoSegmentedRequest; + +typedef struct AIORequestData { + const char *volname; + off_t offset; + size_t size; + uint64_t bufidx; + int ret; + int op; + ArchipelagoAIOCB *aio_cb; + ArchipelagoSegmentedRequest *segreq; +} AIORequestData; + +static void qemu_archipelago_complete_aio(void *opaque); + +static void init_local_signal(struct xseg *xseg, xport sport, xport srcport) +{ + if (xseg && (sport != srcport)) { + xseg_init_local_signal(xseg, srcport); + sport = srcport; + } +} + +static void archipelago_finish_aiocb(AIORequestData *reqdata) +{ + if (reqdata->aio_cb->ret != reqdata->segreq->total) { + reqdata->aio_cb->ret = -EIO; + } else if (reqdata->aio_cb->ret == reqdata->segreq->total) { + reqdata->aio_cb->ret = 0; + } + reqdata->aio_cb->bh = aio_bh_new( + bdrv_get_aio_context(reqdata->aio_cb->common.bs), + qemu_archipelago_complete_aio, reqdata + ); + qemu_bh_schedule(reqdata->aio_cb->bh); +} + +static int 
wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port, + struct xseg_request *expected_req) +{ + struct xseg_request *req; + xseg_prepare_wait(xseg, srcport); + void *psd = xseg_get_signal_desc(xseg, port); + while (1) { + req = xseg_receive(xseg, srcport, X_NONBLOCK); + if (req) { + if (req != expected_req) { + archipelagolog("Unknown received request\n"); + xseg_put_request(xseg, req, srcport); + } else if (!(req->state & XS_SERVED)) { + return -1; + } else { + break; + } + } + xseg_wait_signal(xseg, psd, 100000UL); + } + xseg_cancel_wait(xseg, srcport); + return 0; +} + +static void xseg_request_handler(void *state) +{ + BDRVArchipelagoState *s = (BDRVArchipelagoState *) state; + void *psd = xseg_get_signal_desc(s->xseg, s->port); + qemu_mutex_lock(&s->request_mutex); + + while (!s->stopping) { + struct xseg_request *req; + void *data; + xseg_prepare_wait(s->xseg, s->srcport); + req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK); + if (req) { + AIORequestData *reqdata; + ArchipelagoSegmentedRequest *segreq; + xseg_get_req_data(s->xseg, req, (void **)&reqdata); + + switch (reqdata->op) { + case ARCHIP_OP_READ: + data = xseg_get_data(s->xseg, req); + segreq = reqdata->segreq; + segreq->count += req->serviced; + + qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx, + data, + req->serviced); + + xseg_put_request(s->xseg, req, s->srcport); + + if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) { + if (!segreq->failed) { + reqdata->aio_cb->ret = segreq->count; + archipelago_finish_aiocb(reqdata); + g_free(segreq); + } else { + g_free(segreq); + g_free(reqdata); + } + } else { + g_free(reqdata); + } + break; + case ARCHIP_OP_WRITE: + case ARCHIP_OP_FLUSH: + segreq = reqdata->segreq; + segreq->count += req->serviced; + xseg_put_request(s->xseg, req, s->srcport); + + if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) { + if (!segreq->failed) { + reqdata->aio_cb->ret = segreq->count; + archipelago_finish_aiocb(reqdata); + g_free(segreq); + } else { 
+ g_free(segreq); + g_free(reqdata); + } + } else { + g_free(reqdata); + } + break; + case ARCHIP_OP_VOLINFO: + s->is_signaled = true; + qemu_cond_signal(&s->archip_cond); + break; + } + } else { + xseg_wait_signal(s->xseg, psd, 100000UL); + } + xseg_cancel_wait(s->xseg, s->srcport); + } + + s->th_is_signaled = true; + qemu_cond_signal(&s->request_cond); + qemu_mutex_unlock(&s->request_mutex); + qemu_thread_exit(NULL); +} + +static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s) +{ + if (xseg_initialize()) { + archipelagolog("Cannot initialize XSEG\n"); + goto err_exit; + } + + s->xseg = xseg_join("posix", s->segment_name, + "posixfd", NULL); + if (!s->xseg) { + archipelagolog("Cannot join XSEG shared memory segment\n"); + goto err_exit; + } + s->port = xseg_bind_dynport(s->xseg); + s->srcport = s->port->portno; + init_local_signal(s->xseg, s->sport, s->srcport); + return 0; + +err_exit: + return -1; +} + +static int qemu_archipelago_init(BDRVArchipelagoState *s) +{ + int ret; + + ret = qemu_archipelago_xseg_init(s); + if (ret < 0) { + error_report("Cannot initialize XSEG. 
Aborting...\n"); + goto err_exit; + } + + qemu_cond_init(&s->archip_cond); + qemu_mutex_init(&s->archip_mutex); + qemu_cond_init(&s->request_cond); + qemu_mutex_init(&s->request_mutex); + s->th_is_signaled = false; + qemu_thread_create(&s->request_th, "xseg_io_th", + (void *) xseg_request_handler, + (void *) s, QEMU_THREAD_JOINABLE); + +err_exit: + return ret; +} + +static void qemu_archipelago_complete_aio(void *opaque) +{ + AIORequestData *reqdata = (AIORequestData *) opaque; + ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb; + + qemu_bh_delete(aio_cb->bh); + aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret); + aio_cb->status = 0; + + if (!aio_cb->cancelled) { + qemu_aio_release(aio_cb); + } + g_free(reqdata); +} + +static void xseg_find_port(char *pstr, const char *needle, xport *aport) +{ + const char *a; + char *endptr = NULL; + unsigned long port; + if (strstart(pstr, needle, &a)) { + if (strlen(a) > 0) { + port = strtoul(a, &endptr, 10); + if (strlen(endptr)) { + *aport = -2; + return; + } + *aport = (xport) port; + } + } +} + +static void xseg_find_segment(char *pstr, const char *needle, + char **segment_name) +{ + const char *a; + if (strstart(pstr, needle, &a)) { + if (strlen(a) > 0) { + *segment_name = g_strdup(a); + } + } +} + +static void parse_filename_opts(const char *filename, Error **errp, + char **volume, char **segment_name, + xport *mport, xport *vport) +{ + const char *start; + char *tokens[4], *ds; + int idx; + xport lmport = NoPort, lvport = NoPort; + + strstart(filename, "archipelago:", &start); + + ds = g_strdup(start); + tokens[0] = strtok(ds, "/"); + tokens[1] = strtok(NULL, ":"); + tokens[2] = strtok(NULL, ":"); + tokens[3] = strtok(NULL, "\0"); + + if (!strlen(tokens[0])) { + error_setg(errp, "volume name must be specified first"); + g_free(ds); + return; + } + + for (idx = 1; idx < 4; idx++) { + if (tokens[idx] != NULL) { + if (strstart(tokens[idx], "mport=", NULL)) { + xseg_find_port(tokens[idx], "mport=", 
&lmport); + } + if (strstart(tokens[idx], "vport=", NULL)) { + xseg_find_port(tokens[idx], "vport=", &lvport); + } + if (strstart(tokens[idx], "segment=", NULL)) { + xseg_find_segment(tokens[idx], "segment=", segment_name); + } + } + } + + if ((lmport == -2) || (lvport == -2)) { + error_setg(errp, "mport and/or vport must be set"); + g_free(ds); + return; + } + *volume = g_strdup(tokens[0]); + *mport = lmport; + *vport = lvport; + g_free(ds); +} + +static void archipelago_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + const char *start; + char *volume = NULL, *segment_name = NULL; + xport mport = NoPort, vport = NoPort; + + if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME) + || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT) + || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT) + || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) { + error_setg(errp, "volume/mport/vport/segment and a file name may not" + " be specified at the same time"); + return; + } + + if (!strstart(filename, "archipelago:", &start)) { + error_setg(errp, "File name must start with 'archipelago:'"); + return; + } + + if (!strlen(start) || strstart(start, "/", NULL)) { + error_setg(errp, "volume name must be specified"); + return; + } + + parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport); + + if (volume) { + qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume)); + g_free(volume); + } + if (segment_name) { + qdict_put(options, ARCHIPELAGO_OPT_SEGMENT, + qstring_from_str(segment_name)); + g_free(segment_name); + } + if (mport != NoPort) { + qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport)); + } + if (vport != NoPort) { + qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport)); + } +} + +static QemuOptsList archipelago_runtime_opts = { + .name = "archipelago", + .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head), + .desc = { + { + .name = ARCHIPELAGO_OPT_VOLUME, + .type = QEMU_OPT_STRING, + .help = "Name 
of the volume image", + }, + { + .name = ARCHIPELAGO_OPT_SEGMENT, + .type = QEMU_OPT_STRING, + .help = "Name of the Archipelago shared memory segment", + }, + { + .name = ARCHIPELAGO_OPT_MPORT, + .type = QEMU_OPT_NUMBER, + .help = "Archipelago mapperd port number" + }, + { + .name = ARCHIPELAGO_OPT_VPORT, + .type = QEMU_OPT_NUMBER, + .help = "Archipelago vlmcd port number" + + }, + { /* end of list */ } + }, +}; + +static int qemu_archipelago_open(BlockDriverState *bs, + QDict *options, + int bdrv_flags, + Error **errp) +{ + int ret = 0; + const char *volume, *segment_name; + QemuOpts *opts; + Error *local_err = NULL; + BDRVArchipelagoState *s = bs->opaque; + + opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto err_exit; + } + + s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT, + ARCHIPELAGO_DFL_MPORT); + s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT, + ARCHIPELAGO_DFL_VPORT); + + segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT); + if (segment_name == NULL) { + s->segment_name = g_strdup("archipelago"); + } else { + s->segment_name = g_strdup(segment_name); + } + + volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME); + if (volume == NULL) { + error_setg(errp, "archipelago block driver requires the 'volume'" + " option"); + ret = -EINVAL; + goto err_exit; + } + s->volname = g_strdup(volume); + + /* Initialize XSEG, join shared memory segment */ + ret = qemu_archipelago_init(s); + if (ret < 0) { + error_setg(errp, "cannot initialize XSEG and join shared " + "memory segment"); + goto err_exit; + } + + qemu_opts_del(opts); + return 0; + +err_exit: + g_free(s->volname); + g_free(s->segment_name); + qemu_opts_del(opts); + return ret; +} + +static void qemu_archipelago_close(BlockDriverState *bs) +{ + int r, targetlen; + char *target; + struct xseg_request *req; + 
BDRVArchipelagoState *s = bs->opaque; + + s->stopping = true; + + qemu_mutex_lock(&s->request_mutex); + while (!s->th_is_signaled) { + qemu_cond_wait(&s->request_cond, + &s->request_mutex); + } + qemu_mutex_unlock(&s->request_mutex); + qemu_thread_join(&s->request_th); + qemu_cond_destroy(&s->request_cond); + qemu_mutex_destroy(&s->request_mutex); + + qemu_cond_destroy(&s->archip_cond); + qemu_mutex_destroy(&s->archip_mutex); + + targetlen = strlen(s->volname); + req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC); + if (!req) { + archipelagolog("Cannot get XSEG request\n"); + goto err_exit; + } + r = xseg_prep_request(s->xseg, req, targetlen, 0); + if (r < 0) { + xseg_put_request(s->xseg, req, s->srcport); + archipelagolog("Cannot prepare XSEG close request\n"); + goto err_exit; + } + + target = xseg_get_target(s->xseg, req); + memcpy(target, s->volname, targetlen); + req->size = req->datalen; + req->offset = 0; + req->op = X_CLOSE; + + xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); + if (p == NoPort) { + xseg_put_request(s->xseg, req, s->srcport); + archipelagolog("Cannot submit XSEG close request\n"); + goto err_exit; + } + + xseg_signal(s->xseg, p); + wait_reply(s->xseg, s->srcport, s->port, req); + + xseg_put_request(s->xseg, req, s->srcport); + +err_exit: + g_free(s->volname); + g_free(s->segment_name); + xseg_quit_local_signal(s->xseg, s->srcport); + xseg_leave_dynport(s->xseg, s->port); + xseg_leave(s->xseg); +} + +static int qemu_archipelago_create_volume(Error **errp, const char *volname, + char *segment_name, + uint64_t size, xport mportno, + xport vportno) +{ + int ret, targetlen; + struct xseg *xseg = NULL; + struct xseg_request *req; + struct xseg_request_clone *xclone; + struct xseg_port *port; + xport srcport = NoPort, sport = NoPort; + char *target; + + /* Try default values if none has been set */ + if (mportno == (xport) -1) { + mportno = ARCHIPELAGO_DFL_MPORT; + } + + if (vportno == (xport) -1) { + vportno = 
ARCHIPELAGO_DFL_VPORT; + } + + if (xseg_initialize()) { + error_setg(errp, "Cannot initialize XSEG"); + return -1; + } + + xseg = xseg_join("posix", segment_name, + "posixfd", NULL); + + if (!xseg) { + error_setg(errp, "Cannot join XSEG shared memory segment"); + return -1; + } + + port = xseg_bind_dynport(xseg); + srcport = port->portno; + init_local_signal(xseg, sport, srcport); + + req = xseg_get_request(xseg, srcport, mportno, X_ALLOC); + if (!req) { + error_setg(errp, "Cannot get XSEG request"); + return -1; + } + + targetlen = strlen(volname); + ret = xseg_prep_request(xseg, req, targetlen, + sizeof(struct xseg_request_clone)); + if (ret < 0) { + error_setg(errp, "Cannot prepare XSEG request"); + goto err_exit; + } + + target = xseg_get_target(xseg, req); + if (!target) { + error_setg(errp, "Cannot get XSEG target.\n"); + goto err_exit; + } + memcpy(target, volname, targetlen); + xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req); + memset(xclone->target, 0 , XSEG_MAX_TARGETLEN); + xclone->targetlen = 0; + xclone->size = size; + req->offset = 0; + req->size = req->datalen; + req->op = X_CLONE; + + xport p = xseg_submit(xseg, req, srcport, X_ALLOC); + if (p == NoPort) { + error_setg(errp, "Could not submit XSEG request"); + goto err_exit; + } + xseg_signal(xseg, p); + + ret = wait_reply(xseg, srcport, port, req); + if (ret < 0) { + error_setg(errp, "wait_reply() error."); + } + + xseg_put_request(xseg, req, srcport); + xseg_quit_local_signal(xseg, srcport); + xseg_leave_dynport(xseg, port); + xseg_leave(xseg); + return ret; + +err_exit: + xseg_put_request(xseg, req, srcport); + xseg_quit_local_signal(xseg, srcport); + xseg_leave_dynport(xseg, port); + xseg_leave(xseg); + return -1; +} + +static int qemu_archipelago_create(const char *filename, + QemuOpts *options, + Error **errp) +{ + int ret = 0; + uint64_t total_size = 0; + char *volname = NULL, *segment_name = NULL; + const char *start; + xport mport = NoPort, vport = NoPort; + + if 
(!strstart(filename, "archipelago:", &start)) { + error_setg(errp, "File name must start with 'archipelago:'"); + return -1; + } + + if (!strlen(start) || strstart(start, "/", NULL)) { + error_setg(errp, "volume name must be specified"); + return -1; + } + + parse_filename_opts(filename, errp, &volname, &segment_name, &mport, + &vport); + total_size = qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0); + + if (segment_name == NULL) { + segment_name = g_strdup("archipelago"); + } + + /* Create an Archipelago volume */ + ret = qemu_archipelago_create_volume(errp, volname, segment_name, + total_size, mport, + vport); + + g_free(volname); + g_free(segment_name); + return ret; +} + +static void qemu_archipelago_aio_cancel(BlockDriverAIOCB *blockacb) +{ + ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) blockacb; + aio_cb->cancelled = true; + while (aio_cb->status == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(aio_cb->common.bs), true); + } + qemu_aio_release(aio_cb); +} + +static const AIOCBInfo archipelago_aiocb_info = { + .aiocb_size = sizeof(ArchipelagoAIOCB), + .cancel = qemu_archipelago_aio_cancel, +}; + +static int archipelago_submit_request(BDRVArchipelagoState *s, + uint64_t bufidx, + size_t count, + off_t offset, + ArchipelagoAIOCB *aio_cb, + ArchipelagoSegmentedRequest *segreq, + int op) +{ + int ret, targetlen; + char *target; + void *data = NULL; + struct xseg_request *req; + AIORequestData *reqdata = g_malloc(sizeof(AIORequestData)); + + targetlen = strlen(s->volname); + req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC); + if (!req) { + archipelagolog("Cannot get XSEG request\n"); + goto err_exit2; + } + ret = xseg_prep_request(s->xseg, req, targetlen, count); + if (ret < 0) { + archipelagolog("Cannot prepare XSEG request\n"); + goto err_exit; + } + target = xseg_get_target(s->xseg, req); + if (!target) { + archipelagolog("Cannot get XSEG target\n"); + goto err_exit; + } + memcpy(target, s->volname, targetlen); + req->size = count; + 
req->offset = offset; + + switch (op) { + case ARCHIP_OP_READ: + req->op = X_READ; + break; + case ARCHIP_OP_WRITE: + req->op = X_WRITE; + break; + case ARCHIP_OP_FLUSH: + req->op = X_FLUSH; + break; + } + reqdata->volname = s->volname; + reqdata->offset = offset; + reqdata->size = count; + reqdata->bufidx = bufidx; + reqdata->aio_cb = aio_cb; + reqdata->segreq = segreq; + reqdata->op = op; + + xseg_set_req_data(s->xseg, req, reqdata); + if (op == ARCHIP_OP_WRITE) { + data = xseg_get_data(s->xseg, req); + if (!data) { + archipelagolog("Cannot get XSEG data\n"); + goto err_exit; + } + qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count); + } + + xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); + if (p == NoPort) { + archipelagolog("Could not submit XSEG request\n"); + goto err_exit; + } + xseg_signal(s->xseg, p); + return 0; + +err_exit: + g_free(reqdata); + xseg_put_request(s->xseg, req, s->srcport); + return -EIO; +err_exit2: + g_free(reqdata); + return -EIO; +} + +static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s, + size_t count, + off_t offset, + ArchipelagoAIOCB *aio_cb, + int op) +{ + int i, ret, segments_nr, last_segment_size; + ArchipelagoSegmentedRequest *segreq; + + segreq = g_malloc(sizeof(ArchipelagoSegmentedRequest)); + + if (op == ARCHIP_OP_FLUSH) { + segments_nr = 1; + segreq->ref = segments_nr; + segreq->total = count; + segreq->count = 0; + segreq->failed = 0; + ret = archipelago_submit_request(s, 0, count, offset, aio_cb, + segreq, ARCHIP_OP_FLUSH); + if (ret < 0) { + goto err_exit; + } + return 0; + } + + segments_nr = (int)(count / MAX_REQUEST_SIZE) + \ + ((count % MAX_REQUEST_SIZE) ? 
1 : 0); + last_segment_size = (int)(count % MAX_REQUEST_SIZE); + + segreq->ref = segments_nr; + segreq->total = count; + segreq->count = 0; + segreq->failed = 0; + + for (i = 0; i < segments_nr - 1; i++) { + ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE, + MAX_REQUEST_SIZE, + offset + i * MAX_REQUEST_SIZE, + aio_cb, segreq, op); + + if (ret < 0) { + goto err_exit; + } + } + + if ((segments_nr > 1) && last_segment_size) { + ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE, + last_segment_size, + offset + i * MAX_REQUEST_SIZE, + aio_cb, segreq, op); + } else if ((segments_nr > 1) && !last_segment_size) { + ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE, + MAX_REQUEST_SIZE, + offset + i * MAX_REQUEST_SIZE, + aio_cb, segreq, op); + } else if (segments_nr == 1) { + ret = archipelago_submit_request(s, 0, count, offset, aio_cb, + segreq, op); + } + + if (ret < 0) { + goto err_exit; + } + + return 0; + +err_exit: + __sync_add_and_fetch(&segreq->failed, 1); + if (segments_nr == 1) { + if (__sync_add_and_fetch(&segreq->ref, -1) == 0) { + g_free(segreq); + } + } else { + if ((__sync_add_and_fetch(&segreq->ref, -segments_nr + i)) == 0) { + g_free(segreq); + } + } + + return ret; +} + +static BlockDriverAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque, + int op) +{ + ArchipelagoAIOCB *aio_cb; + BDRVArchipelagoState *s = bs->opaque; + int64_t size, off; + int ret; + + aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque); + aio_cb->cmd = op; + aio_cb->qiov = qiov; + + aio_cb->ret = 0; + aio_cb->s = s; + aio_cb->cancelled = false; + aio_cb->status = -EINPROGRESS; + + off = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + aio_cb->size = size; + + ret = archipelago_aio_segmented_rw(s, size, off, + aio_cb, op); + if (ret < 0) { + goto err_exit; + } + return &aio_cb->common; + +err_exit: + 
error_report("qemu_archipelago_aio_rw(): I/O Error\n"); + qemu_aio_release(aio_cb); + return NULL; +} + +static BlockDriverAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb, + opaque, ARCHIP_OP_READ); +} + +static BlockDriverAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb, + opaque, ARCHIP_OP_WRITE); +} + +static int64_t archipelago_volume_info(BDRVArchipelagoState *s) +{ + uint64_t size; + int ret, targetlen; + struct xseg_request *req; + struct xseg_reply_info *xinfo; + AIORequestData *reqdata = g_malloc(sizeof(AIORequestData)); + + const char *volname = s->volname; + targetlen = strlen(volname); + req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC); + if (!req) { + archipelagolog("Cannot get XSEG request\n"); + goto err_exit2; + } + ret = xseg_prep_request(s->xseg, req, targetlen, + sizeof(struct xseg_reply_info)); + if (ret < 0) { + archipelagolog("Cannot prepare XSEG request\n"); + goto err_exit; + } + char *target = xseg_get_target(s->xseg, req); + if (!target) { + archipelagolog("Cannot get XSEG target\n"); + goto err_exit; + } + memcpy(target, volname, targetlen); + req->size = req->datalen; + req->offset = 0; + req->op = X_INFO; + + reqdata->op = ARCHIP_OP_VOLINFO; + reqdata->volname = volname; + xseg_set_req_data(s->xseg, req, reqdata); + + xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); + if (p == NoPort) { + archipelagolog("Cannot submit XSEG request\n"); + goto err_exit; + } + xseg_signal(s->xseg, p); + qemu_mutex_lock(&s->archip_mutex); + while (!s->is_signaled) { + qemu_cond_wait(&s->archip_cond, &s->archip_mutex); + } + s->is_signaled = false; + 
qemu_mutex_unlock(&s->archip_mutex); + + xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req); + size = xinfo->size; + xseg_put_request(s->xseg, req, s->srcport); + g_free(reqdata); + s->size = size; + return size; + +err_exit: + xseg_put_request(s->xseg, req, s->srcport); +err_exit2: + g_free(reqdata); + return -EIO; +} + +static int64_t qemu_archipelago_getlength(BlockDriverState *bs) +{ + int64_t ret; + BDRVArchipelagoState *s = bs->opaque; + + ret = archipelago_volume_info(s); + return ret; +} + +static QemuOptsList qemu_archipelago_create_opts = { + .name = "archipelago-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { /* end of list */ } + } +}; + +static BlockDriverAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque, + ARCHIP_OP_FLUSH); +} + +static BlockDriver bdrv_archipelago = { + .format_name = "archipelago", + .protocol_name = "archipelago", + .instance_size = sizeof(BDRVArchipelagoState), + .bdrv_parse_filename = archipelago_parse_filename, + .bdrv_file_open = qemu_archipelago_open, + .bdrv_close = qemu_archipelago_close, + .bdrv_create = qemu_archipelago_create, + .bdrv_getlength = qemu_archipelago_getlength, + .bdrv_aio_readv = qemu_archipelago_aio_readv, + .bdrv_aio_writev = qemu_archipelago_aio_writev, + .bdrv_aio_flush = qemu_archipelago_aio_flush, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .create_opts = &qemu_archipelago_create_opts, +}; + +static void bdrv_archipelago_init(void) +{ + bdrv_register(&bdrv_archipelago); +} + +block_init(bdrv_archipelago_init); diff --git a/block/bochs.c b/block/bochs.c index eba23df..6674b27 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -131,7 +131,11 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, return -EFBIG; } - 
s->catalog_bitmap = g_malloc(s->catalog_size * 4); + s->catalog_bitmap = g_try_malloc(s->catalog_size * 4); + if (s->catalog_size && s->catalog_bitmap == NULL) { + error_setg(errp, "Could not allocate memory for catalog"); + return -ENOMEM; + } ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap, s->catalog_size * 4); diff --git a/block/cloop.c b/block/cloop.c index 8457737..f328be0 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -116,7 +116,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, "try increasing block size"); return -EINVAL; } - s->offsets = g_malloc(offsets_size); + + s->offsets = g_try_malloc(offsets_size); + if (s->offsets == NULL) { + error_setg(errp, "Could not allocate offsets table"); + return -ENOMEM; + } ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size); if (ret < 0) { @@ -158,8 +163,20 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, } /* initialize zlib engine */ - s->compressed_block = g_malloc(max_compressed_block_size + 1); - s->uncompressed_block = g_malloc(s->block_size); + s->compressed_block = g_try_malloc(max_compressed_block_size + 1); + if (s->compressed_block == NULL) { + error_setg(errp, "Could not allocate compressed_block"); + ret = -ENOMEM; + goto fail; + } + + s->uncompressed_block = g_try_malloc(s->block_size); + if (s->uncompressed_block == NULL) { + error_setg(errp, "Could not allocate uncompressed_block"); + ret = -ENOMEM; + goto fail; + } + if (inflateInit(&s->zstream) != Z_OK) { ret = -EINVAL; goto fail; diff --git a/block/curl.c b/block/curl.c index 79ff2f1..d4b85d2 100644 --- a/block/curl.c +++ b/block/curl.c @@ -640,7 +640,13 @@ static void curl_readv_bh_cb(void *p) state->buf_start = start; state->buf_len = acb->end + s->readahead_size; end = MIN(start + state->buf_len, s->len) - 1; - state->orig_buf = g_malloc(state->buf_len); + state->orig_buf = g_try_malloc(state->buf_len); + if (state->buf_len && state->orig_buf == NULL) { + 
curl_clean_state(state); + acb->common.cb(acb->common.opaque, -ENOMEM); + qemu_aio_release(acb); + return; + } state->acb[0] = acb; snprintf(state->range, 127, "%zd-%zd", start, end); diff --git a/block/dmg.c b/block/dmg.c index 1e153cd..e455886 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -284,8 +284,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, } /* initialize zlib engine */ - s->compressed_chunk = g_malloc(max_compressed_size + 1); - s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk); + s->compressed_chunk = qemu_try_blockalign(bs->file, + max_compressed_size + 1); + s->uncompressed_chunk = qemu_try_blockalign(bs->file, + 512 * max_sectors_per_chunk); + if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) { + ret = -ENOMEM; + goto fail; + } + if (inflateInit(&s->zstream) != Z_OK) { ret = -EINVAL; goto fail; @@ -302,8 +309,8 @@ fail: g_free(s->lengths); g_free(s->sectors); g_free(s->sectorcounts); - g_free(s->compressed_chunk); - g_free(s->uncompressed_chunk); + qemu_vfree(s->compressed_chunk); + qemu_vfree(s->uncompressed_chunk); return ret; } @@ -426,8 +433,8 @@ static void dmg_close(BlockDriverState *bs) g_free(s->lengths); g_free(s->sectors); g_free(s->sectorcounts); - g_free(s->compressed_chunk); - g_free(s->uncompressed_chunk); + qemu_vfree(s->compressed_chunk); + qemu_vfree(s->uncompressed_chunk); inflateEnd(&s->zstream); } diff --git a/block/iscsi.c b/block/iscsi.c index a7bb697..2c9cfc1 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -893,7 +893,10 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, nb_blocks = sector_qemu2lun(nb_sectors, iscsilun); if (iscsilun->zeroblock == NULL) { - iscsilun->zeroblock = g_malloc0(iscsilun->block_size); + iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size); + if (iscsilun->zeroblock == NULL) { + return -ENOMEM; + } } iscsi_co_init_iscsitask(iscsilun, &iTask); diff --git a/block/mirror.c b/block/mirror.c index c7a655f..5e7a166 
100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -367,7 +367,12 @@ static void coroutine_fn mirror_run(void *opaque) } end = s->common.len >> BDRV_SECTOR_BITS; - s->buf = qemu_blockalign(bs, s->buf_size); + s->buf = qemu_try_blockalign(bs, s->buf_size); + if (s->buf == NULL) { + ret = -ENOMEM; + goto immediate_exit; + } + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; mirror_free_init(s); diff --git a/block/nfs.c b/block/nfs.c index 8439e0d..fe46c33 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -172,7 +172,11 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs, nfs_co_init_task(client, &task); - buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE); + buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE); + if (nb_sectors && buf == NULL) { + return -ENOMEM; + } + qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE); if (nfs_pwrite_async(client->context, client->fh, diff --git a/block/parallels.c b/block/parallels.c index 1a5bd35..7325678 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -105,7 +105,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -EFBIG; goto fail; } - s->catalog_bitmap = g_malloc(s->catalog_size * 4); + s->catalog_bitmap = g_try_malloc(s->catalog_size * 4); + if (s->catalog_size && s->catalog_bitmap == NULL) { + ret = -ENOMEM; + goto fail; + } ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4); if (ret < 0) { diff --git a/block/qapi.c b/block/qapi.c index f44f6b4..79d1e6a 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -28,6 +28,13 @@ #include "qapi-visit.h" #include "qapi/qmp-output-visitor.h" #include "qapi/qmp/types.h" +#ifdef __linux__ +#include <linux/fs.h> +#include <sys/ioctl.h> +#ifndef FS_NOCOW_FL +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#endif +#endif BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs) { @@ -165,19 +172,28 @@ void bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, Error **errp) { - uint64_t 
total_sectors; + int64_t size; const char *backing_filename; char backing_filename2[1024]; BlockDriverInfo bdi; int ret; Error *err = NULL; - ImageInfo *info = g_new0(ImageInfo, 1); - - bdrv_get_geometry(bs, &total_sectors); + ImageInfo *info; +#ifdef __linux__ + int fd, attr; +#endif + + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "Can't get size of device '%s'", + bdrv_get_device_name(bs)); + return; + } + info = g_new0(ImageInfo, 1); info->filename = g_strdup(bs->filename); info->format = g_strdup(bdrv_get_format_name(bs)); - info->virtual_size = total_sectors * 512; + info->virtual_size = size; info->actual_size = bdrv_get_allocated_file_size(bs); info->has_actual_size = info->actual_size >= 0; if (bdrv_is_encrypted(bs)) { @@ -195,6 +211,18 @@ void bdrv_query_image_info(BlockDriverState *bs, info->format_specific = bdrv_get_specific_info(bs); info->has_format_specific = info->format_specific != NULL; +#ifdef __linux__ + /* get NOCOW info */ + fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK); + if (fd >= 0) { + if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) { + info->has_nocow = true; + info->nocow = true; + } + qemu_close(fd); + } +#endif + backing_filename = bs->backing_file; if (backing_filename[0] != '\0') { info->backing_filename = g_strdup(backing_filename); @@ -625,4 +653,8 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f, func_fprintf(f, "Format specific information:\n"); bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific); } + + if (info->has_nocow && info->nocow) { + func_fprintf(f, "NOCOW flag: set\n"); + } } diff --git a/block/qcow.c b/block/qcow.c index a874056..67332f0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -182,7 +182,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, } s->l1_table_offset = header.l1_table_offset; - s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); + s->l1_table = g_try_malloc(s->l1_size * 
sizeof(uint64_t)); + if (s->l1_table == NULL) { + error_setg(errp, "Could not allocate memory for L1 table"); + ret = -ENOMEM; + goto fail; + } ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)); @@ -193,8 +198,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, for(i = 0;i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); } - /* alloc L2 cache */ - s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + + /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */ + s->l2_cache = + qemu_try_blockalign(bs->file, + s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + if (s->l2_cache == NULL) { + error_setg(errp, "Could not allocate L2 table cache"); + ret = -ENOMEM; + goto fail; + } s->cluster_cache = g_malloc(s->cluster_size); s->cluster_data = g_malloc(s->cluster_size); s->cluster_cache_offset = -1; @@ -226,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, fail: g_free(s->l1_table); - g_free(s->l2_cache); + qemu_vfree(s->l2_cache); g_free(s->cluster_cache); g_free(s->cluster_data); return ret; @@ -517,7 +530,10 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, void *orig_buf; if (qiov->niov > 1) { - buf = orig_buf = qemu_blockalign(bs, qiov->size); + buf = orig_buf = qemu_try_blockalign(bs, qiov->size); + if (buf == NULL) { + return -ENOMEM; + } } else { orig_buf = NULL; buf = (uint8_t *)qiov->iov->iov_base; @@ -619,7 +635,10 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, s->cluster_cache_offset = -1; /* disable compressed cache */ if (qiov->niov > 1) { - buf = orig_buf = qemu_blockalign(bs, qiov->size); + buf = orig_buf = qemu_try_blockalign(bs, qiov->size); + if (buf == NULL) { + return -ENOMEM; + } qemu_iovec_to_buf(qiov, 0, buf, qiov->size); } else { orig_buf = NULL; @@ -685,7 +704,7 @@ static void qcow_close(BlockDriverState *bs) BDRVQcowState *s = bs->opaque; g_free(s->l1_table); - 
g_free(s->l2_cache); + qemu_vfree(s->l2_cache); g_free(s->cluster_cache); g_free(s->cluster_data); diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 8ecbb5b..5353b44 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -53,10 +53,21 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) c->entries = g_malloc0(sizeof(*c->entries) * num_tables); for (i = 0; i < c->size; i++) { - c->entries[i].table = qemu_blockalign(bs, s->cluster_size); + c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size); + if (c->entries[i].table == NULL) { + goto fail; + } } return c; + +fail: + for (i = 0; i < c->size; i++) { + qemu_vfree(c->entries[i].table); + } + g_free(c->entries); + g_free(c); + return NULL; } int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 4208dc0..5b36018 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -72,14 +72,20 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, #endif new_l1_size2 = sizeof(uint64_t) * new_l1_size; - new_l1_table = g_malloc0(align_offset(new_l1_size2, 512)); + new_l1_table = qemu_try_blockalign(bs->file, + align_offset(new_l1_size2, 512)); + if (new_l1_table == NULL) { + return -ENOMEM; + } + memset(new_l1_table, 0, align_offset(new_l1_size2, 512)); + memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); /* write new table (align to cluster) */ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); if (new_l1_table_offset < 0) { - g_free(new_l1_table); + qemu_vfree(new_l1_table); return new_l1_table_offset; } @@ -113,7 +119,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, if (ret < 0) { goto fail; } - g_free(s->l1_table); + qemu_vfree(s->l1_table); old_l1_table_offset = s->l1_table_offset; s->l1_table_offset = new_l1_table_offset; s->l1_table = new_l1_table; @@ -123,7 +129,7 @@ int 
qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, QCOW2_DISCARD_OTHER); return 0; fail: - g_free(new_l1_table); + qemu_vfree(new_l1_table); qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, QCOW2_DISCARD_OTHER); return ret; @@ -372,7 +378,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, } iov.iov_len = n * BDRV_SECTOR_SIZE; - iov.iov_base = qemu_blockalign(bs, iov.iov_len); + iov.iov_base = qemu_try_blockalign(bs, iov.iov_len); + if (iov.iov_base == NULL) { + return -ENOMEM; + } qemu_iovec_init_external(&qiov, &iov, 1); @@ -702,7 +711,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); assert(m->nb_clusters > 0); - old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + old_cluster = g_try_malloc(m->nb_clusters * sizeof(uint64_t)); + if (old_cluster == NULL) { + ret = -ENOMEM; + goto err; + } /* copy content of unmodified sectors */ ret = perform_cow(bs, m, &m->cow_start); @@ -1106,6 +1119,17 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, return 0; } + /* !*host_offset would overwrite the image header and is reserved for "no + * host offset preferred". If 0 was a valid host offset, it'd trigger the + * following overlap check; do that now to avoid having an invalid value in + * *host_offset. */ + if (!alloc_cluster_offset) { + ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset, + nb_clusters * s->cluster_size); + assert(ret < 0); + goto fail; + } + /* * Save info needed for meta data update. 
* @@ -1562,7 +1586,10 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, if (!is_active_l1) { /* inactive L2 tables require a buffer to be stored in when loading * them from disk */ - l2_table = qemu_blockalign(bs, s->cluster_size); + l2_table = qemu_try_blockalign(bs->file, s->cluster_size); + if (l2_table == NULL) { + return -ENOMEM; + } } for (i = 0; i < l1_size; i++) { @@ -1740,7 +1767,11 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs) nb_clusters = size_to_clusters(s, bs->file->total_sectors * BDRV_SECTOR_SIZE); - expanded_clusters = g_malloc0((nb_clusters + 7) / 8); + expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8); + if (expanded_clusters == NULL) { + ret = -ENOMEM; + goto fail; + } ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, &expanded_clusters, &nb_clusters); diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index cc6cf74..d60e2fe 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -46,19 +46,25 @@ int qcow2_refcount_init(BlockDriverState *bs) assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t)); refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); - s->refcount_table = g_malloc(refcount_table_size2); + s->refcount_table = g_try_malloc(refcount_table_size2); + if (s->refcount_table_size > 0) { + if (s->refcount_table == NULL) { + ret = -ENOMEM; + goto fail; + } BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); ret = bdrv_pread(bs->file, s->refcount_table_offset, s->refcount_table, refcount_table_size2); - if (ret != refcount_table_size2) + if (ret < 0) { goto fail; + } for(i = 0; i < s->refcount_table_size; i++) be64_to_cpus(&s->refcount_table[i]); } return 0; fail: - return -ENOMEM; + return ret; } void qcow2_refcount_close(BlockDriverState *bs) @@ -344,8 +350,14 @@ static int alloc_refcount_block(BlockDriverState *bs, uint64_t meta_offset = (blocks_used * refcount_block_clusters) * s->cluster_size; uint64_t table_offset = meta_offset + 
blocks_clusters * s->cluster_size; - uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size); - uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t)); + uint64_t *new_table = g_try_malloc0(table_size * sizeof(uint64_t)); + uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size); + + assert(table_size > 0 && blocks_clusters > 0); + if (new_table == NULL || new_blocks == NULL) { + ret = -ENOMEM; + goto fail_table; + } /* Fill the new refcount table */ memcpy(new_table, s->refcount_table, @@ -424,6 +436,7 @@ static int alloc_refcount_block(BlockDriverState *bs, return -EAGAIN; fail_table: + g_free(new_blocks); g_free(new_table); fail_block: if (*refcount_block != NULL) { @@ -847,7 +860,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { BDRVQcowState *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2; + bool l1_allocated = false; int64_t old_offset, old_l2_offset; int i, j, l1_modified = 0, nb_csectors, refcount; int ret; @@ -862,8 +876,12 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, * l1_table_offset when it is the current s->l1_table_offset! Be careful * when changing this! 
*/ if (l1_table_offset != s->l1_table_offset) { - l1_table = g_malloc0(align_offset(l1_size2, 512)); - l1_allocated = 1; + l1_table = g_try_malloc0(align_offset(l1_size2, 512)); + if (l1_size2 && l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } + l1_allocated = true; ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); if (ret < 0) { @@ -875,7 +893,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } else { assert(l1_size == s->l1_size); l1_table = s->l1_table; - l1_allocated = 0; + l1_allocated = false; } for(i = 0; i < l1_size; i++) { @@ -1197,7 +1215,11 @@ static int check_refcounts_l1(BlockDriverState *bs, if (l1_size2 == 0) { l1_table = NULL; } else { - l1_table = g_malloc(l1_size2); + l1_table = g_try_malloc(l1_size2); + if (l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } if (bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2) != l1_size2) goto fail; @@ -1501,7 +1523,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, return -EFBIG; } - refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t)); + refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t)); + if (nb_clusters && refcount_table == NULL) { + res->check_errors++; + return -ENOMEM; + } res->bfi.total_clusters = size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); @@ -1753,9 +1779,13 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, uint64_t l1_ofs = s->snapshots[i].l1_table_offset; uint32_t l1_sz = s->snapshots[i].l1_size; uint64_t l1_sz2 = l1_sz * sizeof(uint64_t); - uint64_t *l1 = g_malloc(l1_sz2); + uint64_t *l1 = g_try_malloc(l1_sz2); int ret; + if (l1_sz2 && l1 == NULL) { + return -ENOMEM; + } + ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2); if (ret < 0) { g_free(l1); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 0aa9def..f67b472 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -381,7 +381,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, 
QEMUSnapshotInfo *sn_info) sn->l1_table_offset = l1_table_offset; sn->l1_size = s->l1_size; - l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); + l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t)); + if (s->l1_size && l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } + for(i = 0; i < s->l1_size; i++) { l1_table[i] = cpu_to_be64(s->l1_table[i]); } @@ -499,7 +504,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) * Decrease the refcount referenced by the old one only when the L1 * table is overwritten. */ - sn_l1_table = g_malloc0(cur_l1_bytes); + sn_l1_table = g_try_malloc0(cur_l1_bytes); + if (cur_l1_bytes && sn_l1_table == NULL) { + ret = -ENOMEM; + goto fail; + } ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes); if (ret < 0) { @@ -698,17 +707,21 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, return -EFBIG; } new_l1_bytes = sn->l1_size * sizeof(uint64_t); - new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512)); + new_l1_table = qemu_try_blockalign(bs->file, + align_offset(new_l1_bytes, 512)); + if (new_l1_table == NULL) { + return -ENOMEM; + } ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes); if (ret < 0) { error_setg(errp, "Failed to read l1 table for snapshot"); - g_free(new_l1_table); + qemu_vfree(new_l1_table); return ret; } /* Switch the L1 table */ - g_free(s->l1_table); + qemu_vfree(s->l1_table); s->l1_size = sn->l1_size; s->l1_table_offset = sn->l1_table_offset; diff --git a/block/qcow2.c b/block/qcow2.c index 1e3ab6b..435e0e1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -688,8 +688,13 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (s->l1_size > 0) { - s->l1_table = g_malloc0( + s->l1_table = qemu_try_blockalign(bs->file, align_offset(s->l1_size * sizeof(uint64_t), 512)); + if (s->l1_table == NULL) { + error_setg(errp, "Could not allocate L1 table"); + ret = -ENOMEM; + goto fail; + } ret = bdrv_pread(bs->file, 
s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)); if (ret < 0) { @@ -704,11 +709,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, /* alloc L2 table/refcount block cache */ s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE); s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE); + if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) { + error_setg(errp, "Could not allocate metadata caches"); + ret = -ENOMEM; + goto fail; + } s->cluster_cache = g_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ - s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size - + 512); + s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS + * s->cluster_size + 512); + if (s->cluster_data == NULL) { + error_setg(errp, "Could not allocate temporary cluster buffer"); + ret = -ENOMEM; + goto fail; + } + s->cluster_cache_offset = -1; s->flags = flags; @@ -852,7 +868,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, cleanup_unknown_header_ext(bs); qcow2_free_snapshots(bs); qcow2_refcount_close(bs); - g_free(s->l1_table); + qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; if (s->l2_table_cache) { @@ -1082,7 +1098,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, */ if (!cluster_data) { cluster_data = - qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS + * s->cluster_size); + if (cluster_data == NULL) { + ret = -ENOMEM; + goto fail; + } } assert(cur_nr_sectors <= @@ -1182,8 +1203,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, if (s->crypt_method) { if (!cluster_data) { - cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * - s->cluster_size); + cluster_data = qemu_try_blockalign(bs->file, + QCOW_MAX_CRYPT_CLUSTERS + * 
s->cluster_size); + if (cluster_data == NULL) { + ret = -ENOMEM; + goto fail; + } } assert(hd_qiov.size <= @@ -1270,7 +1296,7 @@ fail: static void qcow2_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; - g_free(s->l1_table); + qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; @@ -1557,7 +1583,7 @@ static int preallocate(BlockDriverState *bs) int ret; QCowL2Meta *meta; - nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; + nb_sectors = bdrv_nb_sectors(bs); offset = 0; while (nb_sectors) { @@ -1947,7 +1973,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, /* align end of file to a sector boundary to ease reading with sector based I/Os */ cluster_offset = bdrv_getlength(bs->file); - cluster_offset = (cluster_offset + 511) & ~511; bdrv_truncate(bs->file, cluster_offset); return 0; } diff --git a/block/qed-check.c b/block/qed-check.c index b473dcd..40a882c 100644 --- a/block/qed-check.c +++ b/block/qed-check.c @@ -227,8 +227,11 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix) }; int ret; - check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) * - sizeof(check.used_clusters[0])); + check.used_clusters = g_try_malloc0(((check.nclusters + 31) / 32) * + sizeof(check.used_clusters[0])); + if (check.nclusters && check.used_clusters == NULL) { + return -ENOMEM; + } check.result->bfi.total_clusters = (s->header.image_size + s->header.cluster_size - 1) / diff --git a/block/qed.c b/block/qed.c index 7944832..ba395af 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1240,7 +1240,11 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) struct iovec *iov = acb->qiov->iov; if (!iov->iov_base) { - iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len); + iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len); + if (iov->iov_base == NULL) { + qed_aio_complete(acb, -ENOMEM); + return; + } memset(iov->iov_base, 0, 
iov->iov_len); } } diff --git a/block/raw-posix.c b/block/raw-posix.c index 8e9758e..1194eb0 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -798,7 +798,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) * Ok, we have to do it the hard way, copy all segments into * a single aligned buffer. */ - buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes); + buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes); + if (buf == NULL) { + return -ENOMEM; + } + if (aiocb->aio_type & QEMU_AIO_WRITE) { char *p = buf; int i; diff --git a/block/rbd.c b/block/rbd.c index 2b797d3..4459102 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -617,7 +617,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, RBDAIOCmd cmd) { RBDAIOCB *acb; - RADOSCB *rcb; + RADOSCB *rcb = NULL; rbd_completion_t c; int64_t off, size; char *buf; @@ -631,7 +631,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { acb->bounce = NULL; } else { - acb->bounce = qemu_blockalign(bs, qiov->size); + acb->bounce = qemu_try_blockalign(bs, qiov->size); + if (acb->bounce == NULL) { + goto failed; + } } acb->ret = 0; acb->error = 0; diff --git a/block/vdi.c b/block/vdi.c index 197bd77..adc6aa9 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -53,13 +53,6 @@ #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" -#ifdef __linux__ -#include <linux/fs.h> -#include <sys/ioctl.h> -#ifndef FS_NOCOW_FL -#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#endif -#endif #if defined(CONFIG_UUID) #include <uuid/uuid.h> @@ -299,7 +292,12 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res, return -ENOTSUP; } - bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t)); + bmap = g_try_malloc(s->header.blocks_in_image * sizeof(uint32_t)); + if (s->header.blocks_in_image && bmap == NULL) { + res->check_errors++; + return -ENOMEM; + } + memset(bmap, 0xff, s->header.blocks_in_image * 
sizeof(uint32_t)); /* Check block map and value of blocks_allocated. */ @@ -357,23 +355,23 @@ static int vdi_make_empty(BlockDriverState *bs) static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename) { const VdiHeader *header = (const VdiHeader *)buf; - int result = 0; + int ret = 0; logout("\n"); if (buf_size < sizeof(*header)) { /* Header too small, no VDI. */ } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) { - result = 100; + ret = 100; } - if (result == 0) { + if (ret == 0) { logout("no vdi image\n"); } else { logout("%s", header->text); } - return result; + return ret; } static int vdi_open(BlockDriverState *bs, QDict *options, int flags, @@ -478,7 +476,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bmap_size = header.blocks_in_image * sizeof(uint32_t); bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE; - s->bmap = g_malloc(bmap_size * SECTOR_SIZE); + s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE); + if (s->bmap == NULL) { + ret = -ENOMEM; + goto fail; + } + ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size); if (ret < 0) { goto fail_free_bmap; @@ -493,7 +496,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, return 0; fail_free_bmap: - g_free(s->bmap); + qemu_vfree(s->bmap); fail: return ret; @@ -681,8 +684,7 @@ static int vdi_co_write(BlockDriverState *bs, static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) { - int fd; - int result = 0; + int ret = 0; uint64_t bytes = 0; uint32_t blocks; size_t block_size = DEFAULT_CLUSTER_SIZE; @@ -690,7 +692,10 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) VdiHeader header; size_t i; size_t bmap_size; - bool nocow = false; + int64_t offset = 0; + Error *local_err = NULL; + BlockDriverState *bs = NULL; + uint32_t *bmap = NULL; logout("\n"); @@ -707,37 +712,25 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) image_type 
= VDI_TYPE_STATIC; } #endif - nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); if (bytes > VDI_DISK_SIZE_MAX) { - result = -ENOTSUP; + ret = -ENOTSUP; error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64 ", max supported is 0x%" PRIx64 ")", bytes, VDI_DISK_SIZE_MAX); goto exit; } - fd = qemu_open(filename, - O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, - 0644); - if (fd < 0) { - result = -errno; + ret = bdrv_create_file(filename, opts, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); goto exit; } - - if (nocow) { -#ifdef __linux__ - /* Set NOCOW flag to solve performance issue on fs like btrfs. - * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will - * be ignored since any failure of this operation should not block the - * left work. - */ - int attr; - if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { - attr |= FS_NOCOW_FL; - ioctl(fd, FS_IOC_SETFLAGS, &attr); - } -#endif + ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + NULL, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto exit; } /* We need enough blocks to store the given disk size, @@ -769,13 +762,20 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) vdi_header_print(&header); #endif vdi_header_to_le(&header); - if (write(fd, &header, sizeof(header)) < 0) { - result = -errno; - goto close_and_exit; + ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header)); + if (ret < 0) { + error_setg(errp, "Error writing header to %s", filename); + goto exit; } + offset += sizeof(header); if (bmap_size > 0) { - uint32_t *bmap = g_malloc0(bmap_size); + bmap = g_try_malloc0(bmap_size); + if (bmap == NULL) { + ret = -ENOMEM; + error_setg(errp, "Could not allocate bmap"); + goto exit; + } for (i = 0; i < blocks; i++) { if (image_type == VDI_TYPE_STATIC) { bmap[i] = i; @@ -783,35 +783,33 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) bmap[i] = 
VDI_UNALLOCATED; } } - if (write(fd, bmap, bmap_size) < 0) { - result = -errno; - g_free(bmap); - goto close_and_exit; + ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size); + if (ret < 0) { + error_setg(errp, "Error writing bmap to %s", filename); + goto exit; } - g_free(bmap); + offset += bmap_size; } if (image_type == VDI_TYPE_STATIC) { - if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) { - result = -errno; - goto close_and_exit; + ret = bdrv_truncate(bs, offset + blocks * block_size); + if (ret < 0) { + error_setg(errp, "Failed to statically allocate %s", filename); + goto exit; } } -close_and_exit: - if ((close(fd) < 0) && !result) { - result = -errno; - } - exit: - return result; + bdrv_unref(bs); + g_free(bmap); + return ret; } static void vdi_close(BlockDriverState *bs) { BDRVVdiState *s = bs->opaque; - g_free(s->bmap); + qemu_vfree(s->bmap); migrate_del_blocker(s->migration_blocker); error_free(s->migration_blocker); diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c index fe879ed..0640d3f 100644 --- a/block/vhdx-endian.c +++ b/block/vhdx-endian.c @@ -82,8 +82,6 @@ void vhdx_log_desc_le_import(VHDXLogDescriptor *d) assert(d != NULL); le32_to_cpus(&d->signature); - le32_to_cpus(&d->trailing_bytes); - le64_to_cpus(&d->leading_bytes); le64_to_cpus(&d->file_offset); le64_to_cpus(&d->sequence_number); } @@ -99,6 +97,15 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d) cpu_to_le64s(&d->sequence_number); } +void vhdx_log_data_le_import(VHDXLogDataSector *d) +{ + assert(d != NULL); + + le32_to_cpus(&d->data_signature); + le32_to_cpus(&d->sequence_high); + le32_to_cpus(&d->sequence_low); +} + void vhdx_log_data_le_export(VHDXLogDataSector *d) { assert(d != NULL); diff --git a/block/vhdx-log.c b/block/vhdx-log.c index a77c040..eb5c7a0 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -84,6 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, if (ret < 0) { goto exit; } + vhdx_log_entry_hdr_le_import(hdr); 
exit: return ret; @@ -211,7 +212,7 @@ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, { int valid = false; - if (memcmp(&hdr->signature, "loge", 4)) { + if (hdr->signature != VHDX_LOG_SIGNATURE) { goto exit; } @@ -275,12 +276,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, goto exit; } - if (!memcmp(&desc->signature, "zero", 4)) { + if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { /* valid */ ret = true; } - } else if (!memcmp(&desc->signature, "desc", 4)) { + } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { /* valid */ ret = true; } @@ -327,13 +328,15 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt) * passed into this function. Each descriptor will also be validated, * and error returned if any are invalid. */ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, - VHDXLogEntries *log, VHDXLogDescEntries **buffer) + VHDXLogEntries *log, VHDXLogDescEntries **buffer, + bool convert_endian) { int ret = 0; uint32_t desc_sectors; uint32_t sectors_read; VHDXLogEntryHeader hdr; VHDXLogDescEntries *desc_entries = NULL; + VHDXLogDescriptor desc; int i; assert(*buffer == NULL); @@ -342,14 +345,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, if (ret < 0) { goto exit; } - vhdx_log_entry_hdr_le_import(&hdr); + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { ret = -EINVAL; goto exit; } desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); - desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE); + desc_entries = qemu_try_blockalign(bs->file, + desc_sectors * VHDX_LOG_SECTOR_SIZE); + if (desc_entries == NULL) { + ret = -ENOMEM; + goto exit; + } ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries, desc_sectors, false); @@ -363,12 +371,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, /* put in proper endianness, and validate each desc */ for (i = 0; i 
< hdr.descriptor_count; i++) { - vhdx_log_desc_le_import(&desc_entries->desc[i]); - if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) { + desc = desc_entries->desc[i]; + vhdx_log_desc_le_import(&desc); + if (convert_endian) { + desc_entries->desc[i] = desc; + } + if (vhdx_log_desc_is_valid(&desc, &hdr) == false) { ret = -EINVAL; goto free_and_exit; } } + if (convert_endian) { + desc_entries->hdr = hdr; + } *buffer = desc_entries; goto exit; @@ -403,7 +418,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); - if (!memcmp(&desc->signature, "desc", 4)) { + if (desc->signature == VHDX_LOG_DESC_SIGNATURE) { /* data sector */ if (data == NULL) { ret = -EFAULT; @@ -431,10 +446,15 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, memcpy(buffer+offset, &desc->trailing_bytes, 4); - } else if (!memcmp(&desc->signature, "zero", 4)) { + } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) { /* write 'count' sectors of sector */ memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); count = desc->zero_length / VHDX_LOG_SECTOR_SIZE; + } else { + error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32, + desc->signature); + ret = -EINVAL; + goto exit; } file_offset = desc->file_offset; @@ -493,13 +513,13 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, goto exit; } - ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries); + ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true); if (ret < 0) { goto exit; } for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { - if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) { + if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) { /* data sector, so read a sector to flush */ ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read, data, 1, false); @@ -510,6 +530,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, ret = -EINVAL; goto exit; 
} + vhdx_log_data_le_import(data); } ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); @@ -558,9 +579,6 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, goto inc_and_exit; } - vhdx_log_entry_hdr_le_import(&hdr); - - if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { goto inc_and_exit; } @@ -573,13 +591,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); - /* Read desc sectors, and calculate log checksum */ + /* Read all log sectors, and calculate log checksum */ total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; /* read_desc() will increment the read idx */ - ret = vhdx_log_read_desc(bs, s, log, &desc_buffer); + ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false); if (ret < 0) { goto free_and_exit; } @@ -602,7 +620,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, } } crc ^= 0xffffffff; - if (crc != desc_buffer->hdr.checksum) { + if (crc != hdr.checksum) { goto free_and_exit; } @@ -962,7 +980,6 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, * last data sector */ vhdx_update_checksum(buffer, total_length, offsetof(VHDXLogEntryHeader, checksum)); - cpu_to_le32s((uint32_t *)(buffer + 4)); /* now write to the log */ ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer, diff --git a/block/vhdx.c b/block/vhdx.c index fedcf9f..f666940 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -135,10 +135,8 @@ typedef struct VHDXSectorInfo { * buf: buffer pointer * size: size of buffer (must be > crc_offset+4) * - * Note: The resulting checksum is in the CPU endianness, not necessarily - * in the file format endianness (LE). 
Any header export to disk should - * make sure that vhdx_header_le_export() is used to convert to the - * correct endianness + * Note: The buffer should have all multi-byte data in little-endian format, + * and the resulting checksum is in little endian format. */ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset) { @@ -149,6 +147,7 @@ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset) memset(buf + crc_offset, 0, sizeof(crc)); crc = crc32c(0xffffffff, buf, size); + cpu_to_le32s(&crc); memcpy(buf + crc_offset, &crc, sizeof(crc)); return crc; @@ -300,7 +299,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr, { uint8_t *buffer = NULL; int ret; - VHDXHeader header_le; + VHDXHeader *header_le; assert(bs_file != NULL); assert(hdr != NULL); @@ -321,11 +320,12 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr, } /* overwrite the actual VHDXHeader portion */ - memcpy(buffer, hdr, sizeof(VHDXHeader)); - hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, - offsetof(VHDXHeader, checksum)); - vhdx_header_le_export(hdr, &header_le); - ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader)); + header_le = (VHDXHeader *)buffer; + memcpy(header_le, hdr, sizeof(VHDXHeader)); + vhdx_header_le_export(hdr, header_le); + vhdx_update_checksum(buffer, VHDX_HEADER_SIZE, + offsetof(VHDXHeader, checksum)); + ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader)); exit: qemu_vfree(buffer); @@ -432,13 +432,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, } /* copy over just the relevant portion that we need */ memcpy(header1, buffer, sizeof(VHDXHeader)); - vhdx_header_le_import(header1); - if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) && - !memcmp(&header1->signature, "head", 4) && - header1->version == 1) { - h1_seq = header1->sequence_number; - h1_valid = true; + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) 
{ + vhdx_header_le_import(header1); + if (header1->signature == VHDX_HEADER_SIGNATURE && + header1->version == 1) { + h1_seq = header1->sequence_number; + h1_valid = true; + } } ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE); @@ -447,13 +448,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, } /* copy over just the relevant portion that we need */ memcpy(header2, buffer, sizeof(VHDXHeader)); - vhdx_header_le_import(header2); - if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) && - !memcmp(&header2->signature, "head", 4) && - header2->version == 1) { - h2_seq = header2->sequence_number; - h2_valid = true; + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) { + vhdx_header_le_import(header2); + if (header2->signature == VHDX_HEADER_SIGNATURE && + header2->version == 1) { + h2_seq = header2->sequence_number; + h2_valid = true; + } } /* If there is only 1 valid header (or no valid headers), we @@ -519,15 +521,21 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) goto fail; } memcpy(&s->rt, buffer, sizeof(s->rt)); - vhdx_region_header_le_import(&s->rt); offset += sizeof(s->rt); - if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) || - memcmp(&s->rt.signature, "regi", 4)) { + if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) { ret = -EINVAL; goto fail; } + vhdx_region_header_le_import(&s->rt); + + if (s->rt.signature != VHDX_REGION_SIGNATURE) { + ret = -EINVAL; + goto fail; + } + + /* Per spec, maximum region table entry count is 2047 */ if (s->rt.entry_count > 2047) { ret = -EINVAL; @@ -630,7 +638,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) vhdx_metadata_header_le_import(&s->metadata_hdr); - if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) { + if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) { ret = -EINVAL; goto exit; } @@ -950,7 +958,11 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int 
flags, } /* s->bat is freed in vhdx_close() */ - s->bat = qemu_blockalign(bs, s->bat_rt.length); + s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length); + if (s->bat == NULL) { + ret = -ENOMEM; + goto fail; + } ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); if (ret < 0) { @@ -1540,7 +1552,8 @@ exit: */ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s, uint64_t image_size, VHDXImageType type, - bool use_zero_blocks, VHDXRegionTableEntry *rt_bat) + bool use_zero_blocks, uint64_t file_offset, + uint32_t length) { int ret = 0; uint64_t data_file_offset; @@ -1555,7 +1568,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s, /* this gives a data start after BAT/bitmap entries, and well * past any metadata entries (with a 4 MB buffer for future * expansion */ - data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB; + data_file_offset = file_offset + length + 5 * MiB; total_sectors = image_size >> s->logical_sector_size_bits; if (type == VHDX_TYPE_DYNAMIC) { @@ -1579,7 +1592,11 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s, use_zero_blocks || bdrv_has_zero_init(bs) == 0) { /* for a fixed file, the default BAT entry is not zero */ - s->bat = g_malloc0(rt_bat->length); + s->bat = g_try_malloc0(length); + if (length && s->bat != NULL) { + ret = -ENOMEM; + goto exit; + } block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT : PAYLOAD_BLOCK_NOT_PRESENT; block_state = use_zero_blocks ? 
PAYLOAD_BLOCK_ZERO : block_state; @@ -1594,7 +1611,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s, cpu_to_le64s(&s->bat[sinfo.bat_idx]); sector_num += s->sectors_per_block; } - ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length); + ret = bdrv_pwrite(bs, file_offset, s->bat, length); if (ret < 0) { goto exit; } @@ -1626,6 +1643,8 @@ static int vhdx_create_new_region_table(BlockDriverState *bs, int ret = 0; uint32_t offset = 0; void *buffer = NULL; + uint64_t bat_file_offset; + uint32_t bat_length; BDRVVHDXState *s = NULL; VHDXRegionTableHeader *region_table; VHDXRegionTableEntry *rt_bat; @@ -1674,19 +1693,26 @@ static int vhdx_create_new_region_table(BlockDriverState *bs, rt_metadata->length = 1 * MiB; /* min size, and more than enough */ *metadata_offset = rt_metadata->file_offset; + bat_file_offset = rt_bat->file_offset; + bat_length = rt_bat->length; + + vhdx_region_header_le_export(region_table); + vhdx_region_entry_le_export(rt_bat); + vhdx_region_entry_le_export(rt_metadata); + vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE, offsetof(VHDXRegionTableHeader, checksum)); /* The region table gives us the data we need to create the BAT, * so do that now */ - ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat); + ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, + bat_file_offset, bat_length); + if (ret < 0) { + goto exit; + } /* Now write out the region headers to disk */ - vhdx_region_header_le_export(region_table); - vhdx_region_entry_le_export(rt_bat); - vhdx_region_entry_le_export(rt_metadata); - ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer, VHDX_HEADER_BLOCK_SIZE); if (ret < 0) { diff --git a/block/vhdx.h b/block/vhdx.h index 5370010..b4a12a0 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -435,6 +435,7 @@ void vhdx_header_le_import(VHDXHeader *h); void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h); void vhdx_log_desc_le_import(VHDXLogDescriptor *d); void 
vhdx_log_desc_le_export(VHDXLogDescriptor *d); +void vhdx_log_data_le_import(VHDXLogDataSector *d); void vhdx_log_data_le_export(VHDXLogDataSector *d); void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); diff --git a/block/vmdk.c b/block/vmdk.c index 0517bba..01412a8 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -106,6 +106,7 @@ typedef struct VmdkExtent { uint32_t l2_cache_counts[L2_CACHE_SIZE]; int64_t cluster_sectors; + int64_t next_cluster_sector; char *type; } VmdkExtent; @@ -124,7 +125,6 @@ typedef struct BDRVVmdkState { } BDRVVmdkState; typedef struct VmdkMetaData { - uint32_t offset; unsigned int l1_index; unsigned int l2_index; unsigned int l2_offset; @@ -397,6 +397,7 @@ static int vmdk_add_extent(BlockDriverState *bs, { VmdkExtent *extent; BDRVVmdkState *s = bs->opaque; + int64_t length; if (cluster_sectors > 0x200000) { /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */ @@ -412,6 +413,11 @@ static int vmdk_add_extent(BlockDriverState *bs, return -EFBIG; } + length = bdrv_getlength(file); + if (length < 0) { + return length; + } + s->extents = g_realloc(s->extents, (s->num_extents + 1) * sizeof(VmdkExtent)); extent = &s->extents[s->num_extents]; @@ -427,6 +433,8 @@ static int vmdk_add_extent(BlockDriverState *bs, extent->l1_entry_sectors = l2_size * cluster_sectors; extent->l2_size = l2_size; extent->cluster_sectors = flat ? 
sectors : cluster_sectors; + extent->next_cluster_sector = + ROUND_UP(DIV_ROUND_UP(length, BDRV_SECTOR_SIZE), cluster_sectors); if (s->num_extents > 1) { extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; @@ -448,7 +456,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, /* read the L1 table */ l1_size = extent->l1_size * sizeof(uint32_t); - extent->l1_table = g_malloc(l1_size); + extent->l1_table = g_try_malloc(l1_size); + if (l1_size && extent->l1_table == NULL) { + return -ENOMEM; + } + ret = bdrv_pread(extent->file, extent->l1_table_offset, extent->l1_table, @@ -464,7 +476,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, } if (extent->l1_backup_table_offset) { - extent->l1_backup_table = g_malloc(l1_size); + extent->l1_backup_table = g_try_malloc(l1_size); + if (l1_size && extent->l1_backup_table == NULL) { + ret = -ENOMEM; + goto fail_l1; + } ret = bdrv_pread(extent->file, extent->l1_backup_table_offset, extent->l1_backup_table, @@ -669,8 +685,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } - if (bdrv_getlength(file) < - le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) { + if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) { error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes", (int64_t)(le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE)); @@ -952,57 +967,97 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) } } +/** + * get_whole_cluster + * + * Copy backing file's cluster that covers @sector_num, otherwise write zero, + * to the cluster at @cluster_sector_num. + * + * If @skip_start_sector < @skip_end_sector, the relative range + * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave + * it for call to write user data in the request. 
+ */ static int get_whole_cluster(BlockDriverState *bs, - VmdkExtent *extent, - uint64_t cluster_offset, - uint64_t offset, - bool allocate) + VmdkExtent *extent, + uint64_t cluster_sector_num, + uint64_t sector_num, + uint64_t skip_start_sector, + uint64_t skip_end_sector) { int ret = VMDK_OK; - uint8_t *whole_grain = NULL; + int64_t cluster_bytes; + uint8_t *whole_grain; + /* For COW, align request sector_num to cluster start */ + sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors); + cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS; + whole_grain = qemu_blockalign(bs, cluster_bytes); + + if (!bs->backing_hd) { + memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS); + memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0, + cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS)); + } + + assert(skip_end_sector <= extent->cluster_sectors); /* we will be here if it's first write on non-exist grain(cluster). * try to read from parent image, if exist */ - if (bs->backing_hd) { - whole_grain = - qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS); - if (!vmdk_is_cid_valid(bs)) { - ret = VMDK_ERROR; - goto exit; - } + if (bs->backing_hd && !vmdk_is_cid_valid(bs)) { + ret = VMDK_ERROR; + goto exit; + } - /* floor offset to cluster */ - offset -= offset % (extent->cluster_sectors * 512); - ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, - extent->cluster_sectors); + /* Read backing data before skip range */ + if (skip_start_sector > 0) { + if (bs->backing_hd) { + ret = bdrv_read(bs->backing_hd, sector_num, + whole_grain, skip_start_sector); + if (ret < 0) { + ret = VMDK_ERROR; + goto exit; + } + } + ret = bdrv_write(extent->file, cluster_sector_num, whole_grain, + skip_start_sector); if (ret < 0) { ret = VMDK_ERROR; goto exit; } - - /* Write grain only into the active image */ - ret = bdrv_write(extent->file, cluster_offset, whole_grain, - extent->cluster_sectors); + } + /* Read backing data after skip range 
*/ + if (skip_end_sector < extent->cluster_sectors) { + if (bs->backing_hd) { + ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector, + whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), + extent->cluster_sectors - skip_end_sector); + if (ret < 0) { + ret = VMDK_ERROR; + goto exit; + } + } + ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector, + whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), + extent->cluster_sectors - skip_end_sector); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } + exit: qemu_vfree(whole_grain); return ret; } -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, + uint32_t offset) { - uint32_t offset; - QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset)); - offset = cpu_to_le32(m_data->offset); + offset = cpu_to_le32(offset); /* update L2 table */ if (bdrv_pwrite_sync( extent->file, ((int64_t)m_data->l2_offset * 512) - + (m_data->l2_index * sizeof(m_data->offset)), + + (m_data->l2_index * sizeof(offset)), &offset, sizeof(offset)) < 0) { return VMDK_ERROR; } @@ -1012,7 +1067,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) if (bdrv_pwrite_sync( extent->file, ((int64_t)m_data->l2_offset * 512) - + (m_data->l2_index * sizeof(m_data->offset)), + + (m_data->l2_index * sizeof(offset)), &offset, sizeof(offset)) < 0) { return VMDK_ERROR; } @@ -1024,17 +1079,41 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) return VMDK_OK; } +/** + * get_cluster_offset + * + * Look up cluster offset in extent file by sector number, and store in + * @cluster_offset. + * + * For flat extents, the start offset as parsed from the description file is + * returned. + * + * For sparse extents, look up in L1, L2 table. If allocate is true, return an + * offset for a new cluster and update L2 cache. 
If there is a backing file, + * COW is done before returning; otherwise, zeroes are written to the allocated + * cluster. Both COW and zero writing skips the sector range + * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller + * has new data to write there. + * + * Returns: VMDK_OK if cluster exists and mapped in the image. + * VMDK_UNALLOC if cluster is not mapped and @allocate is false. + * VMDK_ERROR if failed. + */ static int get_cluster_offset(BlockDriverState *bs, - VmdkExtent *extent, - VmdkMetaData *m_data, - uint64_t offset, - int allocate, - uint64_t *cluster_offset) + VmdkExtent *extent, + VmdkMetaData *m_data, + uint64_t offset, + bool allocate, + uint64_t *cluster_offset, + uint64_t skip_start_sector, + uint64_t skip_end_sector) { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; uint32_t min_count, *l2_table; bool zeroed = false; + int64_t ret; + int32_t cluster_sector; if (m_data) { m_data->valid = 0; @@ -1088,52 +1167,41 @@ static int get_cluster_offset(BlockDriverState *bs, extent->l2_cache_counts[min_index] = 1; found: l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; - *cluster_offset = le32_to_cpu(l2_table[l2_index]); + cluster_sector = le32_to_cpu(l2_table[l2_index]); if (m_data) { m_data->valid = 1; m_data->l1_index = l1_index; m_data->l2_index = l2_index; - m_data->offset = *cluster_offset; m_data->l2_offset = l2_offset; m_data->l2_cache_entry = &l2_table[l2_index]; } - if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) { + if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { zeroed = true; } - if (!*cluster_offset || zeroed) { + if (!cluster_sector || zeroed) { if (!allocate) { return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; } - /* Avoid the L2 tables update for the images that have snapshots. 
*/ - *cluster_offset = bdrv_getlength(extent->file); - if (!extent->compressed) { - bdrv_truncate( - extent->file, - *cluster_offset + (extent->cluster_sectors << 9) - ); - } - - *cluster_offset >>= 9; - l2_table[l2_index] = cpu_to_le32(*cluster_offset); + cluster_sector = extent->next_cluster_sector; + extent->next_cluster_sector += extent->cluster_sectors; /* First of all we write grain itself, to avoid race condition * that may to corrupt the image. * This problem may occur because of insufficient space on host disk * or inappropriate VM shutdown. */ - if (get_whole_cluster( - bs, extent, *cluster_offset, offset, allocate) == -1) { - return VMDK_ERROR; - } - - if (m_data) { - m_data->offset = *cluster_offset; + ret = get_whole_cluster(bs, extent, + cluster_sector, + offset >> BDRV_SECTOR_BITS, + skip_start_sector, skip_end_sector); + if (ret) { + return ret; } } - *cluster_offset <<= 9; + *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; return VMDK_OK; } @@ -1168,7 +1236,8 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, } qemu_co_mutex_lock(&s->lock); ret = get_cluster_offset(bs, extent, NULL, - sector_num * 512, 0, &offset); + sector_num * 512, false, &offset, + 0, 0); qemu_co_mutex_unlock(&s->lock); switch (ret) { @@ -1321,9 +1390,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, if (!extent) { return -EIO; } - ret = get_cluster_offset( - bs, extent, NULL, - sector_num << 9, 0, &cluster_offset); + ret = get_cluster_offset(bs, extent, NULL, + sector_num << 9, false, &cluster_offset, + 0, 0); extent_begin_sector = extent->end_sector - extent->sectors; extent_relative_sector_num = sector_num - extent_begin_sector; index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; @@ -1404,12 +1473,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (!extent) { return -EIO; } - ret = get_cluster_offset( - bs, - extent, - &m_data, - sector_num << 9, !extent->compressed, - 
&cluster_offset); + extent_begin_sector = extent->end_sector - extent->sectors; + extent_relative_sector_num = sector_num - extent_begin_sector; + index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; + n = extent->cluster_sectors - index_in_cluster; + if (n > nb_sectors) { + n = nb_sectors; + } + ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9, + !(extent->compressed || zeroed), + &cluster_offset, + index_in_cluster, index_in_cluster + n); if (extent->compressed) { if (ret == VMDK_OK) { /* Refuse write to allocated cluster for streamOptimized */ @@ -1418,24 +1492,13 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, return -EIO; } else { /* allocate */ - ret = get_cluster_offset( - bs, - extent, - &m_data, - sector_num << 9, 1, - &cluster_offset); + ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9, + true, &cluster_offset, 0, 0); } } if (ret == VMDK_ERROR) { return -EINVAL; } - extent_begin_sector = extent->end_sector - extent->sectors; - extent_relative_sector_num = sector_num - extent_begin_sector; - index_in_cluster = extent_relative_sector_num % extent->cluster_sectors; - n = extent->cluster_sectors - index_in_cluster; - if (n > nb_sectors) { - n = nb_sectors; - } if (zeroed) { /* Do zeroed write, buf is ignored */ if (extent->has_zero_grain && @@ -1443,9 +1506,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, n >= extent->cluster_sectors) { n = extent->cluster_sectors; if (!zero_dry_run) { - m_data.offset = VMDK_GTE_ZEROED; /* update L2 tables */ - if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) + != VMDK_OK) { return -EIO; } } @@ -1461,7 +1524,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, } if (m_data.valid) { /* update L2 tables */ - if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + if (vmdk_L2update(extent, &m_data, + cluster_offset >> BDRV_SECTOR_BITS) + != VMDK_OK) { return -EIO; } } @@ -1999,7 
+2064,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; int64_t sector_num = 0; - int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE; + int64_t total_sectors = bdrv_nb_sectors(bs); int ret; uint64_t cluster_offset; @@ -2020,7 +2085,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, } ret = get_cluster_offset(bs, extent, NULL, sector_num << BDRV_SECTOR_BITS, - 0, &cluster_offset); + false, &cluster_offset, 0, 0); if (ret == VMDK_ERROR) { fprintf(stderr, "ERROR: could not get cluster_offset for sector %" diff --git a/block/vpc.c b/block/vpc.c index 8b376a4..055efc4 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -29,13 +29,6 @@ #if defined(CONFIG_UUID) #include <uuid/uuid.h> #endif -#ifdef __linux__ -#include <linux/fs.h> -#include <sys/ioctl.h> -#ifndef FS_NOCOW_FL -#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#endif -#endif /**************************************************************/ @@ -276,7 +269,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4); + s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4); + if (s->pagetable == NULL) { + ret = -ENOMEM; + goto fail; + } s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); @@ -656,39 +653,41 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, return 0; } -static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors) +static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, + int64_t total_sectors) { VHDDynDiskHeader *dyndisk_header = (VHDDynDiskHeader *) buf; size_t block_size, num_bat_entries; int i; - int ret = -EIO; + int ret; + int64_t offset = 0; // Write the footer (twice: at the beginning and at the end) block_size = 0x200000; num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); - if (write(fd, buf, 
HEADER_SIZE) != HEADER_SIZE) { + ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + if (ret) { goto fail; } - if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) { - goto fail; - } - if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) { + offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); + ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + if (ret < 0) { goto fail; } // Write the initial BAT - if (lseek(fd, 3 * 512, SEEK_SET) < 0) { - goto fail; - } + offset = 3 * 512; memset(buf, 0xFF, 512); for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) { - if (write(fd, buf, 512) != 512) { + ret = bdrv_pwrite_sync(bs, offset, buf, 512); + if (ret < 0) { goto fail; } + offset += 512; } // Prepare the Dynamic Disk Header @@ -709,39 +708,35 @@ static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors) dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024)); // Write the header - if (lseek(fd, 512, SEEK_SET) < 0) { - goto fail; - } + offset = 512; - if (write(fd, buf, 1024) != 1024) { + ret = bdrv_pwrite_sync(bs, offset, buf, 1024); + if (ret < 0) { goto fail; } - ret = 0; fail: return ret; } -static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size) +static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, + int64_t total_size) { - int ret = -EIO; + int ret; /* Add footer to total size */ - total_size += 512; - if (ftruncate(fd, total_size) != 0) { - ret = -errno; - goto fail; - } - if (lseek(fd, -512, SEEK_END) < 0) { - goto fail; - } - if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) { - goto fail; + total_size += HEADER_SIZE; + + ret = bdrv_truncate(bs, total_size); + if (ret < 0) { + return ret; } - ret = 0; + ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE); + if (ret < 0) { + return ret; + } - fail: return ret; } @@ -750,7 +745,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) uint8_t buf[1024]; VHDFooter *footer = (VHDFooter *) buf; char 
*disk_type_param; - int fd, i; + int i; uint16_t cyls = 0; uint8_t heads = 0; uint8_t secs_per_cyl = 0; @@ -758,7 +753,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; - bool nocow = false; + Error *local_err = NULL; + BlockDriverState *bs = NULL; /* Read out options */ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); @@ -775,28 +771,17 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } else { disk_type = VHD_DYNAMIC; } - nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false); - /* Create the file */ - fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) { - ret = -EIO; + ret = bdrv_create_file(filename, opts, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); goto out; } - - if (nocow) { -#ifdef __linux__ - /* Set NOCOW flag to solve performance issue on fs like btrfs. - * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will - * be ignored since any failure of this operation should not block the - * left work. 
- */ - int attr; - if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) { - attr |= FS_NOCOW_FL; - ioctl(fd, FS_IOC_SETFLAGS, &attr); - } -#endif + ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + NULL, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + goto out; } /* @@ -810,7 +795,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) &secs_per_cyl)) { ret = -EFBIG; - goto fail; + goto out; } } @@ -856,14 +841,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); if (disk_type == VHD_DYNAMIC) { - ret = create_dynamic_disk(fd, buf, total_sectors); + ret = create_dynamic_disk(bs, buf, total_sectors); } else { - ret = create_fixed_disk(fd, buf, total_size); + ret = create_fixed_disk(bs, buf, total_size); } -fail: - qemu_close(fd); out: + bdrv_unref(bs); g_free(disk_type_param); return ret; } diff --git a/block/win32-aio.c b/block/win32-aio.c index 8e417f7..5030e32 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -139,7 +139,10 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, waiocb->is_read = (type == QEMU_AIO_READ); if (qiov->niov > 1) { - waiocb->buf = qemu_blockalign(bs, qiov->size); + waiocb->buf = qemu_try_blockalign(bs, qiov->size); + if (waiocb->buf == NULL) { + goto out; + } if (type & QEMU_AIO_WRITE) { iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size); } @@ -168,6 +171,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs, out_dec_count: aio->count--; +out: qemu_aio_release(waiocb); return NULL; } |