aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/nbd.c4
-rw-r--r--block/raw-posix.c9
-rw-r--r--block/rbd.c83
-rw-r--r--block/vmdk.c14
-rw-r--r--cpus.c5
-rw-r--r--dma-helpers.c58
-rw-r--r--dma.h10
-rw-r--r--hw/ide/ahci.c8
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/ide/macio.c2
-rw-r--r--hw/scsi-bus.c22
-rw-r--r--hw/scsi-disk.c84
-rw-r--r--hw/scsi-generic.c6
-rw-r--r--linux-aio.c11
-rw-r--r--nbd.c42
-rw-r--r--nbd.h20
-rw-r--r--posix-aio-compat.c38
-rw-r--r--qemu-nbd.c13
18 files changed, 254 insertions, 177 deletions
diff --git a/block/nbd.c b/block/nbd.c
index 70edd81..76f04d8 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -48,6 +48,7 @@
typedef struct BDRVNBDState {
int sock;
+ uint32_t nbdflags;
off_t size;
size_t blocksize;
char *export_name; /* An NBD server may export several devices */
@@ -111,7 +112,6 @@ static int nbd_establish_connection(BlockDriverState *bs)
int ret;
off_t size;
size_t blocksize;
- uint32_t nbdflags;
if (s->host_spec[0] == '/') {
sock = unix_socket_outgoing(s->host_spec);
@@ -126,7 +126,7 @@ static int nbd_establish_connection(BlockDriverState *bs)
}
/* NBD handshake */
- ret = nbd_receive_negotiate(sock, s->export_name, &nbdflags, &size,
+ ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
&blocksize);
if (ret == -1) {
logout("Failed to negotiate with the NBD server\n");
diff --git a/block/raw-posix.c b/block/raw-posix.c
index a624f56..305998d 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -839,7 +839,14 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
static int raw_flush(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
- return qemu_fdatasync(s->fd);
+ int ret;
+
+ ret = qemu_fdatasync(s->fd);
+ if (ret < 0) {
+ return -errno;
+ }
+
+ return 0;
}
#ifdef CONFIG_XFS
diff --git a/block/rbd.c b/block/rbd.c
index 1b78d51..3068c82 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,35 +13,33 @@
#include "qemu-common.h"
#include "qemu-error.h"
-
#include "block_int.h"
#include <rbd/librbd.h>
-
-
/*
* When specifying the image filename use:
*
* rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
*
- * poolname must be the name of an existing rados pool
+ * poolname must be the name of an existing rados pool.
*
- * devicename is the basename for all objects used to
- * emulate the raw device.
+ * devicename is the name of the rbd image.
*
- * Each option given is used to configure rados, and may be
- * any Ceph option, or "conf". The "conf" option specifies
- * a Ceph configuration file to read.
+ * Each option given is used to configure rados, and may be any valid
+ * Ceph option, "id", or "conf".
*
- * Metadata information (image size, ...) is stored in an
- * object with the name "devicename.rbd".
+ * The "id" option indicates what user we should authenticate as to
+ * the Ceph cluster. If it is excluded we will use the Ceph default
+ * (normally 'admin').
*
- * The raw device is split into 4MB sized objects by default.
- * The sequencenumber is encoded in a 12 byte long hex-string,
- * and is attached to the devicename, separated by a dot.
- * e.g. "devicename.1234567890ab"
+ * The "conf" option specifies a Ceph configuration file to read. If
+ * it is not specified, we will read from the default Ceph locations
+ * (e.g., /etc/ceph/ceph.conf). To avoid reading _any_ configuration
+ * file, specify conf=/dev/null.
*
+ * Configuration values containing :, @, or = can be escaped with a
+ * leading "\".
*/
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
@@ -104,8 +102,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
*p = NULL;
if (delim != '\0') {
- end = strchr(src, delim);
- if (end) {
+ for (end = src; *end; ++end) {
+ if (*end == delim) {
+ break;
+ }
+ if (*end == '\\' && end[1] != '\0') {
+ end++;
+ }
+ }
+ if (*end == delim) {
*p = end + 1;
*end = '\0';
}
@@ -124,6 +129,19 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
return 0;
}
+static void qemu_rbd_unescape(char *src)
+{
+ char *p;
+
+ for (p = src; *src; ++src, ++p) {
+ if (*src == '\\' && src[1] != '\0') {
+ src++;
+ }
+ *p = *src;
+ }
+ *p = '\0';
+}
+
static int qemu_rbd_parsename(const char *filename,
char *pool, int pool_len,
char *snap, int snap_len,
@@ -148,6 +166,7 @@ static int qemu_rbd_parsename(const char *filename,
ret = -EINVAL;
goto done;
}
+ qemu_rbd_unescape(pool);
if (strchr(p, '@')) {
ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
@@ -155,9 +174,11 @@ static int qemu_rbd_parsename(const char *filename,
goto done;
}
ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p);
+ qemu_rbd_unescape(snap);
} else {
ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p);
}
+ qemu_rbd_unescape(name);
if (ret < 0 || !p) {
goto done;
}
@@ -213,6 +234,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
if (ret < 0) {
break;
}
+ qemu_rbd_unescape(name);
if (!p) {
error_report("conf option %s has no value", name);
@@ -225,6 +247,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
if (ret < 0) {
break;
}
+ qemu_rbd_unescape(value);
if (strcmp(name, "conf") == 0) {
ret = rados_conf_read_file(cluster, value);
@@ -298,11 +321,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options)
}
if (strstr(conf, "conf=") == NULL) {
- if (rados_conf_read_file(cluster, NULL) < 0) {
- error_report("error reading config file");
- rados_shutdown(cluster);
- return -EIO;
- }
+ /* try default location, but ignore failure */
+ rados_conf_read_file(cluster, NULL);
}
if (conf[0] != '\0' &&
@@ -441,11 +461,8 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
}
if (strstr(conf, "conf=") == NULL) {
- r = rados_conf_read_file(s->cluster, NULL);
- if (r < 0) {
- error_report("error reading config file");
- goto failed_shutdown;
- }
+ /* try default location, but ignore failure */
+ rados_conf_read_file(s->cluster, NULL);
}
if (conf[0] != '\0') {
@@ -688,6 +705,17 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
+static int qemu_rbd_flush(BlockDriverState *bs)
+{
+#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
+ /* rbd_flush added in 0.1.1 */
+ BDRVRBDState *s = bs->opaque;
+ return rbd_flush(s->image);
+#else
+ return 0;
+#endif
+}
+
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVRBDState *s = bs->opaque;
@@ -823,6 +851,7 @@ static BlockDriver bdrv_rbd = {
.bdrv_file_open = qemu_rbd_open,
.bdrv_close = qemu_rbd_close,
.bdrv_create = qemu_rbd_create,
+ .bdrv_flush = qemu_rbd_flush,
.bdrv_get_info = qemu_rbd_getinfo,
.create_options = qemu_rbd_create_options,
.bdrv_getlength = qemu_rbd_getlength,
diff --git a/block/vmdk.c b/block/vmdk.c
index 6c8edfc..5d16ec4 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -179,11 +179,16 @@ static void vmdk_free_extents(BlockDriverState *bs)
{
int i;
BDRVVmdkState *s = bs->opaque;
+ VmdkExtent *e;
for (i = 0; i < s->num_extents; i++) {
- g_free(s->extents[i].l1_table);
- g_free(s->extents[i].l2_cache);
- g_free(s->extents[i].l1_backup_table);
+ e = &s->extents[i];
+ g_free(e->l1_table);
+ g_free(e->l2_cache);
+ g_free(e->l1_backup_table);
+ if (e->file != bs->file) {
+ bdrv_delete(e->file);
+ }
}
g_free(s->extents);
}
@@ -619,12 +624,13 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
s->desc_offset = 0;
ret = vmdk_parse_extents(buf, bs, bs->file->filename);
if (ret) {
+ vmdk_free_extents(bs);
return ret;
}
/* try to open parent images, if exist */
if (vmdk_parent_open(bs)) {
- g_free(s->extents);
+ vmdk_free_extents(bs);
return -EINVAL;
}
s->parent_cid = vmdk_read_cid(bs, 1);
diff --git a/cpus.c b/cpus.c
index 883c27a..8978779 100644
--- a/cpus.c
+++ b/cpus.c
@@ -380,11 +380,6 @@ static int qemu_signal_init(void)
int sigfd;
sigset_t set;
- /* SIGUSR2 used by posix-aio-compat.c */
- sigemptyset(&set);
- sigaddset(&set, SIGUSR2);
- pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
/*
* SIG_IPI must be blocked in the main thread and must not be caught
* by sigwait() in the signal thread. Otherwise, the cpu thread will
diff --git a/dma-helpers.c b/dma-helpers.c
index 4610ea0..86d2d0a 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -42,7 +42,8 @@ typedef struct {
BlockDriverAIOCB *acb;
QEMUSGList *sg;
uint64_t sector_num;
- int is_write;
+ bool to_dev;
+ bool in_cancel;
int sg_cur_index;
target_phys_addr_t sg_cur_byte;
QEMUIOVector iov;
@@ -58,7 +59,7 @@ static void reschedule_dma(void *opaque)
qemu_bh_delete(dbs->bh);
dbs->bh = NULL;
- dma_bdrv_cb(opaque, 0);
+ dma_bdrv_cb(dbs, 0);
}
static void continue_after_map_failure(void *opaque)
@@ -75,9 +76,29 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
for (i = 0; i < dbs->iov.niov; ++i) {
cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
- dbs->iov.iov[i].iov_len, !dbs->is_write,
+ dbs->iov.iov[i].iov_len, !dbs->to_dev,
dbs->iov.iov[i].iov_len);
}
+ qemu_iovec_reset(&dbs->iov);
+}
+
+static void dma_complete(DMAAIOCB *dbs, int ret)
+{
+ dma_bdrv_unmap(dbs);
+ if (dbs->common.cb) {
+ dbs->common.cb(dbs->common.opaque, ret);
+ }
+ qemu_iovec_destroy(&dbs->iov);
+ if (dbs->bh) {
+ qemu_bh_delete(dbs->bh);
+ dbs->bh = NULL;
+ }
+ if (!dbs->in_cancel) {
+ /* Requests may complete while dma_aio_cancel is in progress. In
+ * this case, the AIOCB should not be released because it is still
+ * referenced by dma_aio_cancel. */
+ qemu_aio_release(dbs);
+ }
}
static void dma_bdrv_cb(void *opaque, int ret)
@@ -89,19 +110,16 @@ static void dma_bdrv_cb(void *opaque, int ret)
dbs->acb = NULL;
dbs->sector_num += dbs->iov.size / 512;
dma_bdrv_unmap(dbs);
- qemu_iovec_reset(&dbs->iov);
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
- dbs->common.cb(dbs->common.opaque, ret);
- qemu_iovec_destroy(&dbs->iov);
- qemu_aio_release(dbs);
+ dma_complete(dbs, ret);
return;
}
while (dbs->sg_cur_index < dbs->sg->nsg) {
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
- mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->is_write);
+ mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->to_dev);
if (!mem)
break;
qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -120,9 +138,7 @@ static void dma_bdrv_cb(void *opaque, int ret)
dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
dbs->iov.size / 512, dma_bdrv_cb, dbs);
if (!dbs->acb) {
- dma_bdrv_unmap(dbs);
- qemu_iovec_destroy(&dbs->iov);
- return;
+ dma_complete(dbs, -EIO);
}
}
@@ -131,8 +147,14 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb)
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
if (dbs->acb) {
- bdrv_aio_cancel(dbs->acb);
+ BlockDriverAIOCB *acb = dbs->acb;
+ dbs->acb = NULL;
+ dbs->in_cancel = true;
+ bdrv_aio_cancel(acb);
+ dbs->in_cancel = false;
}
+ dbs->common.cb = NULL;
+ dma_complete(dbs, 0);
}
static AIOPool dma_aio_pool = {
@@ -143,7 +165,7 @@ static AIOPool dma_aio_pool = {
BlockDriverAIOCB *dma_bdrv_io(
BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
- void *opaque, int is_write)
+ void *opaque, bool to_dev)
{
DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque);
@@ -153,15 +175,11 @@ BlockDriverAIOCB *dma_bdrv_io(
dbs->sector_num = sector_num;
dbs->sg_cur_index = 0;
dbs->sg_cur_byte = 0;
- dbs->is_write = is_write;
+ dbs->to_dev = to_dev;
dbs->io_func = io_func;
dbs->bh = NULL;
qemu_iovec_init(&dbs->iov, sg->nsg);
dma_bdrv_cb(dbs, 0);
- if (!dbs->acb) {
- qemu_aio_release(dbs);
- return NULL;
- }
return &dbs->common;
}
@@ -170,12 +188,12 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
- return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, 0);
+ return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, false);
}
BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
void (*cb)(void *opaque, int ret), void *opaque)
{
- return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, 1);
+ return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, true);
}
diff --git a/dma.h b/dma.h
index a6db5ba..2bdc236 100644
--- a/dma.h
+++ b/dma.h
@@ -15,10 +15,13 @@
#include "hw/hw.h"
#include "block.h"
-typedef struct {
+typedef struct ScatterGatherEntry ScatterGatherEntry;
+
+#if defined(TARGET_PHYS_ADDR_BITS)
+struct ScatterGatherEntry {
target_phys_addr_t base;
target_phys_addr_t len;
-} ScatterGatherEntry;
+};
struct QEMUSGList {
ScatterGatherEntry *sg;
@@ -31,6 +34,7 @@ void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
target_phys_addr_t len);
void qemu_sglist_destroy(QEMUSGList *qsg);
+#endif
typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
@@ -39,7 +43,7 @@ typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num,
BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector_num,
DMAIOFunc *io_func, BlockDriverCompletionFunc *cb,
- void *opaque, int is_write);
+ void *opaque, bool to_dev);
BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs,
QEMUSGList *sg, uint64_t sector,
BlockDriverCompletionFunc *cb, void *opaque);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index b813236..226230c 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -499,10 +499,7 @@ static void ahci_reset_port(AHCIState *s, int port)
ide_bus_reset(&d->port);
ide_state->ncq_queues = AHCI_MAX_CMDS;
- pr->irq_stat = 0;
- pr->irq_mask = 0;
pr->scr_stat = 0;
- pr->scr_ctl = 0;
pr->scr_err = 0;
pr->scr_act = 0;
d->busy_slot = -1;
@@ -1159,12 +1156,17 @@ void ahci_uninit(AHCIState *s)
void ahci_reset(void *opaque)
{
struct AHCIPCIState *d = opaque;
+ AHCIPortRegs *pr;
int i;
d->ahci.control_regs.irqstatus = 0;
d->ahci.control_regs.ghc = 0;
for (i = 0; i < d->ahci.ports; i++) {
+ pr = &d->ahci.dev[i].port_regs;
+ pr->irq_stat = 0;
+ pr->irq_mask = 0;
+ pr->scr_ctl = 0;
ahci_reset_port(&d->ahci, i);
}
}
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 4d5a076..4e76fc7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -603,7 +603,7 @@ handle_rw_error:
break;
case IDE_DMA_TRIM:
s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
- ide_issue_trim, ide_dma_cb, s, 1);
+ ide_issue_trim, ide_dma_cb, s, true);
break;
}
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index c1844cb..37b8239 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -156,7 +156,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
break;
case IDE_DMA_TRIM:
m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num,
- ide_issue_trim, pmac_ide_transfer_cb, s, 1);
+ ide_issue_trim, pmac_ide_transfer_cb, s, true);
break;
}
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 0248294..aca65a1 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -542,15 +542,15 @@ static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
break;
case 1:
case 2:
- cmd->xfer = buf[8] | (buf[7] << 8);
+ cmd->xfer = lduw_be_p(&buf[7]);
cmd->len = 10;
break;
case 4:
- cmd->xfer = buf[13] | (buf[12] << 8) | (buf[11] << 16) | (buf[10] << 24);
+ cmd->xfer = ldl_be_p(&buf[10]);
cmd->len = 16;
break;
case 5:
- cmd->xfer = buf[9] | (buf[8] << 8) | (buf[7] << 16) | (buf[6] << 24);
+ cmd->xfer = ldl_be_p(&buf[6]);
cmd->len = 12;
break;
default:
@@ -710,23 +710,15 @@ static uint64_t scsi_cmd_lba(SCSICommand *cmd)
switch (buf[0] >> 5) {
case 0:
- lba = (uint64_t) buf[3] | ((uint64_t) buf[2] << 8) |
- (((uint64_t) buf[1] & 0x1f) << 16);
+ lba = ldl_be_p(&buf[0]) & 0x1fffff;
break;
case 1:
case 2:
- lba = (uint64_t) buf[5] | ((uint64_t) buf[4] << 8) |
- ((uint64_t) buf[3] << 16) | ((uint64_t) buf[2] << 24);
+ case 5:
+ lba = ldl_be_p(&buf[2]);
break;
case 4:
- lba = (uint64_t) buf[9] | ((uint64_t) buf[8] << 8) |
- ((uint64_t) buf[7] << 16) | ((uint64_t) buf[6] << 24) |
- ((uint64_t) buf[5] << 32) | ((uint64_t) buf[4] << 40) |
- ((uint64_t) buf[3] << 48) | ((uint64_t) buf[2] << 56);
- break;
- case 5:
- lba = (uint64_t) buf[5] | ((uint64_t) buf[4] << 8) |
- ((uint64_t) buf[3] << 16) | ((uint64_t) buf[2] << 24);
+ lba = ldq_be_p(&buf[2]);
break;
default:
lba = -1;
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 96c554f..e843f71 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -55,6 +55,7 @@ typedef struct SCSIDiskReq {
/* Both sector and sector_count are in terms of qemu 512 byte blocks. */
uint64_t sector;
uint32_t sector_count;
+ uint32_t buflen;
struct iovec iov;
QEMUIOVector qiov;
uint32_t status;
@@ -78,13 +79,15 @@ struct SCSIDiskState
};
static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type);
-static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf);
+static int scsi_disk_emulate_command(SCSIDiskReq *r);
static void scsi_free_request(SCSIRequest *req)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
- qemu_vfree(r->iov.iov_base);
+ if (r->iov.iov_base) {
+ qemu_vfree(r->iov.iov_base);
+ }
}
/* Helper function for command completion with sense. */
@@ -108,6 +111,19 @@ static void scsi_cancel_io(SCSIRequest *req)
r->req.aiocb = NULL;
}
+static uint32_t scsi_init_iovec(SCSIDiskReq *r)
+{
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+ if (!r->iov.iov_base) {
+ r->buflen = SCSI_DMA_BUF_SIZE;
+ r->iov.iov_base = qemu_blockalign(s->bs, r->buflen);
+ }
+ r->iov.iov_len = MIN(r->sector_count * 512, r->buflen);
+ qemu_iovec_init_external(&r->qiov, &r->iov, 1);
+ return r->qiov.size / 512;
+}
+
static void scsi_read_complete(void * opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -125,12 +141,12 @@ static void scsi_read_complete(void * opaque, int ret)
}
}
- DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->iov.iov_len);
+ DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size);
- n = r->iov.iov_len / 512;
+ n = r->qiov.size / 512;
r->sector += n;
r->sector_count -= n;
- scsi_req_data(&r->req, r->iov.iov_len);
+ scsi_req_data(&r->req, r->qiov.size);
}
static void scsi_flush_complete(void * opaque, int ret)
@@ -181,16 +197,10 @@ static void scsi_read_data(SCSIRequest *req)
return;
}
- n = r->sector_count;
- if (n > SCSI_DMA_BUF_SIZE / 512)
- n = SCSI_DMA_BUF_SIZE / 512;
-
if (s->tray_open) {
scsi_read_complete(r, -ENOMEDIUM);
}
- r->iov.iov_len = n * 512;
- qemu_iovec_init_external(&r->qiov, &r->iov, 1);
-
+ n = scsi_init_iovec(r);
bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
r->req.aiocb = bdrv_aio_readv(s->bs, r->sector, &r->qiov, n,
scsi_read_complete, r);
@@ -239,7 +249,6 @@ static void scsi_write_complete(void * opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- uint32_t len;
uint32_t n;
if (r->req.aiocb != NULL) {
@@ -253,19 +262,15 @@ static void scsi_write_complete(void * opaque, int ret)
}
}
- n = r->iov.iov_len / 512;
+ n = r->qiov.size / 512;
r->sector += n;
r->sector_count -= n;
if (r->sector_count == 0) {
scsi_req_complete(&r->req, GOOD);
} else {
- len = r->sector_count * 512;
- if (len > SCSI_DMA_BUF_SIZE) {
- len = SCSI_DMA_BUF_SIZE;
- }
- r->iov.iov_len = len;
- DPRINTF("Write complete tag=0x%x more=%d\n", r->req.tag, len);
- scsi_req_data(&r->req, len);
+ scsi_init_iovec(r);
+ DPRINTF("Write complete tag=0x%x more=%d\n", r->req.tag, r->qiov.size);
+ scsi_req_data(&r->req, r->qiov.size);
}
}
@@ -284,21 +289,19 @@ static void scsi_write_data(SCSIRequest *req)
return;
}
- n = r->iov.iov_len / 512;
+ n = r->qiov.size / 512;
if (n) {
if (s->tray_open) {
scsi_write_complete(r, -ENOMEDIUM);
}
- qemu_iovec_init_external(&r->qiov, &r->iov, 1);
-
bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE);
r->req.aiocb = bdrv_aio_writev(s->bs, r->sector, &r->qiov, n,
- scsi_write_complete, r);
+ scsi_write_complete, r);
if (r->req.aiocb == NULL) {
scsi_write_complete(r, -ENOMEM);
}
} else {
- /* Invoke completion routine to fetch data from host. */
+ /* Called for the first time. Ask the driver to send us more data. */
scsi_write_complete(r, 0);
}
}
@@ -329,7 +332,7 @@ static void scsi_dma_restart_bh(void *opaque)
scsi_write_data(&r->req);
break;
case SCSI_REQ_STATUS_RETRY_FLUSH:
- ret = scsi_disk_emulate_command(r, r->iov.iov_base);
+ ret = scsi_disk_emulate_command(r);
if (ret == 0) {
scsi_req_complete(&r->req, GOOD);
}
@@ -844,13 +847,31 @@ static int scsi_disk_emulate_start_stop(SCSIDiskReq *r)
return 0;
}
-static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf)
+static int scsi_disk_emulate_command(SCSIDiskReq *r)
{
SCSIRequest *req = &r->req;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
uint64_t nb_sectors;
+ uint8_t *outbuf;
int buflen = 0;
+ if (!r->iov.iov_base) {
+ /*
+ * FIXME: we shouldn't return anything bigger than 4k, but the code
+ * requires the buffer to be as big as req->cmd.xfer in several
+ * places. So, do not allow CDBs with a very large ALLOCATION
+ * LENGTH. The real fix would be to modify scsi_read_data and
+ * dma_buf_read, so that they return data beyond the buflen
+ * as all zeros.
+ */
+ if (req->cmd.xfer > 65536) {
+ goto illegal_request;
+ }
+ r->buflen = MAX(4096, req->cmd.xfer);
+ r->iov.iov_base = qemu_blockalign(s->bs, r->buflen);
+ }
+
+ outbuf = r->iov.iov_base;
switch (req->cmd.buf[0]) {
case TEST_UNIT_READY:
if (s->tray_open || !bdrv_is_inserted(s->bs))
@@ -1001,11 +1022,9 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
int32_t len;
uint8_t command;
- uint8_t *outbuf;
int rc;
command = buf[0];
- outbuf = (uint8_t *)r->iov.iov_base;
DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", req->lun, req->tag, buf[0]);
#ifdef DEBUG_SCSI
@@ -1034,7 +1053,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf)
case GET_CONFIGURATION:
case SERVICE_ACTION_IN_16:
case VERIFY_10:
- rc = scsi_disk_emulate_command(r, outbuf);
+ rc = scsi_disk_emulate_command(r);
if (rc < 0) {
return 0;
}
@@ -1285,11 +1304,8 @@ static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag,
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d);
SCSIRequest *req;
- SCSIDiskReq *r;
req = scsi_req_alloc(&scsi_disk_reqops, &s->qdev, tag, lun, hba_private);
- r = DO_UPCAST(SCSIDiskReq, req, req);
- r->iov.iov_base = qemu_blockalign(s->bs, SCSI_DMA_BUF_SIZE);
return req;
}
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index 5ce01af..8f6b70d 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -244,12 +244,6 @@ static uint8_t *scsi_get_buf(SCSIRequest *req)
static void scsi_req_fixup(SCSIRequest *req)
{
switch(req->cmd.buf[0]) {
- case WRITE_10:
- req->cmd.buf[1] &= ~0x08; /* disable FUA */
- break;
- case READ_10:
- req->cmd.buf[1] &= ~0x08; /* disable FUA */
- break;
case REWIND:
case START_STOP:
if (req->dev->type == TYPE_TAPE) {
diff --git a/linux-aio.c b/linux-aio.c
index 5265a02..bffa6cd 100644
--- a/linux-aio.c
+++ b/linux-aio.c
@@ -68,15 +68,6 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
qemu_aio_release(laiocb);
}
-/*
- * All requests are directly processed when they complete, so there's nothing
- * left to do during qemu_aio_wait().
- */
-static int qemu_laio_process_requests(void *opaque)
-{
- return 0;
-}
-
static void qemu_laio_completion_cb(void *opaque)
{
struct qemu_laio_state *s = opaque;
@@ -215,7 +206,7 @@ void *laio_init(void)
goto out_close_efd;
qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
- qemu_laio_flush_cb, qemu_laio_process_requests, s);
+ qemu_laio_flush_cb, NULL, s);
return s;
diff --git a/nbd.c b/nbd.c
index 6d81cfb..595f4d8 100644
--- a/nbd.c
+++ b/nbd.c
@@ -30,6 +30,10 @@
#include <ctype.h>
#include <inttypes.h>
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
#include "qemu_socket.h"
//#define DEBUG_NBD
@@ -63,6 +67,8 @@
#define NBD_PRINT_DEBUG _IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS _IO(0xab, 7)
#define NBD_DISCONNECT _IO(0xab, 8)
+#define NBD_SET_TIMEOUT _IO(0xab, 9)
+#define NBD_SET_FLAGS _IO(0xab, 10)
#define NBD_OPT_EXPORT_NAME (1 << 0)
@@ -172,7 +178,7 @@ int unix_socket_outgoing(const char *path)
Request (type == 2)
*/
-int nbd_negotiate(int csock, off_t size)
+int nbd_negotiate(int csock, off_t size, uint32_t flags)
{
char buf[8 + 8 + 8 + 128];
@@ -180,14 +186,16 @@ int nbd_negotiate(int csock, off_t size)
[ 0 .. 7] passwd ("NBDMAGIC")
[ 8 .. 15] magic (0x00420281861253)
[16 .. 23] size
- [24 .. 151] reserved (0)
+ [24 .. 27] flags
+ [28 .. 151] reserved (0)
*/
TRACE("Beginning negotiation.");
memcpy(buf, "NBDMAGIC", 8);
cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
cpu_to_be64w((uint64_t*)(buf + 16), size);
- memset(buf + 24, 0, 128);
+ cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS);
+ memset(buf + 28, 0, 124);
if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
LOG("write failed");
@@ -337,8 +345,8 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
return 0;
}
-#ifndef _WIN32
-int nbd_init(int fd, int csock, off_t size, size_t blocksize)
+#ifdef __linux__
+int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
TRACE("Setting block size to %lu", (unsigned long)blocksize);
@@ -358,6 +366,26 @@ int nbd_init(int fd, int csock, off_t size, size_t blocksize)
return -1;
}
+ if (flags & NBD_FLAG_READ_ONLY) {
+ int read_only = 1;
+ TRACE("Setting readonly attribute");
+
+ if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
+ int serrno = errno;
+ LOG("Failed setting read-only attribute");
+ errno = serrno;
+ return -1;
+ }
+ }
+
+ if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
+ && errno != ENOTTY) {
+ int serrno = errno;
+ LOG("Failed setting flags");
+ errno = serrno;
+ return -1;
+ }
+
TRACE("Clearing NBD socket");
if (ioctl(fd, NBD_CLEAR_SOCK) == -1) {
@@ -548,7 +576,7 @@ static int nbd_send_reply(int csock, struct nbd_reply *reply)
}
int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
- off_t *offset, bool readonly, uint8_t *data, int data_size)
+ off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size)
{
struct nbd_request request;
struct nbd_reply reply;
@@ -632,7 +660,7 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
return -1;
}
- if (readonly) {
+ if (nbdflags & NBD_FLAG_READ_ONLY) {
TRACE("Server is read-only, return error");
reply.error = 1;
} else {
diff --git a/nbd.h b/nbd.h
index df7b7af..61553f4 100644
--- a/nbd.h
+++ b/nbd.h
@@ -37,10 +37,22 @@ struct nbd_reply {
uint64_t handle;
} QEMU_PACKED;
+#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
+#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */
+#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */
+#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */
+#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
+#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
+
+#define NBD_CMD_MASK_COMMAND 0x0000ffff
+#define NBD_CMD_FLAG_FUA (1 << 16)
+
enum {
NBD_CMD_READ = 0,
NBD_CMD_WRITE = 1,
- NBD_CMD_DISC = 2
+ NBD_CMD_DISC = 2,
+ NBD_CMD_FLUSH = 3,
+ NBD_CMD_TRIM = 4
};
#define NBD_DEFAULT_PORT 10809
@@ -53,14 +65,14 @@ int tcp_socket_incoming_spec(const char *address_and_port);
int unix_socket_outgoing(const char *path);
int unix_socket_incoming(const char *path);
-int nbd_negotiate(int csock, off_t size);
+int nbd_negotiate(int csock, off_t size, uint32_t flags);
int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
off_t *size, size_t *blocksize);
-int nbd_init(int fd, int csock, off_t size, size_t blocksize);
+int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize);
int nbd_send_request(int csock, struct nbd_request *request);
int nbd_receive_reply(int csock, struct nbd_reply *reply);
int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
- off_t *offset, bool readonly, uint8_t *data, int data_size);
+ off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size);
int nbd_client(int fd);
int nbd_disconnect(int fd);
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 3193dbf..d3c1174 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -42,7 +42,6 @@ struct qemu_paiocb {
int aio_niov;
size_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
- int ev_signo;
off_t aio_offset;
QTAILQ_ENTRY(qemu_paiocb) node;
@@ -181,7 +180,6 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
{
- size_t offset = 0;
ssize_t len;
do {
@@ -189,12 +187,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
len = qemu_pwritev(aiocb->aio_fildes,
aiocb->aio_iov,
aiocb->aio_niov,
- aiocb->aio_offset + offset);
+ aiocb->aio_offset);
else
len = qemu_preadv(aiocb->aio_fildes,
aiocb->aio_iov,
aiocb->aio_niov,
- aiocb->aio_offset + offset);
+ aiocb->aio_offset);
} while (len == -1 && errno == EINTR);
if (len == -1)
@@ -309,12 +307,10 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
return nbytes;
}
+static void posix_aio_notify_event(void);
+
static void *aio_thread(void *unused)
{
- pid_t pid;
-
- pid = getpid();
-
mutex_lock(&lock);
pending_threads--;
mutex_unlock(&lock);
@@ -381,7 +377,7 @@ static void *aio_thread(void *unused)
aiocb->ret = ret;
mutex_unlock(&lock);
- if (kill(pid, aiocb->ev_signo)) die("kill failed");
+ posix_aio_notify_event();
}
cur_threads--;
@@ -548,18 +544,14 @@ static int posix_aio_flush(void *opaque)
static PosixAioState *posix_aio_state;
-static void aio_signal_handler(int signum)
+static void posix_aio_notify_event(void)
{
- if (posix_aio_state) {
- char byte = 0;
- ssize_t ret;
-
- ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
- if (ret < 0 && errno != EAGAIN)
- die("write()");
- }
+ char byte = 0;
+ ssize_t ret;
- qemu_service_io();
+ ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
+ if (ret < 0 && errno != EAGAIN)
+ die("write()");
}
static void paio_remove(struct qemu_paiocb *acb)
@@ -623,7 +615,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
return NULL;
acb->aio_type = type;
acb->aio_fildes = fd;
- acb->ev_signo = SIGUSR2;
if (qiov) {
acb->aio_iov = qiov->iov;
@@ -651,7 +642,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
return NULL;
acb->aio_type = QEMU_AIO_IOCTL;
acb->aio_fildes = fd;
- acb->ev_signo = SIGUSR2;
acb->aio_offset = 0;
acb->aio_ioctl_buf = buf;
acb->aio_ioctl_cmd = req;
@@ -665,7 +655,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
int paio_init(void)
{
- struct sigaction act;
PosixAioState *s;
int fds[2];
int ret;
@@ -675,11 +664,6 @@ int paio_init(void)
s = g_malloc(sizeof(PosixAioState));
- sigfillset(&act.sa_mask);
- act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
- act.sa_handler = aio_signal_handler;
- sigaction(SIGUSR2, &act, NULL);
-
s->first_aio = NULL;
if (qemu_pipe(fds) == -1) {
fprintf(stderr, "failed to create pipe\n");
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 3a39145..d8d3e15 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -185,7 +185,7 @@ int main(int argc, char **argv)
BlockDriverState *bs;
off_t dev_offset = 0;
off_t offset = 0;
- bool readonly = false;
+ uint32_t nbdflags = 0;
bool disconnect = false;
const char *bindto = "0.0.0.0";
int port = NBD_DEFAULT_PORT;
@@ -230,7 +230,6 @@ int main(int argc, char **argv)
int nb_fds = 0;
int max_fd;
int persistent = 0;
- uint32_t nbdflags;
while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
switch (ch) {
@@ -263,7 +262,7 @@ int main(int argc, char **argv)
}
break;
case 'r':
- readonly = true;
+ nbdflags |= NBD_FLAG_READ_ONLY;
flags &= ~BDRV_O_RDWR;
break;
case 'P':
@@ -398,13 +397,13 @@ int main(int argc, char **argv)
}
ret = nbd_receive_negotiate(sock, NULL, &nbdflags,
- &size, &blocksize);
+ &size, &blocksize);
if (ret == -1) {
ret = 1;
goto out;
}
- ret = nbd_init(fd, sock, size, blocksize);
+ ret = nbd_init(fd, sock, nbdflags, size, blocksize);
if (ret == -1) {
ret = 1;
goto out;
@@ -463,7 +462,7 @@ int main(int argc, char **argv)
for (i = 1; i < nb_fds && ret; i++) {
if (FD_ISSET(sharing_fds[i], &fds)) {
if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset,
- &offset, readonly, data, NBD_BUFFER_SIZE) != 0) {
+ &offset, nbdflags, data, NBD_BUFFER_SIZE) != 0) {
close(sharing_fds[i]);
nb_fds--;
sharing_fds[i] = sharing_fds[nb_fds];
@@ -479,7 +478,7 @@ int main(int argc, char **argv)
(struct sockaddr *)&addr,
&addr_len);
if (sharing_fds[nb_fds] != -1 &&
- nbd_negotiate(sharing_fds[nb_fds], fd_size) != -1) {
+ nbd_negotiate(sharing_fds[nb_fds], fd_size, nbdflags) != -1) {
if (sharing_fds[nb_fds] > max_fd)
max_fd = sharing_fds[nb_fds];
nb_fds++;