diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2011-09-20 15:21:03 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2011-09-20 15:21:03 -0500 |
commit | c8af89af96dac46922f6664cf0ca238a6b1adf0d (patch) | |
tree | 1f484f739ba85f5f9d71c3128abcbf99108a8c7c | |
parent | 39ba59c21bdc3b1dda09219919b87fe001e7d5d9 (diff) | |
parent | 16a06b24306b5733a4ef2e585964838e47735a54 (diff) | |
download | qemu-c8af89af96dac46922f6664cf0ca238a6b1adf0d.zip qemu-c8af89af96dac46922f6664cf0ca238a6b1adf0d.tar.gz qemu-c8af89af96dac46922f6664cf0ca238a6b1adf0d.tar.bz2 |
Merge remote-tracking branch 'kwolf/for-anthony' into staging
-rw-r--r-- | block/nbd.c | 4 | ||||
-rw-r--r-- | block/raw-posix.c | 9 | ||||
-rw-r--r-- | block/rbd.c | 83 | ||||
-rw-r--r-- | block/vmdk.c | 14 | ||||
-rw-r--r-- | cpus.c | 5 | ||||
-rw-r--r-- | dma-helpers.c | 58 | ||||
-rw-r--r-- | dma.h | 10 | ||||
-rw-r--r-- | hw/ide/ahci.c | 8 | ||||
-rw-r--r-- | hw/ide/core.c | 2 | ||||
-rw-r--r-- | hw/ide/macio.c | 2 | ||||
-rw-r--r-- | hw/scsi-bus.c | 22 | ||||
-rw-r--r-- | hw/scsi-disk.c | 84 | ||||
-rw-r--r-- | hw/scsi-generic.c | 6 | ||||
-rw-r--r-- | linux-aio.c | 11 | ||||
-rw-r--r-- | nbd.c | 42 | ||||
-rw-r--r-- | nbd.h | 20 | ||||
-rw-r--r-- | posix-aio-compat.c | 38 | ||||
-rw-r--r-- | qemu-nbd.c | 13 |
18 files changed, 254 insertions, 177 deletions
diff --git a/block/nbd.c b/block/nbd.c index 70edd81..76f04d8 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -48,6 +48,7 @@ typedef struct BDRVNBDState { int sock; + uint32_t nbdflags; off_t size; size_t blocksize; char *export_name; /* An NBD server may export several devices */ @@ -111,7 +112,6 @@ static int nbd_establish_connection(BlockDriverState *bs) int ret; off_t size; size_t blocksize; - uint32_t nbdflags; if (s->host_spec[0] == '/') { sock = unix_socket_outgoing(s->host_spec); @@ -126,7 +126,7 @@ static int nbd_establish_connection(BlockDriverState *bs) } /* NBD handshake */ - ret = nbd_receive_negotiate(sock, s->export_name, &nbdflags, &size, + ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size, &blocksize); if (ret == -1) { logout("Failed to negotiate with the NBD server\n"); diff --git a/block/raw-posix.c b/block/raw-posix.c index a624f56..305998d 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -839,7 +839,14 @@ static int raw_create(const char *filename, QEMUOptionParameter *options) static int raw_flush(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; - return qemu_fdatasync(s->fd); + int ret; + + ret = qemu_fdatasync(s->fd); + if (ret < 0) { + return -errno; + } + + return 0; } #ifdef CONFIG_XFS diff --git a/block/rbd.c b/block/rbd.c index 1b78d51..3068c82 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -13,35 +13,33 @@ #include "qemu-common.h" #include "qemu-error.h" - #include "block_int.h" #include <rbd/librbd.h> - - /* * When specifying the image filename use: * * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]] * - * poolname must be the name of an existing rados pool + * poolname must be the name of an existing rados pool. * - * devicename is the basename for all objects used to - * emulate the raw device. + * devicename is the name of the rbd image. * - * Each option given is used to configure rados, and may be - * any Ceph option, or "conf". The "conf" option specifies - * a Ceph configuration file to read. + * Each option given is used to configure rados, and may be any valid + * Ceph option, "id", or "conf". * - * Metadata information (image size, ...) is stored in an - * object with the name "devicename.rbd". + * The "id" option indicates what user we should authenticate as to + * the Ceph cluster. If it is excluded we will use the Ceph default + * (normally 'admin'). * - * The raw device is split into 4MB sized objects by default. - * The sequencenumber is encoded in a 12 byte long hex-string, - * and is attached to the devicename, separated by a dot. - * e.g. "devicename.1234567890ab" + * The "conf" option specifies a Ceph configuration file to read. If + * it is not specified, we will read from the default Ceph locations + * (e.g., /etc/ceph/ceph.conf). To avoid reading _any_ configuration + * file, specify conf=/dev/null. * + * Configuration values containing :, @, or = can be escaped with a + * leading "\". */ #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) @@ -104,8 +102,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len, *p = NULL; if (delim != '\0') { - end = strchr(src, delim); - if (end) { + for (end = src; *end; ++end) { + if (*end == delim) { + break; + } + if (*end == '\\' && end[1] != '\0') { + end++; + } + } + if (*end == delim) { *p = end + 1; *end = '\0'; } @@ -124,6 +129,19 @@ static int qemu_rbd_next_tok(char *dst, int dst_len, return 0; } +static void qemu_rbd_unescape(char *src) +{ + char *p; + + for (p = src; *src; ++src, ++p) { + if (*src == '\\' && src[1] != '\0') { + src++; + } + *p = *src; + } + *p = '\0'; +} + static int qemu_rbd_parsename(const char *filename, char *pool, int pool_len, char *snap, int snap_len, @@ -148,6 +166,7 @@ static int qemu_rbd_parsename(const char *filename, ret = -EINVAL; goto done; } + qemu_rbd_unescape(pool); if (strchr(p, '@')) { ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p); @@ -155,9 +174,11 @@ static int qemu_rbd_parsename(const char *filename, goto done; } ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p); + qemu_rbd_unescape(snap); } else { ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p); } + qemu_rbd_unescape(name); if (ret < 0 || !p) { goto done; } @@ -213,6 +234,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf) if (ret < 0) { break; } + qemu_rbd_unescape(name); if (!p) { error_report("conf option %s has no value", name); @@ -225,6 +247,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf) if (ret < 0) { break; } + qemu_rbd_unescape(value); if (strcmp(name, "conf") == 0) { ret = rados_conf_read_file(cluster, value); @@ -298,11 +321,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options) } if (strstr(conf, "conf=") == NULL) { - if (rados_conf_read_file(cluster, NULL) < 0) { - error_report("error reading config file"); - rados_shutdown(cluster); - return -EIO; - } + /* try default location, but ignore failure */ + rados_conf_read_file(cluster, NULL); } if (conf[0] != '\0' && @@ -441,11 +461,8 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) } if (strstr(conf, "conf=") == NULL) { - r = rados_conf_read_file(s->cluster, NULL); - if (r < 0) { - error_report("error reading config file"); - goto failed_shutdown; - } + /* try default location, but ignore failure */ + rados_conf_read_file(s->cluster, NULL); } if (conf[0] != '\0') { @@ -688,6 +705,17 @@ static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs, return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); } +static int qemu_rbd_flush(BlockDriverState *bs) +{ +#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) + /* rbd_flush added in 0.1.1 */ + BDRVRBDState *s = bs->opaque; + return rbd_flush(s->image); +#else + return 0; +#endif +} + static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) { BDRVRBDState *s = bs->opaque; @@ -823,6 +851,7 @@ static BlockDriver bdrv_rbd = { .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_create = qemu_rbd_create, + .bdrv_flush = qemu_rbd_flush, .bdrv_get_info = qemu_rbd_getinfo, .create_options = qemu_rbd_create_options, .bdrv_getlength = qemu_rbd_getlength, diff --git a/block/vmdk.c b/block/vmdk.c index 6c8edfc..5d16ec4 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -179,11 +179,16 @@ static void vmdk_free_extents(BlockDriverState *bs) { int i; BDRVVmdkState *s = bs->opaque; + VmdkExtent *e; for (i = 0; i < s->num_extents; i++) { - g_free(s->extents[i].l1_table); - g_free(s->extents[i].l2_cache); - g_free(s->extents[i].l1_backup_table); + e = &s->extents[i]; + g_free(e->l1_table); + g_free(e->l2_cache); + g_free(e->l1_backup_table); + if (e->file != bs->file) { + bdrv_delete(e->file); + } } g_free(s->extents); } @@ -619,12 +624,13 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, s->desc_offset = 0; ret = vmdk_parse_extents(buf, bs, bs->file->filename); if (ret) { + vmdk_free_extents(bs); return ret; } /* try to open parent images, if exist */ if (vmdk_parent_open(bs)) { - g_free(s->extents); + vmdk_free_extents(bs); return -EINVAL; } s->parent_cid = vmdk_read_cid(bs, 1); @@ -380,11 +380,6 @@ static int qemu_signal_init(void) int sigfd; sigset_t set; - /* SIGUSR2 used by posix-aio-compat.c */ - sigemptyset(&set); - sigaddset(&set, SIGUSR2); - pthread_sigmask(SIG_UNBLOCK, &set, NULL); - /* * SIG_IPI must be blocked in the main thread and must not be caught * by sigwait() in the signal thread. Otherwise, the cpu thread will diff --git a/dma-helpers.c b/dma-helpers.c index 4610ea0..86d2d0a 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -42,7 +42,8 @@ typedef struct { BlockDriverAIOCB *acb; QEMUSGList *sg; uint64_t sector_num; - int is_write; + bool to_dev; + bool in_cancel; int sg_cur_index; target_phys_addr_t sg_cur_byte; QEMUIOVector iov; @@ -58,7 +59,7 @@ static void reschedule_dma(void *opaque) qemu_bh_delete(dbs->bh); dbs->bh = NULL; - dma_bdrv_cb(opaque, 0); + dma_bdrv_cb(dbs, 0); } static void continue_after_map_failure(void *opaque) @@ -75,9 +76,29 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs) for (i = 0; i < dbs->iov.niov; ++i) { cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base, - dbs->iov.iov[i].iov_len, !dbs->is_write, + dbs->iov.iov[i].iov_len, !dbs->to_dev, dbs->iov.iov[i].iov_len); } + qemu_iovec_reset(&dbs->iov); +} + +static void dma_complete(DMAAIOCB *dbs, int ret) +{ + dma_bdrv_unmap(dbs); + if (dbs->common.cb) { + dbs->common.cb(dbs->common.opaque, ret); + } + qemu_iovec_destroy(&dbs->iov); + if (dbs->bh) { + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + } + if (!dbs->in_cancel) { + /* Requests may complete while dma_aio_cancel is in progress. In + * this case, the AIOCB should not be released because it is still + * referenced by dma_aio_cancel. */ + qemu_aio_release(dbs); + } } static void dma_bdrv_cb(void *opaque, int ret) @@ -89,19 +110,16 @@ static void dma_bdrv_cb(void *opaque, int ret) dbs->acb = NULL; dbs->sector_num += dbs->iov.size / 512; dma_bdrv_unmap(dbs); - qemu_iovec_reset(&dbs->iov); if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dbs->common.cb(dbs->common.opaque, ret); - qemu_iovec_destroy(&dbs->iov); - qemu_aio_release(dbs); + dma_complete(dbs, ret); return; } while (dbs->sg_cur_index < dbs->sg->nsg) { cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; - mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->is_write); + mem = cpu_physical_memory_map(cur_addr, &cur_len, !dbs->to_dev); if (!mem) break; qemu_iovec_add(&dbs->iov, mem, cur_len); @@ -120,9 +138,7 @@ static void dma_bdrv_cb(void *opaque, int ret) dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov, dbs->iov.size / 512, dma_bdrv_cb, dbs); if (!dbs->acb) { - dma_bdrv_unmap(dbs); - qemu_iovec_destroy(&dbs->iov); - return; + dma_complete(dbs, -EIO); } } @@ -131,8 +147,14 @@ static void dma_aio_cancel(BlockDriverAIOCB *acb) DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); if (dbs->acb) { - bdrv_aio_cancel(dbs->acb); + BlockDriverAIOCB *acb = dbs->acb; + dbs->acb = NULL; + dbs->in_cancel = true; + bdrv_aio_cancel(acb); + dbs->in_cancel = false; } + dbs->common.cb = NULL; + dma_complete(dbs, 0); } static AIOPool dma_aio_pool = { @@ -143,7 +165,7 @@ static AIOPool dma_aio_pool = { BlockDriverAIOCB *dma_bdrv_io( BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num, DMAIOFunc *io_func, BlockDriverCompletionFunc *cb, - void *opaque, int is_write) + void *opaque, bool to_dev) { DMAAIOCB *dbs = qemu_aio_get(&dma_aio_pool, bs, cb, opaque); @@ -153,15 +175,11 @@ BlockDriverAIOCB *dma_bdrv_io( dbs->sector_num = sector_num; dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; - dbs->is_write = is_write; + dbs->to_dev = to_dev; dbs->io_func = io_func; dbs->bh = NULL; qemu_iovec_init(&dbs->iov, sg->nsg); dma_bdrv_cb(dbs, 0); - if (!dbs->acb) { - qemu_aio_release(dbs); - return NULL; - } return &dbs->common; } @@ -170,12 +188,12 @@ BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, 0); + return dma_bdrv_io(bs, sg, sector, bdrv_aio_readv, cb, opaque, false); } BlockDriverAIOCB *dma_bdrv_write(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, 1); + return dma_bdrv_io(bs, sg, sector, bdrv_aio_writev, cb, opaque, true); } @@ -15,10 +15,13 @@ #include "hw/hw.h" #include "block.h" -typedef struct { +typedef struct ScatterGatherEntry ScatterGatherEntry; + +#if defined(TARGET_PHYS_ADDR_BITS) +struct ScatterGatherEntry { target_phys_addr_t base; target_phys_addr_t len; -} ScatterGatherEntry; +}; struct QEMUSGList { ScatterGatherEntry *sg; @@ -31,6 +34,7 @@ void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint); void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base, target_phys_addr_t len); void qemu_sglist_destroy(QEMUSGList *qsg); +#endif typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, @@ -39,7 +43,7 @@ typedef BlockDriverAIOCB *DMAIOFunc(BlockDriverState *bs, int64_t sector_num, BlockDriverAIOCB *dma_bdrv_io(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector_num, DMAIOFunc *io_func, BlockDriverCompletionFunc *cb, - void *opaque, int is_write); + void *opaque, bool to_dev); BlockDriverAIOCB *dma_bdrv_read(BlockDriverState *bs, QEMUSGList *sg, uint64_t sector, BlockDriverCompletionFunc *cb, void *opaque); diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index b813236..226230c 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -499,10 +499,7 @@ static void ahci_reset_port(AHCIState *s, int port) ide_bus_reset(&d->port); ide_state->ncq_queues = AHCI_MAX_CMDS; - pr->irq_stat = 0; - pr->irq_mask = 0; pr->scr_stat = 0; - pr->scr_ctl = 0; pr->scr_err = 0; pr->scr_act = 0; d->busy_slot = -1; @@ -1159,12 +1156,17 @@ void ahci_uninit(AHCIState *s) void ahci_reset(void *opaque) { struct AHCIPCIState *d = opaque; + AHCIPortRegs *pr; int i; d->ahci.control_regs.irqstatus = 0; d->ahci.control_regs.ghc = 0; for (i = 0; i < d->ahci.ports; i++) { + pr = &d->ahci.dev[i].port_regs; + pr->irq_stat = 0; + pr->irq_mask = 0; + pr->scr_ctl = 0; ahci_reset_port(&d->ahci, i); } } diff --git a/hw/ide/core.c b/hw/ide/core.c index 4d5a076..4e76fc7 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -603,7 +603,7 @@ handle_rw_error: break; case IDE_DMA_TRIM: s->bus->dma->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, - ide_issue_trim, ide_dma_cb, s, 1); + ide_issue_trim, ide_dma_cb, s, true); break; } diff --git a/hw/ide/macio.c b/hw/ide/macio.c index c1844cb..37b8239 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -156,7 +156,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) break; case IDE_DMA_TRIM: m->aiocb = dma_bdrv_io(s->bs, &s->sg, sector_num, - ide_issue_trim, pmac_ide_transfer_cb, s, 1); + ide_issue_trim, pmac_ide_transfer_cb, s, true); break; } diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c index 0248294..aca65a1 100644 --- a/hw/scsi-bus.c +++ b/hw/scsi-bus.c @@ -542,15 +542,15 @@ static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf) break; case 1: case 2: - cmd->xfer = buf[8] | (buf[7] << 8); + cmd->xfer = lduw_be_p(&buf[7]); cmd->len = 10; break; case 4: - cmd->xfer = buf[13] | (buf[12] << 8) | (buf[11] << 16) | (buf[10] << 24); + cmd->xfer = ldl_be_p(&buf[10]); cmd->len = 16; break; case 5: - cmd->xfer = buf[9] | (buf[8] << 8) | (buf[7] << 16) | (buf[6] << 24); + cmd->xfer = ldl_be_p(&buf[6]); cmd->len = 12; break; default: @@ -710,23 +710,15 @@ static uint64_t scsi_cmd_lba(SCSICommand *cmd) switch (buf[0] >> 5) { case 0: - lba = (uint64_t) buf[3] | ((uint64_t) buf[2] << 8) | - (((uint64_t) buf[1] & 0x1f) << 16); + lba = ldl_be_p(&buf[0]) & 0x1fffff; break; case 1: case 2: - lba = (uint64_t) buf[5] | ((uint64_t) buf[4] << 8) | - ((uint64_t) buf[3] << 16) | ((uint64_t) buf[2] << 24); + case 5: + lba = ldl_be_p(&buf[2]); break; case 4: - lba = (uint64_t) buf[9] | ((uint64_t) buf[8] << 8) | - ((uint64_t) buf[7] << 16) | ((uint64_t) buf[6] << 24) | - ((uint64_t) buf[5] << 32) | ((uint64_t) buf[4] << 40) | - ((uint64_t) buf[3] << 48) | ((uint64_t) buf[2] << 56); - break; - case 5: - lba = (uint64_t) buf[5] | ((uint64_t) buf[4] << 8) | - ((uint64_t) buf[3] << 16) | ((uint64_t) buf[2] << 24); + lba = ldq_be_p(&buf[2]); break; default: lba = -1; diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 96c554f..e843f71 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -55,6 +55,7 @@ typedef struct SCSIDiskReq { /* Both sector and sector_count are in terms of qemu 512 byte blocks. */ uint64_t sector; uint32_t sector_count; + uint32_t buflen; struct iovec iov; QEMUIOVector qiov; uint32_t status; @@ -78,13 +79,15 @@ struct SCSIDiskState }; static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type); -static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf); +static int scsi_disk_emulate_command(SCSIDiskReq *r); static void scsi_free_request(SCSIRequest *req) { SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); - qemu_vfree(r->iov.iov_base); + if (r->iov.iov_base) { + qemu_vfree(r->iov.iov_base); + } } /* Helper function for command completion with sense. */ @@ -108,6 +111,19 @@ static void scsi_cancel_io(SCSIRequest *req) r->req.aiocb = NULL; } +static uint32_t scsi_init_iovec(SCSIDiskReq *r) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + + if (!r->iov.iov_base) { + r->buflen = SCSI_DMA_BUF_SIZE; + r->iov.iov_base = qemu_blockalign(s->bs, r->buflen); + } + r->iov.iov_len = MIN(r->sector_count * 512, r->buflen); + qemu_iovec_init_external(&r->qiov, &r->iov, 1); + return r->qiov.size / 512; +} + static void scsi_read_complete(void * opaque, int ret) { SCSIDiskReq *r = (SCSIDiskReq *)opaque; @@ -125,12 +141,12 @@ static void scsi_read_complete(void * opaque, int ret) } } - DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->iov.iov_len); + DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size); - n = r->iov.iov_len / 512; + n = r->qiov.size / 512; r->sector += n; r->sector_count -= n; - scsi_req_data(&r->req, r->iov.iov_len); + scsi_req_data(&r->req, r->qiov.size); } static void scsi_flush_complete(void * opaque, int ret) @@ -181,16 +197,10 @@ static void scsi_read_data(SCSIRequest *req) return; } - n = r->sector_count; - if (n > SCSI_DMA_BUF_SIZE / 512) - n = SCSI_DMA_BUF_SIZE / 512; - if (s->tray_open) { scsi_read_complete(r, -ENOMEDIUM); } - r->iov.iov_len = n * 512; - qemu_iovec_init_external(&r->qiov, &r->iov, 1); - + n = scsi_init_iovec(r); bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_READ); r->req.aiocb = bdrv_aio_readv(s->bs, r->sector, &r->qiov, n, scsi_read_complete, r); @@ -239,7 +249,6 @@ static void scsi_write_complete(void * opaque, int ret) { SCSIDiskReq *r = (SCSIDiskReq *)opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - uint32_t len; uint32_t n; if (r->req.aiocb != NULL) { @@ -253,19 +262,15 @@ static void scsi_write_complete(void * opaque, int ret) } } - n = r->iov.iov_len / 512; + n = r->qiov.size / 512; r->sector += n; r->sector_count -= n; if (r->sector_count == 0) { scsi_req_complete(&r->req, GOOD); } else { - len = r->sector_count * 512; - if (len > SCSI_DMA_BUF_SIZE) { - len = SCSI_DMA_BUF_SIZE; - } - r->iov.iov_len = len; - DPRINTF("Write complete tag=0x%x more=%d\n", r->req.tag, len); - scsi_req_data(&r->req, len); + scsi_init_iovec(r); + DPRINTF("Write complete tag=0x%x more=%d\n", r->req.tag, r->qiov.size); + scsi_req_data(&r->req, r->qiov.size); } } @@ -284,21 +289,19 @@ static void scsi_write_data(SCSIRequest *req) return; } - n = r->iov.iov_len / 512; + n = r->qiov.size / 512; if (n) { if (s->tray_open) { scsi_write_complete(r, -ENOMEDIUM); } - qemu_iovec_init_external(&r->qiov, &r->iov, 1); - bdrv_acct_start(s->bs, &r->acct, n * BDRV_SECTOR_SIZE, BDRV_ACCT_WRITE); r->req.aiocb = bdrv_aio_writev(s->bs, r->sector, &r->qiov, n, - scsi_write_complete, r); + scsi_write_complete, r); if (r->req.aiocb == NULL) { scsi_write_complete(r, -ENOMEM); } } else { - /* Invoke completion routine to fetch data from host. */ + /* Called for the first time. Ask the driver to send us more data. */ scsi_write_complete(r, 0); } } @@ -329,7 +332,7 @@ static void scsi_dma_restart_bh(void *opaque) scsi_write_data(&r->req); break; case SCSI_REQ_STATUS_RETRY_FLUSH: - ret = scsi_disk_emulate_command(r, r->iov.iov_base); + ret = scsi_disk_emulate_command(r); if (ret == 0) { scsi_req_complete(&r->req, GOOD); } @@ -844,13 +847,31 @@ static int scsi_disk_emulate_start_stop(SCSIDiskReq *r) return 0; } -static int scsi_disk_emulate_command(SCSIDiskReq *r, uint8_t *outbuf) +static int scsi_disk_emulate_command(SCSIDiskReq *r) { SCSIRequest *req = &r->req; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); uint64_t nb_sectors; + uint8_t *outbuf; int buflen = 0; + if (!r->iov.iov_base) { + /* + * FIXME: we shouldn't return anything bigger than 4k, but the code + * requires the buffer to be as big as req->cmd.xfer in several + * places. So, do not allow CDBs with a very large ALLOCATION + * LENGTH. The real fix would be to modify scsi_read_data and + * dma_buf_read, so that they return data beyond the buflen + * as all zeros. + */ + if (req->cmd.xfer > 65536) { + goto illegal_request; + } + r->buflen = MAX(4096, req->cmd.xfer); + r->iov.iov_base = qemu_blockalign(s->bs, r->buflen); + } + + outbuf = r->iov.iov_base; switch (req->cmd.buf[0]) { case TEST_UNIT_READY: if (s->tray_open || !bdrv_is_inserted(s->bs)) @@ -1001,11 +1022,9 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); int32_t len; uint8_t command; - uint8_t *outbuf; int rc; command = buf[0]; - outbuf = (uint8_t *)r->iov.iov_base; DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", req->lun, req->tag, buf[0]); #ifdef DEBUG_SCSI @@ -1034,7 +1053,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t *buf) case GET_CONFIGURATION: case SERVICE_ACTION_IN_16: case VERIFY_10: - rc = scsi_disk_emulate_command(r, outbuf); + rc = scsi_disk_emulate_command(r); if (rc < 0) { return 0; } @@ -1285,11 +1304,8 @@ static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, d); SCSIRequest *req; - SCSIDiskReq *r; req = scsi_req_alloc(&scsi_disk_reqops, &s->qdev, tag, lun, hba_private); - r = DO_UPCAST(SCSIDiskReq, req, req); - r->iov.iov_base = qemu_blockalign(s->bs, SCSI_DMA_BUF_SIZE); return req; } diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index 5ce01af..8f6b70d 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -244,12 +244,6 @@ static uint8_t *scsi_get_buf(SCSIRequest *req) static void scsi_req_fixup(SCSIRequest *req) { switch(req->cmd.buf[0]) { - case WRITE_10: - req->cmd.buf[1] &= ~0x08; /* disable FUA */ - break; - case READ_10: - req->cmd.buf[1] &= ~0x08; /* disable FUA */ - break; case REWIND: case START_STOP: if (req->dev->type == TYPE_TAPE) { diff --git a/linux-aio.c b/linux-aio.c index 5265a02..bffa6cd 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -68,15 +68,6 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, qemu_aio_release(laiocb); } -/* - * All requests are directly processed when they complete, so there's nothing - * left to do during qemu_aio_wait(). - */ -static int qemu_laio_process_requests(void *opaque) -{ - return 0; -} - static void qemu_laio_completion_cb(void *opaque) { struct qemu_laio_state *s = opaque; @@ -215,7 +206,7 @@ void *laio_init(void) goto out_close_efd; qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL, - qemu_laio_flush_cb, qemu_laio_process_requests, s); + qemu_laio_flush_cb, NULL, s); return s; @@ -30,6 +30,10 @@ #include <ctype.h> #include <inttypes.h> +#ifdef __linux__ +#include <linux/fs.h> +#endif + #include "qemu_socket.h" //#define DEBUG_NBD @@ -63,6 +67,8 @@ #define NBD_PRINT_DEBUG _IO(0xab, 6) #define NBD_SET_SIZE_BLOCKS _IO(0xab, 7) #define NBD_DISCONNECT _IO(0xab, 8) +#define NBD_SET_TIMEOUT _IO(0xab, 9) +#define NBD_SET_FLAGS _IO(0xab, 10) #define NBD_OPT_EXPORT_NAME (1 << 0) @@ -172,7 +178,7 @@ int unix_socket_outgoing(const char *path) Request (type == 2) */ -int nbd_negotiate(int csock, off_t size) +int nbd_negotiate(int csock, off_t size, uint32_t flags) { char buf[8 + 8 + 8 + 128]; @@ -180,14 +186,16 @@ int nbd_negotiate(int csock, off_t size) [ 0 .. 7] passwd ("NBDMAGIC") [ 8 .. 15] magic (0x00420281861253) [16 .. 23] size - [24 .. 151] reserved (0) + [24 .. 27] flags + [28 .. 151] reserved (0) */ TRACE("Beginning negotiation."); memcpy(buf, "NBDMAGIC", 8); cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL); cpu_to_be64w((uint64_t*)(buf + 16), size); - memset(buf + 24, 0, 128); + cpu_to_be32w((uint32_t*)(buf + 24), flags | NBD_FLAG_HAS_FLAGS); + memset(buf + 28, 0, 124); if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { LOG("write failed"); @@ -337,8 +345,8 @@ int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, return 0; } -#ifndef _WIN32 -int nbd_init(int fd, int csock, off_t size, size_t blocksize) +#ifdef __linux__ +int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize) { TRACE("Setting block size to %lu", (unsigned long)blocksize); @@ -358,6 +366,26 @@ int nbd_init(int fd, int csock, off_t size, size_t blocksize) return -1; } + if (flags & NBD_FLAG_READ_ONLY) { + int read_only = 1; + TRACE("Setting readonly attribute"); + + if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { + int serrno = errno; + LOG("Failed setting read-only attribute"); + errno = serrno; + return -1; + } + } + + if (ioctl(fd, NBD_SET_FLAGS, flags) < 0 + && errno != ENOTTY) { + int serrno = errno; + LOG("Failed setting flags"); + errno = serrno; + return -1; + } + TRACE("Clearing NBD socket"); if (ioctl(fd, NBD_CLEAR_SOCK) == -1) { @@ -548,7 +576,7 @@ static int nbd_send_reply(int csock, struct nbd_reply *reply) } int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, - off_t *offset, bool readonly, uint8_t *data, int data_size) + off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size) { struct nbd_request request; struct nbd_reply reply; @@ -632,7 +660,7 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, return -1; } - if (readonly) { + if (nbdflags & NBD_FLAG_READ_ONLY) { TRACE("Server is read-only, return error"); reply.error = 1; } else { @@ -37,10 +37,22 @@ struct nbd_reply { uint64_t handle; } QEMU_PACKED; +#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ +#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ +#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ +#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ +#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ + +#define NBD_CMD_MASK_COMMAND 0x0000ffff +#define NBD_CMD_FLAG_FUA (1 << 16) + enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, - NBD_CMD_DISC = 2 + NBD_CMD_DISC = 2, + NBD_CMD_FLUSH = 3, + NBD_CMD_TRIM = 4 }; #define NBD_DEFAULT_PORT 10809 @@ -53,14 +65,14 @@ int tcp_socket_incoming_spec(const char *address_and_port); int unix_socket_outgoing(const char *path); int unix_socket_incoming(const char *path); -int nbd_negotiate(int csock, off_t size); +int nbd_negotiate(int csock, off_t size, uint32_t flags); int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, off_t *size, size_t *blocksize); -int nbd_init(int fd, int csock, off_t size, size_t blocksize); +int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize); int nbd_send_request(int csock, struct nbd_request *request); int nbd_receive_reply(int csock, struct nbd_reply *reply); int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset, - off_t *offset, bool readonly, uint8_t *data, int data_size); + off_t *offset, uint32_t nbdflags, uint8_t *data, int data_size); int nbd_client(int fd); int nbd_disconnect(int fd); diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 3193dbf..d3c1174 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -42,7 +42,6 @@ struct qemu_paiocb { int aio_niov; size_t aio_nbytes; #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ - int ev_signo; off_t aio_offset; QTAILQ_ENTRY(qemu_paiocb) node; @@ -181,7 +180,6 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb) { - size_t offset = 0; ssize_t len; do { @@ -189,12 +187,12 @@ static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb) len = qemu_pwritev(aiocb->aio_fildes, aiocb->aio_iov, aiocb->aio_niov, - aiocb->aio_offset + offset); + aiocb->aio_offset); else len = qemu_preadv(aiocb->aio_fildes, aiocb->aio_iov, aiocb->aio_niov, - aiocb->aio_offset + offset); + aiocb->aio_offset); } while (len == -1 && errno == EINTR); if (len == -1) @@ -309,12 +307,10 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb) return nbytes; } +static void posix_aio_notify_event(void); + static void *aio_thread(void *unused) { - pid_t pid; - - pid = getpid(); - mutex_lock(&lock); pending_threads--; mutex_unlock(&lock); @@ -381,7 +377,7 @@ static void *aio_thread(void *unused) aiocb->ret = ret; mutex_unlock(&lock); - if (kill(pid, aiocb->ev_signo)) die("kill failed"); + posix_aio_notify_event(); } cur_threads--; @@ -548,18 +544,14 @@ static int posix_aio_flush(void *opaque) static PosixAioState *posix_aio_state; -static void aio_signal_handler(int signum) +static void posix_aio_notify_event(void) { - if (posix_aio_state) { - char byte = 0; - ssize_t ret; - - ret = write(posix_aio_state->wfd, &byte, sizeof(byte)); - if (ret < 0 && errno != EAGAIN) - die("write()"); - } + char byte = 0; + ssize_t ret; - qemu_service_io(); + ret = write(posix_aio_state->wfd, &byte, sizeof(byte)); + if (ret < 0 && errno != EAGAIN) + die("write()"); } static void paio_remove(struct qemu_paiocb *acb) @@ -623,7 +615,6 @@ BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, return NULL; acb->aio_type = type; acb->aio_fildes = fd; - acb->ev_signo = SIGUSR2; if (qiov) { acb->aio_iov = qiov->iov; @@ -651,7 +642,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, return NULL; acb->aio_type = QEMU_AIO_IOCTL; acb->aio_fildes = fd; - acb->ev_signo = SIGUSR2; acb->aio_offset = 0; acb->aio_ioctl_buf = buf; acb->aio_ioctl_cmd = req; @@ -665,7 +655,6 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, int paio_init(void) { - struct sigaction act; PosixAioState *s; int fds[2]; int ret; @@ -675,11 +664,6 @@ int paio_init(void) s = g_malloc(sizeof(PosixAioState)); - sigfillset(&act.sa_mask); - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ - act.sa_handler = aio_signal_handler; - sigaction(SIGUSR2, &act, NULL); - s->first_aio = NULL; if (qemu_pipe(fds) == -1) { fprintf(stderr, "failed to create pipe\n"); @@ -185,7 +185,7 @@ int main(int argc, char **argv) BlockDriverState *bs; off_t dev_offset = 0; off_t offset = 0; - bool readonly = false; + uint32_t nbdflags = 0; bool disconnect = false; const char *bindto = "0.0.0.0"; int port = NBD_DEFAULT_PORT; @@ -230,7 +230,6 @@ int main(int argc, char **argv) int nb_fds = 0; int max_fd; int persistent = 0; - uint32_t nbdflags; while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { @@ -263,7 +262,7 @@ int main(int argc, char **argv) } break; case 'r': - readonly = true; + nbdflags |= NBD_FLAG_READ_ONLY; flags &= ~BDRV_O_RDWR; break; case 'P': @@ -398,13 +397,13 @@ int main(int argc, char **argv) } ret = nbd_receive_negotiate(sock, NULL, &nbdflags, - &size, &blocksize); + &size, &blocksize); if (ret == -1) { ret = 1; goto out; } - ret = nbd_init(fd, sock, size, blocksize); + ret = nbd_init(fd, sock, nbdflags, size, blocksize); if (ret == -1) { ret = 1; goto out; @@ -463,7 +462,7 @@ int main(int argc, char **argv) for (i = 1; i < nb_fds && ret; i++) { if (FD_ISSET(sharing_fds[i], &fds)) { if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset, - &offset, readonly, data, NBD_BUFFER_SIZE) != 0) { + &offset, nbdflags, data, NBD_BUFFER_SIZE) != 0) { close(sharing_fds[i]); nb_fds--; sharing_fds[i] = sharing_fds[nb_fds]; @@ -479,7 +478,7 @@ int main(int argc, char **argv) (struct sockaddr *)&addr, &addr_len); if (sharing_fds[nb_fds] != -1 && - nbd_negotiate(sharing_fds[nb_fds], fd_size) != -1) { + nbd_negotiate(sharing_fds[nb_fds], fd_size, nbdflags) != -1) { if (sharing_fds[nb_fds] > max_fd) max_fd = sharing_fds[nb_fds]; nb_fds++; |