diff options
-rw-r--r-- | kmod/muser.c | 99 | ||||
-rw-r--r-- | lib/libmuser.c | 172 | ||||
-rw-r--r-- | lib/pci.h | 13 |
3 files changed, 252 insertions, 32 deletions
diff --git a/kmod/muser.c b/kmod/muser.c
index ccb3f2a..a4ec3fb 100644
--- a/kmod/muser.c
+++ b/kmod/muser.c
@@ -1313,6 +1313,60 @@ static int bounce_out(void __user *arg, size_t argsz, struct mudev_cmd *mucmd)
 	return 0;
 }

+/*
+ * Copy from the server buffer (ubuf) into the vfio-client pages described by
+ * page_map, skipping the first seek bytes of the destination.
+ *
+ * @page_map: map representing vfio-client pages
+ * @ubuf    : user buffer to copy from
+ * @bufsz   : size of ubuf
+ * @seek    : bytes to skip in page_map before the copy starts
+ *
+ * Returns 0 on success, -ENOSPC if page_map cannot hold seek + bufsz bytes,
+ * or the error reported by muser_copyin().
+ */
+int bounce_in_seek(struct page_map *page_map, void __user *ubuf, size_t bufsz,
+		   size_t seek)
+{
+	unsigned long to_copy = 0;
+	void __user *from = ubuf;
+	void *to;
+	size_t total, offset, pgoff;
+	int pgnr, i, ret;
+
+	/*
+	 * The skipped bytes take up destination space as well.
+	 * NOTE(review): assumes page_map->len counts the mapped bytes starting
+	 * at page_map->offset - confirm against the code that fills page_map.
+	 */
+	if (page_map->len < seek || page_map->len - seek < bufsz)
+		return -ENOSPC;
+
+	/*
+	 * The first byte to write lies page_map->offset + seek bytes into the
+	 * page array; split that into a page index and an in-page offset.
+	 */
+	pgoff = page_map->offset + seek;
+	pgnr = pgoff / PAGE_SIZE;
+	offset = pgoff & (PAGE_SIZE - 1);
+
+	total = bufsz;
+	for (i = pgnr; total && i < page_map->nr_pages; i++) {
+		to = page_to_virt(page_map->pages[i]) + offset;
+		from += to_copy;
+		to_copy = min(total, PAGE_SIZE - offset);
+
+		ret = muser_copyin(to, from, to_copy);
+		if (ret)
+			return ret;
+
+		total -= to_copy;
+		offset = 0;
+	}
+
+	return 0;
+}
+
 /* copy from server(uaddr) to vfio-client pages(mucmd.pg_map) */
 static int bounce_in(struct mudev_cmd *mucmd, void __user *uaddr)
 {
@@ -1669,6 +1723,7 @@ static ssize_t libmuser_write(struct file *filp, const char __user *buf,
 	struct muser_dev *mudev = filp->private_data;
 	struct mudev_cmd *mucmd = mudev->mucmd_pending;
 	struct muser_cmd muser_cmd;
+	unsigned int seek;
 	int ret;

 	if (!mucmd || !mudev) {
@@ -1681,21 +1736,43 @@ static ssize_t libmuser_write(struct file *filp, const char __user *buf,
 		return -EFAULT;
 	}

-	ret = muser_copyin(&muser_cmd, (void __user *)buf,
-			   sizeof(struct muser_cmd));
-	if (ret)
-		return ret;
+	switch (mucmd->type) {
+	case MUSER_READ:
+		ret = muser_copyin(&muser_cmd, (void __user *)buf,
+				   sizeof(struct muser_cmd));
+		if (ret)
+			return ret;
+
+		/*
+		 * TODO: libmuser must not encapsulate buf in muser_cmd instead
+		 * it must just call write() with buf.
+		 */

-	if (mucmd->type != muser_cmd.type) {
-		muser_dbg("bad command %d", muser_cmd.type);
+		if (mucmd->type != muser_cmd.type) {
+			muser_dbg("bad command %d", muser_cmd.type);
+			return -EINVAL;
+		}
+
+		ret = bounce_in(mucmd, muser_cmd.rw.buf);
+		if (ret)
+			return ret;
+		break;
+	case MUSER_IOCTL:
+		/*
+		 * copy the sparse mmap cap information after the
+		 * struct vfio_region_info.
+		 */
+		seek = sizeof(struct vfio_region_info);
+		ret = bounce_in_seek(&mucmd->pg_map, (void __user *)buf, bufsz,
+				     seek);
+		if (ret)
+			return ret;
+		mucmd->pg_map.len -= seek;
+		break;
+	default:
 		return -EINVAL;
 	}

-	WARN_ON(muser_cmd.type != MUSER_READ);
-	ret = bounce_in(mucmd, muser_cmd.rw.buf);
-	if (ret)
-		return ret;
-
 	return bufsz;
 }

diff --git a/lib/libmuser.c b/lib/libmuser.c
index 64e96fc..2f6cbb2 100644
--- a/lib/libmuser.c
+++ b/lib/libmuser.c
@@ -89,7 +89,6 @@ struct lm_ctx {
 };
 MUST_BE_LAST(struct lm_ctx, irqs, lm_irqs_t);

-#define LM_CTX_SIZE(irqs) (sizeof(lm_ctx_t) + sizeof(int) * irqs)
 #define LM2VFIO_IRQT(type) (type - 1)

 void lm_log(const lm_ctx_t * const ctx, const lm_log_lvl_t lvl,
@@ -339,25 +338,106 @@ static long dev_get_irqinfo(lm_ctx_t * lm_ctx, struct vfio_irq_info *irq_info)
     return 0;
 }

+/*
+ * Populate the sparse mmap capability information to vfio-client.
+ * kernel/muser constructs the response for VFIO_DEVICE_GET_REGION_INFO
+ * accommodating sparse mmap information.
+ * Sparse mmap information stays after struct vfio_region_info and cap_offset
+ * points accordingly.
+ *
+ * Returns 0 on success, and also when vfio_reg->argsz is too small, in which
+ * case argsz is set to the required size and cap_offset to 0. Returns -EINVAL
+ * if the region has no sparse mmap areas, -ENOMEM if the capability cannot be
+ * allocated and -EIO if it cannot be written in full to lm_ctx->fd.
+ */
+static int
+dev_get_sparse_mmap_cap(lm_ctx_t *lm_ctx, lm_reg_info_t *lm_reg,
+                        struct vfio_region_info *vfio_reg)
+{
+    struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
+    struct lm_sparse_mmap_areas *mmap_areas;
+    int nr_mmap_areas, i;
+    size_t size;
+    ssize_t ret;
+
+    if (!lm_reg->mmap_areas)
+        return -EINVAL;
+
+    nr_mmap_areas = lm_reg->mmap_areas->nr_mmap_areas;
+    size = sizeof(*sparse) + (nr_mmap_areas * sizeof(*sparse->areas));
+
+    /*
+     * If vfio_reg does not have enough space to accommodate sparse info then
+     * set the argsz with the expected size and return. Vfio client will call
+     * back after reallocating the vfio_reg
+     */
+
+    if (vfio_reg->argsz < size + sizeof(*vfio_reg)) {
+        vfio_reg->argsz = size + sizeof(*vfio_reg);
+        vfio_reg->cap_offset = 0;
+        return 0;
+    }
+
+    lm_log(lm_ctx, LM_DBG, "%s: size %zu, nr_mmap_areas %d\n", __func__, size,
+           nr_mmap_areas);
+    sparse = calloc(1, size);
+    if (!sparse)
+        return -ENOMEM;
+    sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+    sparse->header.version = 1;
+    sparse->header.next = 0;
+    sparse->nr_areas = nr_mmap_areas;
+
+    mmap_areas = lm_reg->mmap_areas;
+    for (i = 0; i < nr_mmap_areas; i++) {
+        sparse->areas[i].offset = mmap_areas->areas[i].start;
+        sparse->areas[i].size = mmap_areas->areas[i].size;
+    }
+
+    /* write the sparse mmap cap info to vfio-client user pages */
+    ret = write(lm_ctx->fd, sparse, size);
+    if (ret < 0 || (size_t)ret != size) {
+        free(sparse);
+        return -EIO;
+    }
+
+    vfio_reg->flags |= VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS;
+    vfio_reg->cap_offset = sizeof(*vfio_reg);
+
+    free(sparse);
+    return 0;
+}
+
 static long
-dev_get_reginfo(lm_ctx_t * lm_ctx, struct vfio_region_info *reg_info)
+dev_get_reginfo(lm_ctx_t * lm_ctx, struct vfio_region_info *vfio_reg)
 {
+    lm_reg_info_t *lm_reg;
+    int err;
+
     assert(lm_ctx != NULL);
-    assert(reg_info != NULL);
-    lm_pci_info_t *pci_info = &lm_ctx->pci_info;
+    assert(vfio_reg != NULL);

     // Ensure provided argsz is sufficiently big and index is within bounds.
-    if ((reg_info->argsz < sizeof(struct vfio_region_info)) ||
-        (reg_info->index >= LM_DEV_NUM_REGS)) {
+    if ((vfio_reg->argsz < sizeof(struct vfio_region_info)) ||
+        (vfio_reg->index >= LM_DEV_NUM_REGS)) {
         return -EINVAL;
     }

-    reg_info->offset = pci_info->reg_info[reg_info->index].offset;
-    reg_info->flags = pci_info->reg_info[reg_info->index].flags;
-    reg_info->size = pci_info->reg_info[reg_info->index].size;
+    // Look the region up only once the index is known to be in bounds.
+    lm_reg = &lm_ctx->pci_info.reg_info[vfio_reg->index];
+
+    vfio_reg->offset = lm_reg->offset;
+    vfio_reg->flags = lm_reg->flags;
+    vfio_reg->size = lm_reg->size;
+
+    if (lm_reg->mmap_areas) {
+        err = dev_get_sparse_mmap_cap(lm_ctx, lm_reg, vfio_reg);
+        if (err)
+            return err;
+    }

-    lm_log(lm_ctx, LM_DBG, "region_info[%d]\n", reg_info->index);
-    dump_buffer(lm_ctx, "", (unsigned char *)reg_info, sizeof *reg_info);
+    lm_log(lm_ctx, LM_DBG, "region_info[%d]\n", vfio_reg->index);
+    dump_buffer(lm_ctx, "", (unsigned char *)vfio_reg, sizeof *vfio_reg);

     return 0;
 }
@@ -478,8 +558,8 @@ static int muser_mmap(lm_ctx_t * lm_ctx, struct muser_cmd *cmd)
     unsigned long addr;
     unsigned long len = cmd->mmap.request.len;
     unsigned long pgoff = cmd->mmap.request.pgoff;
-
     int err = 0;
+    region = lm_get_region(lm_ctx, pgoff, len, &pgoff);
     if (region < 0) {
         lm_log(lm_ctx, LM_ERR, "bad region %d\n", region);
         err = region;
@@ -920,6 +1000,52 @@ init_pci_hdr(lm_pci_hdr_t * const hdr, const lm_pci_hdr_id_t * const id,

     hdr->ss.vid = hdr->id.vid;
     hdr->ss.sid = hdr->id.did;
+
+}
+
+/*
+ * Replace the sparse mmap area pointers in dst, which is expected to start out
+ * as a shallow copy of src (as lm_ctx_create() sets it up), with deep copies.
+ *
+ * Returns 0 on success or -ENOMEM. On failure every entry that was not
+ * duplicated is reset to NULL, so that free_sparse_mmap_areas(dst) only
+ * releases memory owned by dst and never the areas still owned by src.
+ */
+static int copy_sparse_mmap_areas(lm_reg_info_t *dst, lm_reg_info_t *src)
+{
+    struct lm_sparse_mmap_areas *mmap_areas;
+    int nr_mmap_areas;
+    size_t size;
+    int i;
+
+    for (i = 0; i < LM_DEV_NUM_REGS; i++) {
+        if (!src[i].mmap_areas)
+            continue;
+
+        nr_mmap_areas = src[i].mmap_areas->nr_mmap_areas;
+        size = sizeof(*mmap_areas) + (nr_mmap_areas * sizeof(struct lm_mmap_area));
+        mmap_areas = calloc(1, size);
+        if (!mmap_areas) {
+            /* drop the pointers that still alias src */
+            for (; i < LM_DEV_NUM_REGS; i++)
+                dst[i].mmap_areas = NULL;
+            return -ENOMEM;
+        }
+
+        memcpy(mmap_areas, src[i].mmap_areas, size);
+        dst[i].mmap_areas = mmap_areas;
+    }
+
+    return 0;
+}
+
+/* Release the sparse mmap area list of every region in reg_info. */
+static void free_sparse_mmap_areas(lm_reg_info_t *reg_info)
+{
+    int i;
+
+    for (i = 0; i < LM_DEV_NUM_REGS; i++)
+        free(reg_info[i].mmap_areas);
 }

 static int
@@ -951,14 +1077,14 @@ lm_ctx_t *
 lm_ctx_create(lm_dev_info_t * const dev_info)
 {
     lm_ctx_t *lm_ctx = NULL;
-    uint32_t max_ivs = 0;
+    uint32_t max_ivs = 0, nr_mmap_areas = 0;
     uint32_t i;
     int err = 0;
-    size_t size;
+    size_t size = 0;

     if (dev_info == NULL) {
-        err = EINVAL;
-        goto out;
+        errno = EINVAL;
+        return NULL;
     }

     for (i = 0; i < LM_DEV_NUM_IRQS; i++) {
@@ -967,13 +1093,16 @@ lm_ctx_create(lm_dev_info_t * const dev_info)
         }
     }

-    lm_ctx = calloc(1, LM_CTX_SIZE(max_ivs));
-    if (lm_ctx == NULL) {
-        err = errno;
-        goto out;
-    }
+    size += sizeof(int) * max_ivs;
+    lm_ctx = calloc(1, sizeof(lm_ctx_t) + size);
+    if (lm_ctx == NULL)
+        return NULL;

     memcpy(&lm_ctx->pci_info, &dev_info->pci_info, sizeof(lm_pci_info_t));
+    err = copy_sparse_mmap_areas(lm_ctx->pci_info.reg_info,
+                                 dev_info->pci_info.reg_info);
+    if (err)
+        goto out;

     lm_ctx->fd = dev_attach(dev_info->uuid);
     if (lm_ctx->fd == -1) {
@@ -1030,6 +1159,7 @@ out:
     if (err) {
         if (lm_ctx) {
             dev_detach(lm_ctx->fd);
+            free_sparse_mmap_areas(lm_ctx->pci_info.reg_info);
             free(lm_ctx->pci_config_space);
             free(lm_ctx);
             lm_ctx = NULL;
diff --git a/lib/pci.h b/lib/pci.h
--- a/lib/pci.h
+++ b/lib/pci.h
@@ -241,6 +241,18 @@ _Static_assert(sizeof(struct lm_pci_config_space) == 0x100,
 #define LM_REG_FLAG_RW (LM_REG_FLAG_READ | LM_REG_FLAG_WRITE)
 #define LM_REG_FLAG_MEM (1 << 3) // if unset, bar is IO

+/* One mmap area of a region; reported to the vfio client as offset/size. */
+struct lm_mmap_area {
+    uint64_t start;
+    uint64_t size;
+};
+
+/* Sparse mmap areas of a region: nr_mmap_areas entries in areas[]. */
+struct lm_sparse_mmap_areas {
+    int nr_mmap_areas;
+    struct lm_mmap_area areas[];
+};
+
 typedef ssize_t (lm_region_access_t) (void *pvt, char * const buf, size_t count,
                                       loff_t offset, const bool is_write);

@@ -252,6 +264,7 @@ struct lm_reg_info {
     uint64_t offset;
     lm_region_access_t *fn;
     lm_map_region_t *map;
+    struct lm_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */
 };

 enum {