/*
 * DMA helper functions
 *
 * Copyright (c) 2009,2020 Red Hat
 *
 * This work is licensed under the terms of the GNU General Public License
 * (GNU GPL), version 2 or later.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"

/* #define DEBUG_IOMMU */

MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
                           uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

#define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
    int l;
    MemTxResult error = MEMTX_OK;

    memset(fillbuf, c, FILLBUF_SIZE);
    while (len > 0) {
        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
        error |= address_space_write(as, addr, attrs, fillbuf, l);
        len -= l;
        addr += l;
    }

    return error;
}

void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
    qsg->as = as;
    qsg->dev = dev;
    object_ref(OBJECT(dev));
}

void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}

void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}

typedef struct {
    BlockAIOCB common;
    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint32_t align;
    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;

static void dma_blk_cb(void *opaque, int ret);

static void reschedule_dma(void *opaque)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;

    assert(!dbs->acb && dbs->bh);
    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
    dma_blk_cb(dbs, 0);
}

static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
    qemu_iovec_reset(&dbs->iov);
}

static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->acb && !dbs->bh);
    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}

static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
                             MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode. Windows sometimes issues
         * disk read requests with overlapping SGs. It leads
         * to non-determinism, because resulting buffer contents may be mixed
         * from several sectors. This code splits all SGs into several
         * groups. SGs in every group do not overlap.
         */
        if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
            int i;
            for (i = 0 ; i < dbs->iov.niov ; ++i) {
                if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
                                   dbs->iov.iov[i].iov_len, (intptr_t)mem,
                                   cur_len)) {
                    dma_memory_unmap(dbs->sg->as, mem, cur_len,
                                     dbs->dir, cur_len);
                    mem = NULL;
                    break;
                }
            }
        }
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        return;
    }

    if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
        qemu_iovec_discard_back(&dbs->iov,
                                QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
    }

    aio_context_acquire(dbs->ctx);
    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                            dma_blk_cb, dbs, dbs->io_func_opaque);
    aio_context_release(dbs->ctx);
    assert(dbs->acb);
}

static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    assert(!(dbs->acb && dbs->bh));
    if (dbs->acb) {
        /* This will invoke dma_blk_cb.  */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh) {
        cpu_unregister_map_client(dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}

static AioContext *dma_get_aio_context(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    return dbs->ctx;
}

static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size         = sizeof(DMAAIOCB),
    .cancel_async       = dma_aio_cancel,
    .get_aio_context    = dma_get_aio_context,
};

BlockAIOCB *dma_blk_io(AioContext *ctx,
    QEMUSGList *sg, uint64_t offset, uint32_t align,
    DMAIOFunc *io_func, void *io_func_opaque,
    BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
{
    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
    dbs->sg = sg;
    dbs->ctx = ctx;
    dbs->offset = offset;
    dbs->align = align;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
    return &dbs->common;
}


static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
                                 BlockCompletionFunc *cb, void *cb_opaque,
                                 void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_read(BlockBackend *blk,
                         QEMUSGList *sg, uint64_t offset, uint32_t align,
                         void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
}

static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
                                  BlockCompletionFunc *cb, void *cb_opaque,
                                  void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_write(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
}


static MemTxResult dma_buf_rw(void *buf, int32_t len, uint64_t *residp,
                              QEMUSGList *sg, DMADirection dir,
                              MemTxAttrs attrs)
{
    uint8_t *ptr = buf;
    uint64_t resid;
    int sg_cur_index;
    MemTxResult res = MEMTX_OK;

    resid = sg->size;
    sg_cur_index = 0;
    len = MIN(len, resid);
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        int32_t xfer = MIN(len, entry.len);
        res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
        ptr += xfer;
        len -= xfer;
        resid -= xfer;
    }

    if (residp) {
        *residp = resid;
    }
    return res;
}

uint64_t dma_buf_read(void *ptr, int32_t len, QEMUSGList *sg, MemTxAttrs attrs)
{
    uint64_t resid;

    dma_buf_rw(ptr, len, &resid, sg, DMA_DIRECTION_FROM_DEVICE, attrs);

    return resid;
}

uint64_t dma_buf_write(void *ptr, int32_t len, QEMUSGList *sg, MemTxAttrs attrs)
{
    uint64_t resid;

    dma_buf_rw(ptr, len, &resid, sg, DMA_DIRECTION_TO_DEVICE, attrs);

    return resid;
}

void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}

uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    if (max_addr_bits != 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            return UINT64_MAX;
        }
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}