RDMA virtual-to-physical address translation

See also: [SPDK/NVMe Storage Technology Analysis] 015 - Understanding Memory Registration - 程序员大本营
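Memory registration is where an RDMA application's virtual addresses get pinned and translated into addresses the device (or the CPU, for software providers) can use. The kernel walkthrough below starts at ib_umem_get(), which is what most hardware drivers reach, via the uverbs ioctl path and their reg_user_mr hook, when userspace calls ibv_reg_mr(). The libibverbs sketch here is a minimal, hypothetical example of that userspace side (device 0 assumed, error handling abbreviated, no data transfer); the buffer pointer passed to ibv_reg_mr() is the addr that ib_umem_get() receives.

/* Minimal userspace sketch (libibverbs). Compile with -libverbs. */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
    int num;
    struct ibv_device **devs = ibv_get_device_list(&num);
    if (!devs || num == 0)
        return 1;

    struct ibv_context *ctx = ibv_open_device(devs[0]);
    struct ibv_pd *pd = ctx ? ibv_alloc_pd(ctx) : NULL;
    size_t len = 1 << 20;                 /* 1 MiB user buffer */
    void *buf = malloc(len);
    if (!pd || !buf)
        return 1;

    /* The virtual address 'buf' is what ib_umem_get() later sees as 'addr'. */
    struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
                                   IBV_ACCESS_LOCAL_WRITE |
                                   IBV_ACCESS_REMOTE_READ |
                                   IBV_ACCESS_REMOTE_WRITE);
    if (!mr)
        return 1;

    printf("lkey=0x%x rkey=0x%x\n", mr->lkey, mr->rkey);

    ibv_dereg_mr(mr);
    ibv_dealloc_pd(pd);
    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    free(buf);
    return 0;
}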

drivers/infiniband/core/umem.c

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
 * @device: IB device to connect UMEM
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 */
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
                            size_t size, int access)
{
    struct ib_umem *umem;
    struct page **page_list;
    unsigned long lock_limit;
    unsigned long new_pinned;
    unsigned long cur_base;
    unsigned long dma_attr = 0;
    struct mm_struct *mm;
    unsigned long npages;
    int pinned, ret;
    unsigned int gup_flags = FOLL_WRITE;

    /*
     * If the combination of the addr and size requested for this memory
     * region causes an integer overflow, return error.
     */
    if (((addr + size) < addr) ||
        PAGE_ALIGN(addr + size) < (addr + size))
        return ERR_PTR(-EINVAL);

    if (!can_do_mlock())
        return ERR_PTR(-EPERM);

    if (access & IB_ACCESS_ON_DEMAND)
        return ERR_PTR(-EOPNOTSUPP);

    umem = kzalloc(sizeof(*umem), GFP_KERNEL);
    if (!umem)
        return ERR_PTR(-ENOMEM);
    umem->ibdev = device;
    umem->length = size;
    umem->address = addr;
    /*
     * Drivers should call ib_umem_find_best_pgsz() to set the iova
     * correctly.
     */
    umem->iova = addr;
    umem->writable = ib_access_writable(access);
    umem->owning_mm = mm = current->mm;
    mmgrab(mm);

    page_list = (struct page **) __get_free_page(GFP_KERNEL);
    if (!page_list) {
        ret = -ENOMEM;
        goto umem_kfree;
    }

    npages = ib_umem_num_pages(umem);
    if (npages == 0 || npages > UINT_MAX) {
        ret = -EINVAL;
        goto out;
    }

    lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

    new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
    if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
        atomic64_sub(npages, &mm->pinned_vm);
        ret = -ENOMEM;
        goto out;
    }

    cur_base = addr & PAGE_MASK;

    if (!umem->writable)
        gup_flags |= FOLL_FORCE;

    while (npages) {
        cond_resched();
        pinned = pin_user_pages_fast(cur_base,
                                     min_t(unsigned long, npages,
                                           PAGE_SIZE /
                                           sizeof(struct page *)),
                                     gup_flags | FOLL_LONGTERM, page_list);
        if (pinned < 0) {
            ret = pinned;
            goto umem_release;
        }

        cur_base += pinned * PAGE_SIZE;
        npages -= pinned;
        ret = sg_alloc_append_table_from_pages(
            &umem->sgt_append, page_list, pinned, 0,
            pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
            npages, GFP_KERNEL);
        if (ret) {
            unpin_user_pages_dirty_lock(page_list, pinned, 0);
            goto umem_release;
        }
    }

    if (access & IB_ACCESS_RELAXED_ORDERING)
        dma_attr |= DMA_ATTR_WEAK_ORDERING;

    ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
                                   DMA_BIDIRECTIONAL, dma_attr);
    if (ret)
        goto umem_release;
    goto out;

umem_release:
    __ib_umem_release(device, umem, 0);
    atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
    free_page((unsigned long) page_list);
umem_kfree:
    if (ret) {
        mmdrop(umem->owning_mm);
        kfree(umem);
    }
    return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
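In short, ib_umem_get() rejects an addr/size combination that overflows, pins every backing page with pin_user_pages_fast(FOLL_LONGTERM) so the pages cannot be swapped out or migrated while the MR exists, charges the pin count against RLIMIT_MEMLOCK through mm->pinned_vm, collects the pages into an appendable sg_table, and finally maps that table with ib_dma_map_sgtable_attrs(). The standalone sketch below only reproduces the accounting arithmetic, assuming the system page size instead of the kernel's ib_umem_num_pages()/PAGE_SHIFT helpers; pinned_pages() is a hypothetical helper, not a kernel function.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/resource.h>

/* Hypothetical helper: pages spanned by [addr, addr + size), analogous to
 * what ib_umem_num_pages() computes for the base page size. */
static unsigned long pinned_pages(uintptr_t addr, size_t size, unsigned long pgsz)
{
    uintptr_t first = addr & ~(uintptr_t)(pgsz - 1);                      /* round start down */
    uintptr_t last  = (addr + size + pgsz - 1) & ~(uintptr_t)(pgsz - 1);  /* round end up */
    return (last - first) / pgsz;
}

int main(void)
{
    unsigned long pgsz = (unsigned long)sysconf(_SC_PAGESIZE);
    struct rlimit rl;
    getrlimit(RLIMIT_MEMLOCK, &rl);

    char *buf = malloc((1 << 20) + 64);
    if (!buf)
        return 1;
    uintptr_t addr = (uintptr_t)buf + 0x123;   /* deliberately unaligned start */
    size_t size = 1 << 20;                     /* 1 MiB region, like a typical MR */

    unsigned long npages = pinned_pages(addr, size, pgsz);

    /* lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT in ib_umem_get() */
    if (rl.rlim_cur == RLIM_INFINITY)
        printf("npages=%lu, RLIMIT_MEMLOCK=unlimited\n", npages);
    else
        printf("npages=%lu, lock_limit=%llu pages\n", npages,
               (unsigned long long)rl.rlim_cur / pgsz);

    free(buf);
    return 0;
}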

include/rdma/ib_verbs.h

/**
 * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
 * @dev: The device for which the DMA addresses are to be created
 * @sg: The sg_table object describing the buffer
 * @direction: The direction of the DMA
 * @attrs: Optional DMA attributes for the map operation
 */
static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
                                           struct sg_table *sgt,
                                           enum dma_data_direction direction,
                                           unsigned long dma_attrs)
{
    int nents;

    if (ib_uses_virt_dma(dev)) {
        nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
        if (!nents)
            return -EIO;
        sgt->nents = nents;
        return 0;
    }
    return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
}

drivers/infiniband/core/device.c

#ifdef CONFIG_INFINIBAND_VIRT_DMA
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
{
    struct scatterlist *s;
    int i;

    for_each_sg(sg, s, nents, i) {
        sg_dma_address(s) = (uintptr_t)sg_virt(s);
        sg_dma_len(s) = s->length;
    }
    return nents;
}
EXPORT_SYMBOL(ib_dma_virt_map_sg);
#endif /* CONFIG_INFINIBAND_VIRT_DMA */
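ib_uses_virt_dma() is true only for software providers (the kernel's rxe soft-RoCE and siw soft-iWARP drivers), which move data with the CPU rather than device DMA. On that branch the "DMA address" written into each scatterlist entry is simply the kernel virtual address of the segment, so no address translation takes place at all. The toy model below (plain userspace C; all names are illustrative, none are kernel APIs) mirrors that loop over a fake segment array:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* Toy scatterlist entry: a virtually addressed buffer plus the "DMA handle"
 * a consumer of the list would later use. */
struct toy_sg {
    void     *virt;
    size_t    length;
    uint64_t  dma_address;
    size_t    dma_len;
};

/* Same shape as ib_dma_virt_map_sg(): the handle is the pointer value itself. */
static int toy_virt_map_sg(struct toy_sg *sg, int nents)
{
    for (int i = 0; i < nents; i++) {
        sg[i].dma_address = (uintptr_t)sg[i].virt;   /* no translation at all */
        sg[i].dma_len = sg[i].length;
    }
    return nents;
}

int main(void)
{
    struct toy_sg sg[2] = {
        { malloc(4096), 4096, 0, 0 },
        { malloc(8192), 8192, 0, 0 },
    };

    toy_virt_map_sg(sg, 2);
    for (int i = 0; i < 2; i++)
        printf("seg %d: virt=%p dma=%#llx len=%zu\n", i, sg[i].virt,
               (unsigned long long)sg[i].dma_address, sg[i].dma_len);

    free(sg[0].virt);
    free(sg[1].virt);
    return 0;
}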

kernel/dma/mapping.c

/**
 * dma_map_sgtable - Map the given buffer for DMA
 * @dev:    The device for which to perform the DMA operation
 * @sgt:    The sg_table object describing the buffer
 * @dir:    DMA direction
 * @attrs:  Optional DMA attributes for the map operation
 *
 * Maps a buffer described by a scatterlist stored in the given sg_table
 * object for the @dir DMA operation by the @dev device. After success, the
 * ownership for the buffer is transferred to the DMA domain. One has to
 * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the
 * ownership of the buffer back to the CPU domain before touching the
 * buffer by the CPU.
 *
 * Returns 0 on success or a negative error code on error. The following
 * error codes are supported with the given meaning:
 *
 *   -EINVAL   An invalid argument, unaligned access or other error
 *             in usage. Will not succeed if retried.
 *   -ENOMEM   Insufficient resources (like memory or IOVA space) to
 *             complete the mapping. Should succeed if retried later.
 *   -EIO      Legacy error code with an unknown meaning. eg. this is
 *             returned if a lower level call returned DMA_MAPPING_ERROR.
 */
int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
                    enum dma_data_direction dir, unsigned long attrs)
{
    int nents;

    nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
    if (nents < 0)
        return nents;
    sgt->nents = nents;
    return 0;
}
EXPORT_SYMBOL_GPL(dma_map_sgtable);

static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
                              int nents, enum dma_data_direction dir,
                              unsigned long attrs)
{
    const struct dma_map_ops *ops = get_dma_ops(dev);
    int ents;

    BUG_ON(!valid_dma_direction(dir));

    if (WARN_ON_ONCE(!dev->dma_mask))
        return 0;

    if (dma_map_direct(dev, ops) ||
        arch_dma_map_sg_direct(dev, sg, nents))
        ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
    else
        ents = ops->map_sg(dev, sg, nents, dir, attrs);

    if (ents > 0)
        debug_dma_map_sg(dev, sg, nents, ents, dir, attrs);
    else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
                          ents != -EIO))
        return -EIO;

    return ents;
}
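For real hardware, __dma_map_sg_attrs() picks between two mappers: with no special dma_map_ops installed (the dma_map_direct() case) the dma-direct code converts each segment's physical address straight into a bus address, while a device behind an IOMMU goes through its ops->map_sg, which hands out I/O virtual addresses instead. The toy model below (userspace C; every type and function name is illustrative, not a kernel API) sketches only that dispatch:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct segment { uint64_t phys; size_t len; uint64_t dma; };

struct toy_dma_ops {
    int (*map_sg)(struct segment *sg, int nents);
};

/* "dma-direct": bus address == physical address (no offset, no IOMMU). */
static int direct_map_sg(struct segment *sg, int nents)
{
    for (int i = 0; i < nents; i++)
        sg[i].dma = sg[i].phys;            /* phys_to_dma() with identity mapping */
    return nents;
}

/* A pretend IOMMU that remaps everything into a contiguous I/O virtual range. */
static int iommu_map_sg(struct segment *sg, int nents)
{
    uint64_t iova = 0x80000000ull;
    for (int i = 0; i < nents; i++) {
        sg[i].dma = iova;
        iova += sg[i].len;
    }
    return nents;
}

static int map_sg_attrs(const struct toy_dma_ops *ops, struct segment *sg, int nents)
{
    if (!ops)                              /* the dma_map_direct() case */
        return direct_map_sg(sg, nents);
    return ops->map_sg(sg, nents);         /* the ops->map_sg() case (IOMMU etc.) */
}

int main(void)
{
    struct segment sg[2] = { { 0x1000, 4096, 0 }, { 0x9000, 4096, 0 } };
    const struct toy_dma_ops iommu = { .map_sg = iommu_map_sg };

    map_sg_attrs(NULL, sg, 2);             /* direct path */
    printf("direct: %#llx %#llx\n",
           (unsigned long long)sg[0].dma, (unsigned long long)sg[1].dma);

    map_sg_attrs(&iommu, sg, 2);           /* IOMMU path */
    printf("iommu:  %#llx %#llx\n",
           (unsigned long long)sg[0].dma, (unsigned long long)sg[1].dma);
    return 0;
}

Either way the caller only sees sg_dma_address()/sg_dma_len() filled in; whether those hold physical addresses or IOVAs is the mapper's business.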

kernel/dma/direct.c

int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
                      enum dma_data_direction dir, unsigned long attrs)
{
    int i;
    struct scatterlist *sg;

    for_each_sg(sgl, sg, nents, i) {
        sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
                sg->offset, sg->length, dir, attrs);
        if (sg->dma_address == DMA_MAPPING_ERROR)
            goto out_unmap;
        sg_dma_len(sg) = sg->length;
    }

    return nents;

out_unmap:
    dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
    return -EIO;
}

kernel/dma/direct.h

static inline dma_addr_t dma_direct_map_page(struct device *dev,
        struct page *page, unsigned long offset, size_t size,
        enum dma_data_direction dir, unsigned long attrs)
{
    phys_addr_t phys = page_to_phys(page) + offset;
    dma_addr_t dma_addr = phys_to_dma(dev, phys);

    if (is_swiotlb_force_bounce(dev))
        return swiotlb_map(dev, phys, size, dir, attrs);

    if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
        if (is_swiotlb_active(dev))
            return swiotlb_map(dev, phys, size, dir, attrs);

        dev_WARN_ONCE(dev, 1,
                      "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
                      &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
        return DMA_MAPPING_ERROR;
    }

    if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
        arch_sync_dma_for_device(phys, size, dir);
    return dma_addr;
}
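This is where the translation the title refers to actually happens for ordinary hardware: page_to_phys() turns each pinned struct page into a physical address, phys_to_dma() applies any bus offset the platform describes (e.g. via dma-ranges), and swiotlb bounce buffering covers devices that cannot address the result. The first half of that chain, virtual page to physical frame, can be observed from userspace through /proc/self/pagemap, as in the sketch below; note that reading the PFN field requires CAP_SYS_ADMIN on modern kernels, and the page must be resident (hence the memset() and mlock()).

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

/* Look up the physical frame backing one virtual page via /proc/self/pagemap.
 * Each 64-bit entry holds the PFN in bits 0-54 and a "present" flag in bit 63. */
static int virt_to_phys_page(uintptr_t vaddr, uint64_t *phys)
{
    long pgsz = sysconf(_SC_PAGESIZE);
    int fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0)
        return -1;

    uint64_t entry;
    off_t off = (off_t)(vaddr / pgsz) * sizeof(entry);
    if (pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
        close(fd);
        return -1;
    }
    close(fd);

    if (!(entry & (1ULL << 63)))            /* page not present */
        return -1;

    uint64_t pfn = entry & ((1ULL << 55) - 1);
    if (!pfn)                               /* PFN hidden: need CAP_SYS_ADMIN */
        return -1;

    *phys = pfn * pgsz + (vaddr % pgsz);    /* page_to_phys(page) + offset */
    return 0;
}

int main(void)
{
    long pgsz = sysconf(_SC_PAGESIZE);
    char *buf = aligned_alloc(pgsz, pgsz);
    if (!buf)
        return 1;

    memset(buf, 0xab, pgsz);                /* fault the page in */
    mlock(buf, pgsz);                       /* keep it resident, like pinning */

    uint64_t phys;
    if (virt_to_phys_page((uintptr_t)buf + 0x40, &phys) == 0)
        printf("virt %p -> phys 0x%llx\n", (void *)(buf + 0x40),
               (unsigned long long)phys);
    else
        fprintf(stderr, "pagemap lookup failed (run as root?)\n");

    munlock(buf, pgsz);
    free(buf);
    return 0;
}

On x86 systems without an IOMMU, phys_to_dma() is usually the identity, so the DMA address the HCA is given often equals this physical address; platforms with a bus offset, an IOMMU in the path, or a swiotlb bounce diverge at exactly this step.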
