//The device model embeds the DMA-related fields and operation sets:
struct device {
    ....
    u64 *dma_mask;          /* dma mask (if dma'able device) */
    u64 coherent_dma_mask;  /* Like dma_mask, but for alloc_coherent mappings, as
                               not all hardware supports 64-bit addresses for
                               consistent allocations such as descriptors. */
    struct device_dma_parameters *dma_parms;
    struct list_head dma_pools;       /* dma pools (if dma'ble) */
    struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */
#ifdef CONFIG_CMA
    struct cma *cma_area;   /* contiguous memory area for dma allocations */
#endif
    /* arch specific additions */
    struct dev_archdata archdata;
    ....
};
struct dev_archdata {
    struct dma_map_ops *dma_ops;
#ifdef CONFIG_DMABOUNCE
    struct dmabounce_device_info *dmabounce;
#endif
#ifdef CONFIG_IOMMU_API
    void *iommu; /* private IOMMU data */
#endif
#ifdef CONFIG_ARM_DMA_USE_IOMMU
    struct dma_iommu_mapping *mapping;
#endif
};
struct dma_map_ops {
    void *(*alloc)(struct device *dev, size_t size,
                   dma_addr_t *dma_handle, gfp_t gfp,
                   struct dma_attrs *attrs);
    void (*free)(struct device *dev, size_t size,
                 void *vaddr, dma_addr_t dma_handle,
                 struct dma_attrs *attrs);
    int (*mmap)(struct device *, struct vm_area_struct *,
                void *, dma_addr_t, size_t, struct dma_attrs *attrs);
    int (*get_sgtable)(struct device *dev, struct sg_table *sgt, void *,
                       dma_addr_t, size_t, struct dma_attrs *attrs);
    dma_addr_t (*map_page)(struct device *dev, struct page *page,
                           unsigned long offset, size_t size,
                           enum dma_data_direction dir,
                           struct dma_attrs *attrs);
    void (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
                       size_t size, enum dma_data_direction dir,
                       struct dma_attrs *attrs);
    ....
    int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
    int (*dma_supported)(struct device *dev, u64 mask);
    int (*set_dma_mask)(struct device *dev, u64 mask);
    ....
    int is_phys;
};
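//Per-device dispatch resolves the ops roughly as below. A minimal sketch: the
//name of the global fallback (arm_dma_ops here) varies by architecture and
//kernel version.
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
    if (dev && dev->archdata.dma_ops)
        return dev->archdata.dma_ops;   /* per-device override */
    return &arm_dma_ops;                /* architecture-wide default */
}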
@arch/arm/mm/dma-mapping.c: struct dma_map_ops is instantiated as follows:
struct dma_map_ops iommu_ops = {
    .alloc = arm_iommu_alloc_attrs,
    .free = arm_iommu_free_attrs,
    .mmap = arm_iommu_mmap_attrs,
    .get_sgtable = arm_iommu_get_sgtable,
    .map_page = arm_iommu_map_page,
    .unmap_page = arm_iommu_unmap_page,
    .sync_single_for_cpu = arm_iommu_sync_single_for_cpu,
    .sync_single_for_device = arm_iommu_sync_single_for_device,
    .map_sg = arm_iommu_map_sg,
    .unmap_sg = arm_iommu_unmap_sg,
    .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu,
    .sync_sg_for_device = arm_iommu_sync_sg_for_device,
};
//dma-mapping.h wraps the DMA operation set of the device model and provides the arch-specific interface functions, for example:
@arch/arm/include/asm/dma-mapping.h
static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
{
    BUG_ON(!dev);
    dev->archdata.dma_ops = ops;
}
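//In the IOMMU case it is arm_iommu_attach_device() that installs iommu_ops on
//a device; a simplified sketch (error handling and refcounting trimmed):
int arm_iommu_attach_device(struct device *dev, struct dma_iommu_mapping *mapping)
{
    int err = iommu_attach_device(mapping->domain, dev);
    if (err)
        return err;
    dev->archdata.mapping = mapping;
    set_dma_ops(dev, &iommu_ops);   /* all further DMA ops go through the IOMMU */
    return 0;
}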
Consistent (coherent) DMA mapping mainly involves two functions:
void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
Comparing the two: apart from dma_alloc_coherent() first trying to find contiguous free pages in the device's own dev->dma_mem, there is one more difference:
the page attribute argument passed to __dma_alloc() is not the same
dma_alloc_coherent()     -- pgprot_dmacoherent(pgprot_kernel)
dma_alloc_writecombine() -- pgprot_writecombine(pgprot_kernel)
//Consistent DMA mapping
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
    void *memory;
    //First look for contiguous free pages in the device's own dev->dma_mem;
    //if some are found, return them, otherwise fall back to __dma_alloc().
    if (dma_alloc_from_coherent(dev, size, handle, &memory))
        return memory;
    //Note the fifth argument to __dma_alloc(): pgprot_dmacoherent(pgprot_kernel)
    return __dma_alloc(dev, size, handle, gfp, pgprot_dmacoherent(pgprot_kernel),
                       __builtin_return_address(0));
}
EXPORT_SYMBOL(dma_alloc_coherent);
void *
dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
{
    //Note the fifth argument to __dma_alloc(): pgprot_writecombine(pgprot_kernel)
    return __dma_alloc(dev, size, handle, gfp, pgprot_writecombine(pgprot_kernel),
                       __builtin_return_address(0));
}
EXPORT_SYMBOL(dma_alloc_writecombine);
//Definition of pgprot_dmacoherent(pgprot_kernel):
//@arch/arm/include/asm/pgtable.h
#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
#define pgprot_dmacoherent(prot) __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
#else
#define pgprot_dmacoherent(prot) __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED | L_PTE_XN)
#endif
//Definition of pgprot_writecombine(pgprot_kernel):
//@arch/arm/include/asm/pgtable.h
#define pgprot_writecombine(prot) __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
//@arch/arm/include/asm/pgtable-2level.h
#define L_PTE_XN (_AT(pteval_t, 1) << 9)
#define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */
#define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */
//Hence, without CONFIG_ARM_DMA_MEM_BUFFERABLE, dma_alloc_coherent and dma_alloc_writecombine differ in two ways:
//1. the former maps the pages L_PTE_MT_UNCACHED (strongly ordered: neither cached nor buffered),
//   while the latter maps them L_PTE_MT_BUFFERABLE (uncached but bufferable, i.e. write-combining);
//2. the former additionally sets L_PTE_XN (execute-never), the latter does not.
//With CONFIG_ARM_DMA_MEM_BUFFERABLE, both map the pages L_PTE_MT_BUFFERABLE, and the only difference is:
//1. the former sets L_PTE_XN (execute-never), the latter does not.
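//A typical driver-side use of the coherent API. A minimal sketch: struct
//my_desc, RING_ENTRIES and the my_* names are illustrative, not kernel API.
#include <linux/dma-mapping.h>

#define RING_ENTRIES 64

struct my_desc {            /* hypothetical DMA descriptor layout */
    u32 buf_addr;
    u32 len_flags;
};

static int my_ring_alloc(struct device *dev, struct my_desc **ring, dma_addr_t *ring_dma)
{
    //Coherent buffer: no dmac_*/outer_* maintenance is needed around accesses;
    //*ring is the CPU virtual address, *ring_dma the bus address for the device.
    *ring = dma_alloc_coherent(dev, RING_ENTRIES * sizeof(**ring),
                               ring_dma, GFP_KERNEL);
    return *ring ? 0 : -ENOMEM;
}

static void my_ring_free(struct device *dev, struct my_desc *ring, dma_addr_t ring_dma)
{
    dma_free_coherent(dev, RING_ENTRIES * sizeof(*ring), ring, ring_dma);
}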
int
dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret)
+-- mem = dev->dma_mem;
+-- pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
+-- *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+-- *ret = mem->virt_base + (pageno << PAGE_SHIFT);
+-- memset(*ret, 0, size);
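//dev->dma_mem is normally populated beforehand by platform code through
//dma_declare_coherent_memory(). A minimal sketch: the 0x40000000 on-chip SRAM
//address and the 64 KiB size are made up.
static int my_platform_dma_setup(struct device *dev)
{
    int ret = dma_declare_coherent_memory(dev,
                    0x40000000,      /* bus address as seen by the CPU */
                    0x40000000,      /* address as seen by the device */
                    SZ_64K,
                    DMA_MEMORY_MAP); /* region is directly mappable */
    return (ret & DMA_MEMORY_MAP) ? 0 : -ENODEV;
}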
//__dma_alloc() sets the page-table entry attributes according to prot:
static void *
__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, pgprot_t prot, const void *caller)
+-- page = __dma_alloc_buffer(dev, size, gfp);
|   +-- u64 mask = get_coherent_dma_mask(dev);
|   +-- page = alloc_pages(gfp, order);
|   +-- split_page(page, order);
|   +-- for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
|           __free_page(p); //give back the unused tail of the 2^order block
|   +-- ptr = page_address(page);
|   +-- memset(ptr, 0, size);
|   +-- dmac_flush_range(ptr, ptr + size);
|   +-- outer_flush_range(__pa(ptr), __pa(ptr) + size);
|   +-- return page;
//@arch/arm/include/asm/memory.h : #define arch_is_coherent() 0
+-- if (!arch_is_coherent())
        addr = __dma_alloc_remap(page, size, gfp, prot, caller); //on ARM this branch is always taken
    else
        addr = page_address(page);
+-- *handle = pfn_to_dma(dev, page_to_pfn(page)); //the bus address of the page
+-- return addr;
static void *
__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller)
{
    struct arm_vmregion *c;
    size_t align;
    int bit;

    if (!consistent_pte) {
        printk(KERN_ERR "%s: not initialised\n", __func__);
        dump_stack();
        return NULL;
    }
    /*
     * Align the virtual region allocation - maximum alignment is a section
     * size, minimum is a page size. This helps reduce fragmentation of the
     * DMA space, and also prevents allocations smaller than a section from
     * crossing a section boundary.
     */
    bit = fls(size - 1);
    if (bit > SECTION_SHIFT)
        bit = SECTION_SHIFT;
    align = 1 << bit;
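    //e.g. size = 0x3000 (12 KiB): fls(0x2fff) = 14 <= SECTION_SHIFT (20, i.e.
    //1 MiB sections), so align = 1 << 14 = 16 KiB.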
    //Allocate a virtual address in the consistent mapping region.
    c = arm_vmregion_alloc(&consistent_head, align, size,
                           gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller);
    if (c) {
        pte_t *pte;
        int idx = CONSISTENT_PTE_INDEX(c->vm_start);
        u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE - 1);

        pte = consistent_pte[idx] + off;
        c->vm_pages = page;
        do {
            BUG_ON(!pte_none(*pte));
            set_pte_ext(pte, mk_pte(page, prot), 0); //apply the caller's prot to each PTE
            page++;
            pte++;
            off++;
            if (off >= PTRS_PER_PTE) {
                off = 0;
                pte = consistent_pte[++idx];
            }
        } while (size -= PAGE_SIZE);
        dsb();
        return (void *)c->vm_start;
    }
    return NULL;
}
//Streaming DMA mapping
//@arch/arm/include/asm/dma-mapping.h
/**
* dma_map_single - map a single buffer for streaming DMA
* @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
* @cpu_addr: CPU direct mapped address of buffer
* @size: size of buffer to map
* @dir: DMA transfer direction
*
* Ensure that any data held in the cache is appropriately discarded
* or written back.
*
* The device owns this memory once this call has completed. The CPU
* can regain ownership by calling dma_unmap_single() or
* dma_sync_single_for_cpu().
*/
static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
        size_t size, enum dma_data_direction dir)
{
    unsigned long offset;
    struct page *page;
    dma_addr_t addr;

    BUG_ON(!virt_addr_valid(cpu_addr));
    BUG_ON(!virt_addr_valid(cpu_addr + size - 1));
    BUG_ON(!valid_dma_direction(dir));

    page = virt_to_page(cpu_addr);
    offset = (unsigned long)cpu_addr & ~PAGE_MASK;
    addr = __dma_map_page(dev, page, offset, size, dir);
    debug_dma_map_page(dev, page, offset, size, dir, addr, true);
    return addr;
}
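//A typical streaming mapping in a driver. A minimal sketch: my_start_tx() is a
//hypothetical routine that programs the DMA engine with the bus address.
static int my_send(struct device *dev, void *buf, size_t len)
{
    dma_addr_t dma;

    //Hand the buffer over to the device: the caches are maintained here.
    dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
    if (dma_mapping_error(dev, dma))
        return -ENOMEM;

    my_start_tx(dev, dma, len);     /* hypothetical: kick off the transfer */

    //...after the completion interrupt, return the buffer to the CPU:
    dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
    return 0;
}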
static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page,
        unsigned long offset, size_t size, enum dma_data_direction dir)
{
    __dma_page_cpu_to_dev(page, offset, size, dir);
    return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}
static inline void
__dma_page_cpu_to_dev(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir)
+-- if (!arch_is_coherent())
        ___dma_page_cpu_to_dev(page, off, size, dir);
//and its unmap-side counterpart:
void
___dma_page_dev_to_cpu(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir)
+-- unsigned long paddr = page_to_phys(page) + off;
+-- if (dir != DMA_TO_DEVICE) //don't bother invalidating if DMA went to the device
        outer_inv_range(paddr, paddr + size);
+-- dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+-- if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
        set_bit(PG_dcache_clean, &page->flags); //mark the D-cache clean to save later flushes
void ___dma_page_cpu_to_dev(struct page *page, unsigned long off,
        size_t size, enum dma_data_direction dir)
{
    unsigned long paddr;

    //This is the key step: per-page L1 cache maintenance.
    dma_cache_maint_page(page, off, size, dir, dmac_map_area);
    //+-- dmac_map_area()

    paddr = page_to_phys(page) + off;
    if (dir == DMA_FROM_DEVICE) {
        outer_inv_range(paddr, paddr + size);
    } else {
        outer_clean_range(paddr, paddr + size);
    }
    /* FIXME: non-speculating: flush on bidirectional mappings? */
}
EXPORT_SYMBOL(___dma_page_cpu_to_dev);
//The op called here is the dmac_map_area() passed in above.
//#define dmac_map_area cpu_cache.dma_map_area
//This is, once again, a platform-specific function.
static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size,
        enum dma_data_direction dir, void (*op)(const void *, size_t, int))
{
    /*
     * A single sg entry may refer to multiple physically contiguous
     * pages. But we still need to process highmem pages individually.
     * If highmem is not configured then the bulk of this loop gets
     * optimized out.
     */
    size_t left = size;

    do {
        size_t len = left;
        void *vaddr;

        if (PageHighMem(page)) {
            if (len + offset > PAGE_SIZE) {
                if (offset >= PAGE_SIZE) {
                    page += offset / PAGE_SIZE;
                    offset %= PAGE_SIZE;
                }
                len = PAGE_SIZE - offset;
            }
            vaddr = kmap_high_get(page);
            if (vaddr) {
                vaddr += offset;
                op(vaddr, len, dir);
                kunmap_high(page);
            } else if (cache_is_vipt()) {
                /* unmapped pages might still be cached */
                vaddr = kmap_atomic(page);
                op(vaddr + offset, len, dir);
                kunmap_atomic(vaddr);
            }
        } else {
            vaddr = page_address(page) + offset;
            op(vaddr, len, dir);
        }
        offset = 0;
        page++;
        left -= len;
    } while (left);
}
static inline void
dma_sync_single_for_cpu(struct device *dev,
        dma_addr_t handle, size_t size, enum dma_data_direction dir)
+-- dma_sync_single_range_for_cpu(dev, handle, 0, size, dir);
    +-- valid_dma_direction(dir)
    +-- debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir);
    +-- dmabounce_sync_for_cpu(dev, handle, offset, size, dir) //sync the bounce buffer (CONFIG_DMABOUNCE)
    +-- __dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir);
        +-- if (!arch_is_coherent())
                ___dma_single_dev_to_cpu(kaddr, size, dir);
//@arch/arm/mm/dma-mapping.c
void
___dma_single_dev_to_cpu(const void *kaddr, size_t size, enum dma_data_direction dir)
{
    BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));

    /* FIXME: non-speculating: not required */
    /* don't bother invalidating if DMA to device */
    //If dir is not DMA_TO_DEVICE, invalidate the outer cache for the range.
    if (dir != DMA_TO_DEVICE) {
        unsigned long paddr = __pa(kaddr);
        outer_inv_range(paddr, paddr + size);
        //+-- if (outer_cache.inv_range)
        //        outer_cache.inv_range(start, end);
    }
    //Undo the per-CPU cache maintenance for the range; afterwards the CPU can
    //safely access the region again.
    //#define dmac_unmap_area cpu_cache.dma_unmap_area
    dmac_unmap_area(kaddr, size, dir);
    //=-- cpu_cache.dma_unmap_area(kaddr, size, dir);
}
EXPORT_SYMBOL(___dma_single_dev_to_cpu);
static inline void
dma_sync_single_for_device(struct device *dev,
        dma_addr_t handle, size_t size, enum dma_data_direction dir)
+-- dma_sync_single_range_for_device(dev, handle, 0, size, dir);
    +-- valid_dma_direction(dir)
    +-- debug_dma_sync_single_for_device(dev, handle + offset, size, dir);
    +-- dmabounce_sync_for_device(dev, handle, offset, size, dir) //sync the bounce buffer (CONFIG_DMABOUNCE)
    +-- __dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir);
        +-- if (!arch_is_coherent())
                ___dma_single_cpu_to_dev(kaddr, size, dir);
//Make an area consistent for devices.
//Note: Drivers should NOT use this function directly, as it will break platforms with
//CONFIG_DMABOUNCE. Use the driver DMA support - see dma-mapping.h (dma_sync_*)
void
___dma_single_cpu_to_dev(const void *kaddr, size_t size, enum dma_data_direction dir)
{
    unsigned long paddr;

    BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1));

    //Hand the range over to the device: dmac_map_area performs the L1 cache
    //maintenance. The CPU must not touch the region until it is synced back.
    //#define dmac_map_area cpu_cache.dma_map_area
    dmac_map_area(kaddr, size, dir);
    //=-- cpu_cache.dma_map_area(kaddr, size, dir);

    paddr = __pa(kaddr);
    //If dir is DMA_FROM_DEVICE, invalidate the outer cache for the range;
    //otherwise clean it (write dirty lines back).
    if (dir == DMA_FROM_DEVICE) {
        outer_inv_range(paddr, paddr + size);
        //+-- if (outer_cache.inv_range)
        //        outer_cache.inv_range(start, end);
    } else {
        outer_clean_range(paddr, paddr + size);
        //+-- if (outer_cache.clean_range)
        //        outer_cache.clean_range(start, end);
    }
    /* FIXME: non-speculating: flush on bidirectional mappings? */
}