PMEM原理分析

Chipset:MSM8x25Q

Codebase:Android 4.1


PMEM使用:

PMEM使用比较简单,分单进程使用和共享进程使用同一块PMEM。

单进程使用:

1.      int master_fd = open("/dev/pmem_xxx", O_RDWR, 0);

2.      然后再mmap就可以使用了。

进程间共享PMEM:

进程A:

         和单进程使用方法一样。

进程B:

1.      int fd = open("/dev/pmem_xxx", O_RDWR, 0);

2.      ioctl(fd, PMEM_CONNECT,master_fd)    //PMEM_CONNECT表示准备要连接了,连接的PMEM对应的fd为master_fd,即进程A打开的PMEM对应的fd。

3.      然后作mmap就可以使用了。

因此关键是第二步,master_fd是从进程A传过来的,可以使用binder等通信机制。

PMEM对应的驱动代码流程也是比较简单的,利用了字符驱动的open/ioctl/mmap操作,下面直接分析代码。

PMEM初始化:

PMEM类似于ION里的carved-out memory,先预留一块内存,然后需要使用的时候从上面分配一块。因此PMEM总有一天会被ION替代,至少我这么认为.

平台上定义了如下三个PMEM模块:  pmem_adsp, pmem_audio, pmem(mdp_pmem)。

  1. static struct android_pmem_platform_data android_pmem_adsp_pdata = { 
  2.     .name = "pmem_adsp",    //给adsp使用 
  3.     .allocator_type = PMEM_ALLOCATORTYPE_BITMAP,    //都是使用bitmap算法,后面会讲到。 
  4.     .cached = 1
  5.     .memory_type = MEMTYPE_EBI1,    //内存类型都是EBI1 
  6. }; 
  7.  
  8. static struct platform_device android_pmem_adsp_device = { 
  9.     .name = "android_pmem"
  10.     .id = 1
  11.     .dev = { .platform_data = &android_pmem_adsp_pdata }, 
  12. }; 
  13.  
  14. static unsigned pmem_mdp_size = MSM_PMEM_MDP_SIZE
  15. static int __init pmem_mdp_size_setup(char *p) 
  16.     pmem_mdp_size = memparse(p, NULL); 
  17.     return 0; 
  18. /*可以通过传参来设置pmem mdp的size, 其他pmem模块也如此。*/ 
  19. early_param("pmem_mdp_size", pmem_mdp_size_setup); 
  20.  
  21. static unsigned pmem_adsp_size = MSM_PMEM_ADSP_SIZE
  22. static int __init pmem_adsp_size_setup(char *p) 
  23.     pmem_adsp_size = memparse(p, NULL); 
  24.     return 0; 
  25.  
  26. early_param("pmem_adsp_size", pmem_adsp_size_setup); 
  27.  
  28. static struct android_pmem_platform_data android_pmem_audio_pdata = { 
  29.     .name = "pmem_audio",       //给audio使用 
  30.     .allocator_type = PMEM_ALLOCATORTYPE_BITMAP
  31.     .cached = 0
  32.     .memory_type = MEMTYPE_EBI1
  33. }; 
  34.  
  35. static struct platform_device android_pmem_audio_device = { 
  36.     .name = "android_pmem"
  37.     .id = 2
  38.     .dev = { .platform_data = &android_pmem_audio_pdata }, 
  39. }; 
  40.  
  41. static struct android_pmem_platform_data android_pmem_pdata = { 
  42.     .name = "pmem", //给mdp使用,Quaclomm为啥不写成pmem_mdp? 
  43.     .allocator_type = PMEM_ALLOCATORTYPE_BITMAP
  44.     .cached = 1
  45.     .memory_type = MEMTYPE_EBI1
  46. }; 
  47. static struct platform_device android_pmem_device = { 
  48.     .name = "android_pmem"
  49.     .id = 0
  50.     .dev = { .platform_data = &android_pmem_pdata }, 
  51. }; 
static struct android_pmem_platform_data android_pmem_adsp_pdata = {
	.name = "pmem_adsp",	// region used by the ADSP
	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,	// all regions use the bitmap allocator (see pmem_setup)
	.cached = 1,
	.memory_type = MEMTYPE_EBI1,	// all regions live in EBI1 DDR
};

/* Platform device for the ADSP region; matched by pmem_driver ("android_pmem"). */
static struct platform_device android_pmem_adsp_device = {
	.name = "android_pmem",
	.id = 1,
	.dev = { .platform_data = &android_pmem_adsp_pdata },
};

// Reserved size for the MDP pmem region; defaults to the board constant.
static unsigned pmem_mdp_size = MSM_PMEM_MDP_SIZE;
static int __init pmem_mdp_size_setup(char *p)
{
	pmem_mdp_size = memparse(p, NULL);
	return 0;
}
/* The mdp region size can be overridden from the kernel command line
   ("pmem_mdp_size=<n>"); the other pmem regions follow the same pattern. */
early_param("pmem_mdp_size", pmem_mdp_size_setup);

// Reserved size for the ADSP pmem region; overridable via "pmem_adsp_size=<n>".
static unsigned pmem_adsp_size = MSM_PMEM_ADSP_SIZE;
static int __init pmem_adsp_size_setup(char *p)
{
	pmem_adsp_size = memparse(p, NULL);
	return 0;
}

early_param("pmem_adsp_size", pmem_adsp_size_setup);

static struct android_pmem_platform_data android_pmem_audio_pdata = {
	.name = "pmem_audio",		// region used by audio
	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,
	.cached = 0,	// audio buffers are mapped uncached
	.memory_type = MEMTYPE_EBI1,
};

/* Platform device for the audio region; matched by pmem_driver ("android_pmem"). */
static struct platform_device android_pmem_audio_device = {
	.name = "android_pmem",
	.id = 2,
	.dev = { .platform_data = &android_pmem_audio_pdata },
};

static struct android_pmem_platform_data android_pmem_pdata = {
	.name = "pmem",	// used by the MDP; (reviewer note) oddly not named "pmem_mdp"
	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,
	.cached = 1,
	.memory_type = MEMTYPE_EBI1,
};
/* Platform device for the MDP region; matched by pmem_driver ("android_pmem"). */
static struct platform_device android_pmem_device = {
	.name = "android_pmem",
	.id = 0,
	.dev = { .platform_data = &android_pmem_pdata },
};

有了相应的platform_device之后,肯定要找到其platform_driver作设备匹配去。对应文件是pmem.c

  1. static int __init pmem_init(void) 
  2.     /*创建sysfs,位于/sys/kernel/ pmem_regions ,以每个PMEM模块的名字 
  3. 命名,如pmem_audio。目录下的信息主要供用户空间查看当前PMEM模块的使用状况。*/ 
  4.     /* create /sys/kernel/<PMEM_SYSFS_DIR_NAME> directory */ 
  5.     pmem_kset = kset_create_and_add(PMEM_SYSFS_DIR_NAME, 
  6.         NULL, kernel_kobj); 
  7.     if (!pmem_kset) { 
  8.         pr_err("pmem(%s):kset_create_and_add fail\n", __func__); 
  9.         return -ENOMEM; 
  10.     } 
  11.     /*寻找platform device,接着调用pmem_probe */ 
  12.     return platform_driver_register(&pmem_driver); 
  13.  
  14. static struct platform_driver pmem_driver = { 
  15.     .probe = pmem_probe
  16.     .remove = pmem_remove
  17.     .driver = { .name = "android_pmem"
  18.             .pm = &pmem_dev_pm_ops, 
  19.   } 
  20. }; 
  21. static int pmem_probe(struct platform_device *pdev) 
  22.     struct android_pmem_platform_data *pdata; 
  23.  
  24.     if (!pdev || !pdev->dev.platform_data) { 
  25.         pr_alert("Unable to probe pmem!\n"); 
  26.         return -1; 
  27.     } 
  28.     pdata = pdev->dev.platform_data; 
  29.     /*power manager相关,这里不关注。*/ 
  30.     pm_runtime_set_active(&pdev->dev); 
  31.     pm_runtime_enable(&pdev->dev); 
  32.     /*千呼万唤始出来,pmem初始化。*/ 
  33.     return pmem_setup(pdata, NULL, NULL); 
  34.  
  35. Pmem_setup()此函数比较长,不过流程还是比较简单的。 
  36. int pmem_setup(struct android_pmem_platform_data *pdata, 
  37.            long (*ioctl)(struct file *, unsigned int, unsigned long), 
  38.            int (*release)(struct inode *, struct file *)) 
  39.     int i, index = 0, id; 
  40.     struct vm_struct *pmem_vma = NULL
  41.     struct page *page; 
  42.     /*系统对设备总的pmem模块数量有限制。*/ 
  43.     if (id_count >= PMEM_MAX_DEVICES) { 
  44.         pr_alert("pmem: %s: unable to register driver(%s) - no more " 
  45.             "devices available!\n", __func__, pdata->name); 
  46.         goto err_no_mem; 
  47.     } 
  48.     /*size为0表示在系统初始化的时候并没有预留一部分内存空间给此PMEM模块。如果这样肯定会申请失败的。*/ 
  49.     if (!pdata->size) { 
  50.         pr_alert("pmem: %s: unable to register pmem driver(%s) - zero " 
  51.             "size passed in!\n", __func__, pdata->name); 
  52.         goto err_no_mem; 
  53.     } 
  54.  
  55.     id = id_count++; 
  56.     /*PMEM通过id来寻找对应的pmem模块*/ 
  57.     pmem[id].id = id
  58.     /*表示已经分配过了。*/ 
  59.     if (pmem[id].allocate) { 
  60.         pr_alert("pmem: %s: unable to register pmem driver - " 
  61.             "duplicate registration of %s!\n", 
  62.             __func__, pdata->name); 
  63.         goto err_no_mem; 
  64.     } 
  65.     /*PMEM支持多种不同的allocate算法,在下面的switch case语句中可看到,本平台都使用默认的bitmap算法,对应的allocate type为PMEM_ALLOCATORTYPE_BITMAP。 */ 
  66.     pmem[id].allocator_type = pdata->allocator_type; 
  67.     /*quantum是bitmap的计算单位,最小为PAGE_SIZE,当然你也可以在 
  68. android_pmem_platform_data 结构中自己定义大小。*/ 
  69.     /* 'quantum' is a "hidden" variable that defaults to 0 in the board 
  70.      * files */ 
  71.     pmem[id].quantum = pdata->quantum ?: PMEM_MIN_ALLOC; 
  72.     if (pmem[id].quantum < PMEM_MIN_ALLOC || 
  73.         !is_power_of_2(pmem[id].quantum)) { 
  74.         pr_alert("pmem: %s: unable to register pmem driver %s - " 
  75.             "invalid quantum value (%#x)!\n", 
  76.             __func__, pdata->name, pmem[id].quantum); 
  77.         goto err_reset_pmem_info; 
  78.     } 
  79.     /*预留的PMEM模块size必须要以quantum对齐。*/ 
  80.     if (pdata->size % pmem[id].quantum) { 
  81.         /* bad alignment for size! */ 
  82.         pr_alert("pmem: %s: Unable to register driver %s - " 
  83.             "memory region size (%#lx) is not a multiple of " 
  84.             "quantum size(%#x)!\n", __func__, pdata->name, 
  85.             pdata->size, pmem[id].quantum); 
  86.         goto err_reset_pmem_info; 
  87.     } 
  88.  
  89.     pmem[id].cached = pdata->cached; //高速缓冲标志 
  90.     pmem[id].buffered = pdata->buffered; //写缓存标志 
  91.     pmem[id].size = pdata->size; 
  92.     pmem[id].memory_type = pdata->memory_type;   /*系统使用的是EBI1接口的DDR,所以前面type定义都是*/ 
  93.     strlcpy(pmem[id].name, pdata->name, PMEM_NAME_SIZE); 
  94.     /*PMEM模块可用的内存entries,以quantum为单位。*/ 
  95.     pmem[id].num_entries = pmem[id].size / pmem[id].quantum; 
  96.  
  97.     memset(&pmem[id].kobj, 0, sizeof(pmem[0].kobj)); 
  98.     pmem[id].kobj.kset = pmem_kset
  99.     /*我们只用到bitmap算法,其他的有兴趣自己可研究。*/ 
  100.     switch (pmem[id].allocator_type) { 
  101.     case PMEM_ALLOCATORTYPE_ALLORNOTHING: 
  102. ~~snip 
  103.         break; 
  104.     case PMEM_ALLOCATORTYPE_BUDDYBESTFIT: 
  105. ~~snip 
  106.         break; 
  107.     case PMEM_ALLOCATORTYPE_BITMAP: /* 0, default if not explicit */ 
  108.         /*先分配64个bitm_alloc结构,用于后面管理PMEM模块的申请。 
  109. 因为用户空间可能不会一下子申请调整个PMEM模块,如有两个进程都申请pmem_audio模块的一小部分内存。PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS 为64.*/ 
  110.         pmem[id].allocator.bitmap.bitm_alloc = kmalloc
  111.             PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS * 
  112.                 sizeof(*pmem[id].allocator.bitmap.bitm_alloc), 
  113.             GFP_KERNEL); 
  114. ~~snip 
  115.         /*初始化 bitm_alloc结构体。*/ 
  116.         for (i = 0; i < PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS; i++) { 
  117.             pmem[id].allocator.bitmap.bitm_alloc[i].bit = -1; 
  118.             pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0
  119.         } 
  120.         /*记录当前已经申请的bitm_alloc数量。*/ 
  121.         pmem[id].allocator.bitmap.bitmap_allocs
  122.             PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS
  123.         /*以32为单位记录当前整个PMEM模块的内存数量。*/ 
  124.         pmem[id].allocator.bitmap.bitmap
  125.             kcalloc((pmem[id].num_entries + 31) / 32, 
  126.                 sizeof(unsigned int), GFP_KERNEL); 
  127.         if (!pmem[id].allocator.bitmap.bitmap) { 
  128.             pr_alert("pmem: %s: Unable to register pmem " 
  129.                 "driver - can't allocate bitmap!\n", 
  130.                 __func__); 
  131.             goto err_cant_register_device; 
  132.         } 
  133.         /*当前空闲的entries。*/ 
  134.         pmem[id].allocator.bitmap.bitmap_free = pmem[id].num_entries; 
  135.         /*下面这几个函数会在用户空间通过open/ioctl/mmap用到。*/ 
  136.         pmem[id].allocate = pmem_allocator_bitmap
  137.         pmem[id].free = pmem_free_bitmap
  138.         pmem[id].free_space = pmem_free_space_bitmap
  139.         pmem[id].len = pmem_len_bitmap
  140.         pmem[id].start_addr = pmem_start_addr_bitmap
  141.  
  142.         DLOG("bitmap allocator id %d (%s), num_entries %u, raw size " 
  143.             "%lu, quanta size %u\n", 
  144.             id, pdata->name, pmem[id].allocator.bitmap.bitmap_free, 
  145.             pmem[id].size, pmem[id].quantum); 
  146.         break; 
  147.  
  148.     case PMEM_ALLOCATORTYPE_SYSTEM: 
  149. ~~snip 
  150.     } 
  151.  
  152.     pmem[id].ioctl = ioctl
  153.     pmem[id].release = release
  154.     mutex_init(&pmem[id].arena_mutex); 
  155.     mutex_init(&pmem[id].data_list_mutex); 
  156.     INIT_LIST_HEAD(&pmem[id].data_list); 
  157.  
  158.     pmem[id].dev.name = pdata->name; 
  159.     pmem[id].dev.minor = id;    //后面使用id来寻找对应的pmem模块。 
  160.     pmem[id].dev.fops = &pmem_fops; //后面用户空间的操作都会调用的这个变量里的函数指针了。 
  161.     pmem[id].reusable = pdata->reusable; 
  162.     pr_info("pmem: Initializing %s as %s\n", 
  163.         pdata->name, pdata->cached ? "cached" : "non-cached"); 
  164.     /*注册为字符设备,会看到/dev/pmem_**, 如/dev/pmem_audio,供用户空间 
  165. 操作设备。*/ 
  166.     if (misc_register(&pmem[id].dev)) { 
  167.         pr_alert("Unable to register pmem driver!\n"); 
  168.         goto err_cant_register_device; 
  169.     } 
  170. ~~snip 
  171.     page = alloc_page(GFP_KERNEL); 
  172.     if (!page) { 
  173.         pr_err("pmem: Failed to allocate page for %s\n", pdata->name); 
  174.         goto cleanup_vm; 
  175.     } 
  176.     pmem[id].garbage_pfn = page_to_pfn(page); 
  177.     atomic_set(&pmem[id].allocation_cnt, 0); 
  178.  
  179.     if (pdata->setup_region) 
  180.         pmem[id].region_data = pdata->setup_region(); 
  181.  
  182.     if (pdata->request_region) 
  183.         pmem[id].mem_request = pdata->request_region; 
  184.  
  185.     if (pdata->release_region) 
  186.         pmem[id].mem_release = pdata->release_region; 
  187.  
  188.     pr_info("allocating %lu bytes at %lx physical for %s\n", 
  189.         pmem[id].size, pmem[id].base, pmem[id].name); 
  190.  
  191.     return 0; 
  192.  
  193. ~~snip 
  194.     return -1; 
static int __init pmem_init(void)
{
	/*
	 * Create the /sys/kernel/<PMEM_SYSFS_DIR_NAME> kset.  Each pmem
	 * region later attaches its kobject here so userspace can inspect
	 * per-region usage (e.g. /sys/kernel/pmem_regions/pmem_audio).
	 */
	pmem_kset = kset_create_and_add(PMEM_SYSFS_DIR_NAME, NULL, kernel_kobj);
	if (!pmem_kset) {
		pr_err("pmem(%s):kset_create_and_add fail\n", __func__);
		return -ENOMEM;
	}

	/* Bind to the "android_pmem" platform devices; probes via pmem_probe(). */
	return platform_driver_register(&pmem_driver);
}

/* Matches every platform_device named "android_pmem" (mdp/adsp/audio). */
static struct platform_driver pmem_driver = {
	.probe = pmem_probe,
	.remove = pmem_remove,
	.driver = { .name = "android_pmem",
		    .pm = &pmem_dev_pm_ops,
  }
};
static int pmem_probe(struct platform_device *pdev)
{
	struct android_pmem_platform_data *pdata;

	if (!pdev || !pdev->dev.platform_data) {
		pr_alert("Unable to probe pmem!\n");
		return -1;
	}
	pdata = pdev->dev.platform_data;
	/*power manager相关,这里不关注。*/
	pm_runtime_set_active(&pdev->dev);
	pm_runtime_enable(&pdev->dev);
	/*千呼万唤始出来,pmem初始化。*/
	return pmem_setup(pdata, NULL, NULL);
}

Pmem_setup()此函数比较长,不过流程还是比较简单的。
/*
 * Register one pmem region described by pdata: validate its size/quantum,
 * set up the chosen allocator (bitmap on this platform), and expose it to
 * userspace as a misc char device (/dev/<name>).  Optional ioctl/release
 * hooks let a wrapper driver intercept file operations.
 * Returns 0 on success, -1 on failure (error paths elided by ~~snip).
 */
int pmem_setup(struct android_pmem_platform_data *pdata,
	       long (*ioctl)(struct file *, unsigned int, unsigned long),
	       int (*release)(struct inode *, struct file *))
{
	int i, index = 0, id;
	struct vm_struct *pmem_vma = NULL;
	struct page *page;
	/* The system caps the total number of pmem devices. */
	if (id_count >= PMEM_MAX_DEVICES) {
		pr_alert("pmem: %s: unable to register driver(%s) - no more "
			"devices available!\n", __func__, pdata->name);
		goto err_no_mem;
	}
	/* size == 0 means no memory was reserved for this region at boot;
	   any later allocation would be doomed, so refuse registration. */
	if (!pdata->size) {
		pr_alert("pmem: %s: unable to register pmem driver(%s) - zero "
			"size passed in!\n", __func__, pdata->name);
		goto err_no_mem;
	}

	id = id_count++;
	/* This id is how the rest of the driver looks up the region. */
	pmem[id].id = id;
	/* A non-NULL allocate hook means this slot was already registered. */
	if (pmem[id].allocate) {
		pr_alert("pmem: %s: unable to register pmem driver - "
			"duplicate registration of %s!\n",
			__func__, pdata->name);
		goto err_no_mem;
	}
	/* Several allocator algorithms are supported (see the switch below);
	   this platform always uses the default bitmap allocator,
	   PMEM_ALLOCATORTYPE_BITMAP. */
	pmem[id].allocator_type = pdata->allocator_type;
	/* quantum is the bitmap allocation granularity: at least PAGE_SIZE,
	   but overridable per-board in android_pmem_platform_data. */
	/* 'quantum' is a "hidden" variable that defaults to 0 in the board
	 * files */
	pmem[id].quantum = pdata->quantum ?: PMEM_MIN_ALLOC;
	if (pmem[id].quantum < PMEM_MIN_ALLOC ||
		!is_power_of_2(pmem[id].quantum)) {
		pr_alert("pmem: %s: unable to register pmem driver %s - "
			"invalid quantum value (%#x)!\n",
			__func__, pdata->name, pmem[id].quantum);
		goto err_reset_pmem_info;
	}
	/* The reserved region size must be a multiple of the quantum. */
	if (pdata->size % pmem[id].quantum) {
		/* bad alignment for size! */
		pr_alert("pmem: %s: Unable to register driver %s - "
			"memory region size (%#lx) is not a multiple of "
			"quantum size(%#x)!\n", __func__, pdata->name,
			pdata->size, pmem[id].quantum);
		goto err_reset_pmem_info;
	}

	pmem[id].cached = pdata->cached;	// cache-enable flag
	pmem[id].buffered = pdata->buffered;	// write-buffer flag
	pmem[id].size = pdata->size;
	pmem[id].memory_type = pdata->memory_type;	/* this board's DDR is on the EBI1 interface, hence MEMTYPE_EBI1 above */
	strlcpy(pmem[id].name, pdata->name, PMEM_NAME_SIZE);
	/* Number of allocatable entries, counted in quantum units. */
	pmem[id].num_entries = pmem[id].size / pmem[id].quantum;

	memset(&pmem[id].kobj, 0, sizeof(pmem[0].kobj));
	pmem[id].kobj.kset = pmem_kset;
	/* Only the bitmap allocator is used on this platform; the other
	   cases are left for the curious reader. */
	switch (pmem[id].allocator_type) {
	case PMEM_ALLOCATORTYPE_ALLORNOTHING:
~~snip
		break;
	case PMEM_ALLOCATORTYPE_BUDDYBESTFIT:
~~snip
		break;
	case PMEM_ALLOCATORTYPE_BITMAP: /* 0, default if not explicit */
		/* Preallocate bitm_alloc slots to track live allocations:
		   userspace rarely maps a whole region at once (e.g. two
		   processes each taking a small piece of pmem_audio).
		   PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS is 64. */
		pmem[id].allocator.bitmap.bitm_alloc = kmalloc(
			PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS *
				sizeof(*pmem[id].allocator.bitmap.bitm_alloc),
			GFP_KERNEL);
~~snip
		/* Mark every slot free (bit == -1, no quanta). */
		for (i = 0; i < PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS; i++) {
			pmem[id].allocator.bitmap.bitm_alloc[i].bit = -1;
			pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0;
		}
		/* Number of bitm_alloc slots currently available. */
		pmem[id].allocator.bitmap.bitmap_allocs =
			PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS;
		/* One bit per quantum, packed into 32-bit words. */
		pmem[id].allocator.bitmap.bitmap =
			kcalloc((pmem[id].num_entries + 31) / 32,
				sizeof(unsigned int), GFP_KERNEL);
		if (!pmem[id].allocator.bitmap.bitmap) {
			pr_alert("pmem: %s: Unable to register pmem "
				"driver - can't allocate bitmap!\n",
				__func__);
			goto err_cant_register_device;
		}
		/* Everything starts out free. */
		pmem[id].allocator.bitmap.bitmap_free = pmem[id].num_entries;
		/* These hooks back the userspace open/ioctl/mmap paths. */
		pmem[id].allocate = pmem_allocator_bitmap;
		pmem[id].free = pmem_free_bitmap;
		pmem[id].free_space = pmem_free_space_bitmap;
		pmem[id].len = pmem_len_bitmap;
		pmem[id].start_addr = pmem_start_addr_bitmap;

		DLOG("bitmap allocator id %d (%s), num_entries %u, raw size "
			"%lu, quanta size %u\n",
			id, pdata->name, pmem[id].allocator.bitmap.bitmap_free,
			pmem[id].size, pmem[id].quantum);
		break;

	case PMEM_ALLOCATORTYPE_SYSTEM:
~~snip
	}

	pmem[id].ioctl = ioctl;
	pmem[id].release = release;
	mutex_init(&pmem[id].arena_mutex);
	mutex_init(&pmem[id].data_list_mutex);
	INIT_LIST_HEAD(&pmem[id].data_list);

	pmem[id].dev.name = pdata->name;
	pmem[id].dev.minor = id;	// the minor doubles as the region lookup id
	pmem[id].dev.fops = &pmem_fops;	// userspace file ops dispatch through these
	pmem[id].reusable = pdata->reusable;
	pr_info("pmem: Initializing %s as %s\n",
		pdata->name, pdata->cached ? "cached" : "non-cached");
	/* Register as a misc char device: creates /dev/<name>,
	   e.g. /dev/pmem_audio, for userspace to operate on. */
	if (misc_register(&pmem[id].dev)) {
		pr_alert("Unable to register pmem driver!\n");
		goto err_cant_register_device;
	}
~~snip
	page = alloc_page(GFP_KERNEL);
	if (!page) {
		pr_err("pmem: Failed to allocate page for %s\n", pdata->name);
		goto cleanup_vm;
	}
	pmem[id].garbage_pfn = page_to_pfn(page);
	atomic_set(&pmem[id].allocation_cnt, 0);

	if (pdata->setup_region)
		pmem[id].region_data = pdata->setup_region();

	if (pdata->request_region)
		pmem[id].mem_request = pdata->request_region;

	if (pdata->release_region)
		pmem[id].mem_release = pdata->release_region;

	pr_info("allocating %lu bytes at %lx physical for %s\n",
		pmem[id].size, pmem[id].base, pmem[id].name);

	return 0;

~~snip
	return -1;
}

PMEM使用驱动分析:

open

当用户进程open pmem设备的时候,会调用到pmem_open:

  1. static int pmem_open(struct inode *inode, struct file *file) 
  2.     struct pmem_data *data; 
  3.     int id = get_id(file); 
  4.     int ret = 0
  5. #if PMEM_DEBUG_MSGS 
  6.     char currtask_name[FIELD_SIZEOF(struct task_struct, comm) + 1]; 
  7. #endif 
  8.  
  9.     DLOG("pid %u(%s) file %p(%ld) dev %s(id: %d)\n", 
  10.         current->pid, get_task_comm(currtask_name, current), 
  11.         file, file_count(file), get_name(file), id); 
  12.     /*分配struct pmem_data。*/ 
  13.     data = kmalloc(sizeof(struct pmem_data), GFP_KERNEL); 
  14.     if (!data) { 
  15.         printk(KERN_ALERT "pmem: %s: unable to allocate memory for " 
  16.                 "pmem metadata.", __func__); 
  17.         return -1; 
  18.     } 
  19.     data->flags = 0
  20.     data->index = -1; 
  21.     data->task = NULL
  22.     data->vma = NULL
  23.     data->pid = 0
  24.     data->master_file = NULL
  25. #if PMEM_DEBUG 
  26.     data->ref = 0
  27. #endif 
  28.     INIT_LIST_HEAD(&data->region_list); 
  29.     init_rwsem(&data->sem); 
  30.  
  31.     file->private_data = data
  32.     INIT_LIST_HEAD(&data->list); 
  33.  
  34.     mutex_lock(&pmem[id].data_list_mutex); 
  35.     list_add(&data->list, &pmem[id].data_list); 
  36.     mutex_unlock(&pmem[id].data_list_mutex); 
  37.     return ret; 
static int pmem_open(struct inode *inode, struct file *file)
{
	struct pmem_data *data;
	int id = get_id(file);
	int ret = 0;
#if PMEM_DEBUG_MSGS
	char currtask_name[FIELD_SIZEOF(struct task_struct, comm) + 1];
#endif

	DLOG("pid %u(%s) file %p(%ld) dev %s(id: %d)\n",
		current->pid, get_task_comm(currtask_name, current),
		file, file_count(file), get_name(file), id);
	/*分配struct pmem_data。*/
	data = kmalloc(sizeof(struct pmem_data), GFP_KERNEL);
	if (!data) {
		printk(KERN_ALERT "pmem: %s: unable to allocate memory for "
				"pmem metadata.", __func__);
		return -1;
	}
	data->flags = 0;
	data->index = -1;
	data->task = NULL;
	data->vma = NULL;
	data->pid = 0;
	data->master_file = NULL;
#if PMEM_DEBUG
	data->ref = 0;
#endif
	INIT_LIST_HEAD(&data->region_list);
	init_rwsem(&data->sem);

	file->private_data = data;
	INIT_LIST_HEAD(&data->list);

	mutex_lock(&pmem[id].data_list_mutex);
	list_add(&data->list, &pmem[id].data_list);
	mutex_unlock(&pmem[id].data_list_mutex);
	return ret;
}

Open似乎没做什么特殊事情,只是分配一个struct pmem_data,然后初始化之后保存到file的私有数据之中。

mmap

open好了之后,用户进程想要使用PMEM,通过mmap实现,对应的是Kernel中的pmem_mmap。

  1. static int pmem_mmap(struct file *file, struct vm_area_struct *vma) 
  2.     /*取出open时创建的struct pem_data.*/ 
  3.     struct pmem_data *data = file->private_data; 
  4.     int index = -1; 
  5.     /*要映射的size大小。*/ 
  6.     unsigned long vma_sizevma->vm_end - vma->vm_start; 
  7.     int ret = 0, id = get_id(file); 
  8.  
  9. ~~snip 
  10.     /* check this file isn't already mmaped, for submaps check this file 
  11.      * has never been mmaped */ 
  12.     /*如果类型为submap,也不用再mmap。这部分和进程间共享PMEM有关, 
  13. 也就是说当主进程做了mmap之后,另外一个要共享的进程就无需再mmap了。*/ 
  14.     if ((data->flags & PMEM_FLAGS_SUBMAP) || 
  15.         (data->flags & PMEM_FLAGS_UNSUBMAP)) { 
  16. #if PMEM_DEBUG 
  17.         pr_err("pmem: you can only mmap a pmem file once, " 
  18.                "this file is already mmaped. %x\n", data->flags); 
  19. #endif 
  20.         ret = -EINVAL; 
  21.         goto error; 
  22.     } 
  23.     /* if file->private_data == unalloced, alloc*/ 
  24.     /*index表示当前分配的位于bitmap中的索引,如果为-1就表示未分配。*/ 
  25.     if (data->index == -1) { 
  26.         mutex_lock(&pmem[id].arena_mutex); 
  27.         /*根据id号来从PMEM模块上分配一部分内存,返回在bitmap的索引。*/ 
  28.         index = pmem_allocate_from_id(id, 
  29.                 vma->vm_end - vma->vm_start, 
  30.                 SZ_4K); 
  31.         mutex_unlock(&pmem[id].arena_mutex); 
  32.         /* either no space was available or an error occured */ 
  33.         if (index == -1) { 
  34.             pr_err("pmem: mmap unable to allocate memory" 
  35.                 "on %s\n", get_name(file)); 
  36.             ret = -ENOMEM; 
  37.             goto error; 
  38.         } 
  39.         /* store the index of a successful allocation */ 
  40.         data->index = index
  41.     } 
  42.     /*分配的size不能超过整个PMEM模块长度。*/ 
  43.     if (pmem[id].len(id, data) < vma_size) { 
  44. #if PMEM_DEBUG 
  45.         pr_err("pmem: mmap size [%lu] does not match" 
  46.                " size of backing region [%lu].\n", vma_size, 
  47.                pmem[id].len(id, data)); 
  48. #endif 
  49.         ret = -EINVAL; 
  50.         goto error; 
  51.     } 
  52.     /*调用的是pmem_start_addr_bitmap 函数,返回当前在整个PMEM模块 
  53. 中的偏移。*/ 
  54.     vma->vm_pgoff = pmem[id].start_addr(id, data) >> PAGE_SHIFT; 
  55.     /*cache的禁止操作。*/ 
  56.     vma->vm_page_prot = pmem_phys_mem_access_prot(file, vma->vm_page_prot); 
  57.     /* PMEM_FLAGS_CONNECTED 在ioctl接口中会被定义,表示要共享PMEM内存。可以先看如果要共享内存,mmap做了什么。*/ 
  58.     if (data->flags & PMEM_FLAGS_CONNECTED) { 
  59.         struct pmem_region_node *region_node; 
  60.         struct list_head *elt; 
  61.     /*插入一个pfn页框到用vma中*/ 
  62.         if (pmem_map_garbage(id, vma, data, 0, vma_size)) { 
  63.             pr_alert("pmem: mmap failed in kernel!\n"); 
  64.             ret = -EAGAIN; 
  65.             goto error; 
  66.         } 
  67.     /*根据当前有多少region_list作一一映射。*/ 
  68.         list_for_each(elt, &data->region_list) { 
  69.             region_node = list_entry(elt, struct pmem_region_node, 
  70.                          list); 
  71.             DLOG("remapping file: %p %lx %lx\n", file, 
  72.                 region_node->region.offset, 
  73.                 region_node->region.len); 
  74.             if (pmem_remap_pfn_range(id, vma, data, 
  75.                          region_node->region.offset, 
  76.                          region_node->region.len)) { 
  77.                 ret = -EAGAIN; 
  78.                 goto error; 
  79.             } 
  80.         } 
  81.         /*标记当前是submap。*/ 
  82.         data->flags |= PMEM_FLAGS_SUBMAP; 
  83.         get_task_struct(current->group_leader); 
  84.         data->task = current->group_leader; 
  85.         data->vma = vma
  86. #if PMEM_DEBUG 
  87.         data->pid = current->pid; 
  88. #endif 
  89.         DLOG("submmapped file %p vma %p pid %u\n", file, vma, 
  90.              current->pid); 
  91.     } else { 
  92.         /mastermap走如下流程。映射vma_size大小到用户空间。*/ 
  93.         if (pmem_map_pfn_range(id, vma, data, 0, vma_size)) { 
  94.             pr_err("pmem: mmap failed in kernel!\n"); 
  95.             ret = -EAGAIN; 
  96.             goto error; 
  97.         } 
  98.         data->flags |= PMEM_FLAGS_MASTERMAP; 
  99.         data->pid = current->pid; 
  100.     } 
  101.     vma->vm_ops = &vm_ops; 
  102. error: 
  103.     up_write(&data->sem); 
  104.     return ret; 
static int pmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	/*取出open时创建的struct pem_data.*/
	struct pmem_data *data = file->private_data;
	int index = -1;
	/*要映射的size大小。*/
	unsigned long vma_size =  vma->vm_end - vma->vm_start;
	int ret = 0, id = get_id(file);

~~snip
	/* check this file isn't already mmaped, for submaps check this file
	 * has never been mmaped */
	/*如果类型为submap,也不用再mmap。这部分和进程间共享PMEM有关,
也就是说当主进程做了mmap之后,另外一个要共享的进程就无需再mmap了。*/
	if ((data->flags & PMEM_FLAGS_SUBMAP) ||
	    (data->flags & PMEM_FLAGS_UNSUBMAP)) {
#if PMEM_DEBUG
		pr_err("pmem: you can only mmap a pmem file once, "
		       "this file is already mmaped. %x\n", data->flags);
#endif
		ret = -EINVAL;
		goto error;
	}
	/* if file->private_data == unalloced, alloc*/
	/*index表示当前分配的位于bitmap中的索引,如果为-1就表示未分配。*/
	if (data->index == -1) {
		mutex_lock(&pmem[id].arena_mutex);
		/*根据id号来从PMEM模块上分配一部分内存,返回在bitmap的索引。*/
		index = pmem_allocate_from_id(id,
				vma->vm_end - vma->vm_start,
				SZ_4K);
		mutex_unlock(&pmem[id].arena_mutex);
		/* either no space was available or an error occured */
		if (index == -1) {
			pr_err("pmem: mmap unable to allocate memory"
				"on %s\n", get_name(file));
			ret = -ENOMEM;
			goto error;
		}
		/* store the index of a successful allocation */
		data->index = index;
	}
	/*分配的size不能超过整个PMEM模块长度。*/
	if (pmem[id].len(id, data) < vma_size) {
#if PMEM_DEBUG
		pr_err("pmem: mmap size [%lu] does not match"
		       " size of backing region [%lu].\n", vma_size,
		       pmem[id].len(id, data));
#endif
		ret = -EINVAL;
		goto error;
	}
	/*调用的是pmem_start_addr_bitmap 函数,返回当前在整个PMEM模块
中的偏移。*/
	vma->vm_pgoff = pmem[id].start_addr(id, data) >> PAGE_SHIFT;
	/*cache的禁止操作。*/
	vma->vm_page_prot = pmem_phys_mem_access_prot(file, vma->vm_page_prot);
	/* PMEM_FLAGS_CONNECTED 在ioctl接口中会被定义,表示要共享PMEM内存。可以先看如果要共享内存,mmap做了什么。*/
	if (data->flags & PMEM_FLAGS_CONNECTED) {
		struct pmem_region_node *region_node;
		struct list_head *elt;
	/*插入一个pfn页框到用vma中*/
		if (pmem_map_garbage(id, vma, data, 0, vma_size)) {
			pr_alert("pmem: mmap failed in kernel!\n");
			ret = -EAGAIN;
			goto error;
		}
	/*根据当前有多少region_list作一一映射。*/
		list_for_each(elt, &data->region_list) {
			region_node = list_entry(elt, struct pmem_region_node,
						 list);
			DLOG("remapping file: %p %lx %lx\n", file,
				region_node->region.offset,
				region_node->region.len);
			if (pmem_remap_pfn_range(id, vma, data,
						 region_node->region.offset,
						 region_node->region.len)) {
				ret = -EAGAIN;
				goto error;
			}
		}
		/*标记当前是submap。*/
		data->flags |= PMEM_FLAGS_SUBMAP;
		get_task_struct(current->group_leader);
		data->task = current->group_leader;
		data->vma = vma;
#if PMEM_DEBUG
		data->pid = current->pid;
#endif
		DLOG("submmapped file %p vma %p pid %u\n", file, vma,
		     current->pid);
	} else {
		/mastermap走如下流程。映射vma_size大小到用户空间。*/
		if (pmem_map_pfn_range(id, vma, data, 0, vma_size)) {
			pr_err("pmem: mmap failed in kernel!\n");
			ret = -EAGAIN;
			goto error;
		}
		data->flags |= PMEM_FLAGS_MASTERMAP;
		data->pid = current->pid;
	}
	vma->vm_ops = &vm_ops;
error:
	up_write(&data->sem);
	return ret;
}

在这个阶段,我们主要关心的为非共享PMEM的操作,当执行了mmap之后,用户空间就直接可以操作pmem了。

上面还有个重要的函数没作分析,pmem_allocate_from_id():

  1. static int pmem_allocate_from_id(const int id, const unsigned long size, 
  2.                         const unsigned int align) 
  3.     int ret; 
  4.     ret = pmem_get_region(id); 
  5.     if (ret) 
  6.         return -1; 
  7.     /*调用的是pmem_allocator_bitmap().*/ 
  8.     ret = pmem[id].allocate(id, size, align); 
  9.     if (ret < 0
  10.         pmem_put_region(id); 
  11.     return ret; 
  12. static int pmem_get_region(int id) 
  13.     /* Must be called with arena mutex locked */ 
  14.     atomic_inc(&pmem[id].allocation_cnt); 
  15.     if (!pmem[id].vbase) { 
  16. ~~snip 
  17.         /*根据id作ioremap*/ 
  18.         ioremap_pmem(id); 
  19.     } 
  20. ~~snip 
  21. static void ioremap_pmem(int id) 
  22.     unsigned long addr; 
  23.     const struct mem_type *type; 
  24.  
  25.     DLOG("PMEMDEBUG: ioremaping for %s\n", pmem[id].name); 
  26.     if (pmem[id].map_on_demand) { 
  27. ~~snip 
  28.     } else { 
  29.     /*如果需要cache则调用ioremap_cached,否则调用ioremap。*/ 
  30.         if (pmem[id].cached) 
  31.             pmem[id].vbase = ioremap_cached(pmem[id].base, 
  32.                         pmem[id].size); 
  33. ~~snip 
  34.         else 
  35.             pmem[id].vbase = ioremap(pmem[id].base, pmem[id].size); 
  36.     } 
static int pmem_allocate_from_id(const int id, const unsigned long size,
						const unsigned int align)
{
	int index;

	/* Make sure the region is mapped before handing out memory. */
	if (pmem_get_region(id))
		return -1;

	/* Delegate to the per-region allocator (pmem_allocator_bitmap here). */
	index = pmem[id].allocate(id, size, align);
	if (index < 0)
		pmem_put_region(id);	/* allocation failed: drop the ref */

	return index;
}
/*
 * Take a reference on region `id`, mapping its reserved physical memory
 * into kernel virtual space on first use (excerpt; error paths elided).
 */
static int pmem_get_region(int id)
{
	/* Must be called with arena mutex locked */
	atomic_inc(&pmem[id].allocation_cnt);
	if (!pmem[id].vbase) {
~~snip
		/* First user: ioremap the region for this id. */
		ioremap_pmem(id);
	}
~~snip
}
/*
 * ioremap_pmem() - map the physical pmem region @id into kernel virtual
 * address space, storing the result in pmem[id].vbase.
 *
 * The map_on_demand path is elided ("~~snip") in this excerpt.
 */
static void ioremap_pmem(int id)
{
	unsigned long addr;
	const struct mem_type *type;

	DLOG("PMEMDEBUG: ioremaping for %s\n", pmem[id].name);
	if (pmem[id].map_on_demand) {
~~snip
	} else {
	/* Cached devices get a cacheable mapping via ioremap_cached();
	 * everything else falls back to a plain (uncached) ioremap(). */
		if (pmem[id].cached)
			pmem[id].vbase = ioremap_cached(pmem[id].base,
						pmem[id].size);
~~snip
		else
			pmem[id].vbase = ioremap(pmem[id].base, pmem[id].size);
	}
}
/* NOTE(review): the stray closing brace below matches no open brace —
 * it looks like a copy/paste artifact of the article's excerpt. */
}

这样,得到PMEM模块对应的内核虚拟地址。

接着就是pmem_allocator_bitmap,看它如何利用bitmap来分配及管理内存。

  1. static int pmem_allocator_bitmap(const int id, 
  2.         const unsigned long len, 
  3.         const unsigned int align) 
  4.     /* caller should hold the lock on arena_mutex! */ 
  5.     int bitnum, i; 
  6.     unsigned int quanta_needed; 
  7.  
  8.     DLOG("bitmap id %d, len %ld, align %u\n", id, len, align); 
  9. ~~snip 
  10.     /*以quantum为单位计算要分配的内存大小。*/ 
  11.     quanta_needed = (len + pmem[id].quantum - 1) / pmem[id].quantum; 
  12.     /*超过整个pmem模块的数量则失败。*/ 
  13.     if (pmem[id].allocator.bitmap.bitmap_free < quanta_needed) { 
  14.         return -1; 
  15.     } 
  16.     /*将要申请的quanta数量再次作一个转换,因为要考虑对齐等因素。*/ 
  17.     bitnum = reserve_quanta(quanta_needed, id, align); 
  18.     if (bitnum == -1) 
  19.         goto leave; 
  20.     /*找到第一个未被使用过的bitmap的位置。*/ 
  21.     for (i = 0
  22.         i < pmem[id].allocator.bitmap.bitmap_allocs && 
  23.             pmem[id].allocator.bitmap.bitm_alloc[i].bit != -1; 
  24.         i++) 
  25.         ; 
  26.     /*如果找到的位置已经超出当前的bitmap_allocs数量,则要重新 
  27. 分配更大的一块bitm_alloc.*/ 
  28.     if (i >= pmem[id].allocator.bitmap.bitmap_allocs) { 
  29.         void *temp; 
  30.         /*申请的数量比上次大一倍。*/ 
  31.         int32_t new_bitmap_allocs
  32.             pmem[id].allocator.bitmap.bitmap_allocs << 1
  33.         int j; 
  34.  
  35.         if (!new_bitmap_allocs) { /* failed sanity check!! */ 
  36.             return -1; 
  37.         } 
  38.         /*申请数量不能大于当前PMEM模块实际的数量。*/ 
  39.         if (new_bitmap_allocs > pmem[id].num_entries) { 
  40.             /* failed sanity check!! */ 
  41.             return -1; 
  42.         } 
  43.         /*重新分配和指定。*/ 
  44.         temp = krealloc(pmem[id].allocator.bitmap.bitm_alloc, 
  45.                 new_bitmap_allocs * 
  46.                 sizeof(*pmem[id].allocator.bitmap.bitm_alloc), 
  47.                 GFP_KERNEL); 
  48.         if (!temp) { 
  49.             return -1; 
  50.         } 
  51.         pmem[id].allocator.bitmap.bitmap_allocs = new_bitmap_allocs
  52.         pmem[id].allocator.bitmap.bitm_alloc = temp
  53.         /*只对重新分配的部分作初始化。*/ 
  54.         for (j = i; j < new_bitmap_allocs; j++) { 
  55.             pmem[id].allocator.bitmap.bitm_alloc[j].bit = -1; 
  56.             pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0
  57.         } 
  58.  
  59.         DLOG("increased # of allocated regions to %d for id %d\n", 
  60.             pmem[id].allocator.bitmap.bitmap_allocs, id); 
  61.     } 
  62.  
  63.     DLOG("bitnum %d, bitm_alloc index %d\n", bitnum, i); 
  64.  
  65.     pmem[id].allocator.bitmap.bitmap_free -= quanta_needed
  66.     pmem[id].allocator.bitmap.bitm_alloc[i].bit = bitnum
  67.     pmem[id].allocator.bitmap.bitm_alloc[i].quanta = quanta_needed
  68. leave: 
  69.     return bitnum; 
  70.  
  71. static int reserve_quanta(const unsigned int quanta_needed, 
  72.         const int id, 
  73.         unsigned int align) 
  74.     /* alignment should be a valid power of 2 */ 
  75.     int ret = -1, start_bit = 0, spacing = 1
  76. ~~snip 
  77.     start_bit = bit_from_paddr(id, 
  78.         (pmem[id].base + align - 1) & ~(align - 1)); 
  79.     if (start_bit <= -1) { 
  80.         return -1; 
  81.     } 
  82.     spacing = align / pmem[id].quantum; 
  83.     spacing = spacing > 1 ? spacing : 1; 
  84.     /*从memory pool上也就是当前拥有的PMEM内存块上分配出一片 
  85. 区域来给当前申请用户进程。*/ 
  86.     ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap, 
  87.         quanta_needed, 
  88.         (pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum, 
  89.         spacing, 
  90.         start_bit); 
  91.  
  92.     return ret; 
/*
 * pmem_allocator_bitmap() - allocate @len bytes from pmem device @id
 * using the bitmap allocator.
 *
 * Rounds the request up to whole quanta, reserves a contiguous aligned
 * run of quanta, and records the allocation in the bitm_alloc
 * bookkeeping array (doubling that array when it is full).
 *
 * Returns the first quantum index (bit number) of the allocation, or -1
 * on failure.  Caller must hold arena_mutex.
 */
static int pmem_allocator_bitmap(const int id,
		const unsigned long len,
		const unsigned int align)
{
	/* caller should hold the lock on arena_mutex! */
	int bitnum, i;
	unsigned int quanta_needed;

	DLOG("bitmap id %d, len %ld, align %u\n", id, len, align);
~~snip
	/* Round the request up to whole quanta. */
	quanta_needed = (len + pmem[id].quantum - 1) / pmem[id].quantum;
	/* Fail fast if the region cannot satisfy the request at all. */
	if (pmem[id].allocator.bitmap.bitmap_free < quanta_needed) {
		return -1;
	}
	/* Reserve a contiguous, suitably aligned run of quanta. */
	bitnum = reserve_quanta(quanta_needed, id, align);
	if (bitnum == -1)
		goto leave;
	/* Find the first free slot in the bookkeeping array. */
	for (i = 0;
		i < pmem[id].allocator.bitmap.bitmap_allocs &&
			pmem[id].allocator.bitmap.bitm_alloc[i].bit != -1;
		i++)
		;
	/* No free slot left: double the bookkeeping array. */
	if (i >= pmem[id].allocator.bitmap.bitmap_allocs) {
		void *temp;
		int32_t new_bitmap_allocs =
			pmem[id].allocator.bitmap.bitmap_allocs << 1;
		int j;

		if (!new_bitmap_allocs) { /* failed sanity check!! */
			return -1;
		}
		/* Never track more allocations than the region has entries.
		 * NOTE(review): the failure paths below return without
		 * releasing the quanta reserved above — possible leak;
		 * confirm against the full driver source. */
		if (new_bitmap_allocs > pmem[id].num_entries) {
			/* failed sanity check!! */
			return -1;
		}
		temp = krealloc(pmem[id].allocator.bitmap.bitm_alloc,
				new_bitmap_allocs *
				sizeof(*pmem[id].allocator.bitmap.bitm_alloc),
				GFP_KERNEL);
		if (!temp) {
			return -1;
		}
		pmem[id].allocator.bitmap.bitmap_allocs = new_bitmap_allocs;
		pmem[id].allocator.bitmap.bitm_alloc = temp;
		/*
		 * Initialise only the newly added slots.
		 * BUGFIX: the original wrote bitm_alloc[i].quanta = 0 inside
		 * this loop, repeatedly zeroing slot i and leaving the new
		 * slots' .quanta uninitialised (krealloc does not zero the
		 * grown area).  Index with j instead.
		 */
		for (j = i; j < new_bitmap_allocs; j++) {
			pmem[id].allocator.bitmap.bitm_alloc[j].bit = -1;
			pmem[id].allocator.bitmap.bitm_alloc[j].quanta = 0;
		}

		DLOG("increased # of allocated regions to %d for id %d\n",
			pmem[id].allocator.bitmap.bitmap_allocs, id);
	}

	DLOG("bitnum %d, bitm_alloc index %d\n", bitnum, i);

	/* Commit the allocation in slot i. */
	pmem[id].allocator.bitmap.bitmap_free -= quanta_needed;
	pmem[id].allocator.bitmap.bitm_alloc[i].bit = bitnum;
	pmem[id].allocator.bitmap.bitm_alloc[i].quanta = quanta_needed;
leave:
	return bitnum;
}

/*
 * reserve_quanta() - reserve @quanta_needed contiguous quanta in the
 * bitmap of pmem device @id, respecting the requested @align.
 *
 * Computes the first candidate bit from the region base rounded up to
 * @align, converts @align into a bit-spacing, then asks
 * bitmap_allocate_contiguous() for a run.  Returns the starting bit
 * number or -1 on failure.
 */
static int reserve_quanta(const unsigned int quanta_needed,
		const int id,
		unsigned int align)
{
	/* alignment should be a valid power of 2 */
	int ret = -1, start_bit = 0, spacing = 1;
~~snip
	start_bit = bit_from_paddr(id,
		(pmem[id].base + align - 1) & ~(align - 1));
	if (start_bit <= -1) {
		return -1;
	}
	/* spacing = candidate step in quanta; at least 1. */
	spacing = align / pmem[id].quantum;
	spacing = spacing > 1 ? spacing : 1;
	/* Carve a contiguous run of quanta out of this device's memory
	 * pool for the requesting user process. */
	ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap,
		quanta_needed,
		(pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum,
		spacing,
		start_bit);

	return ret;
}

内存分配完成。

ioctl

PMEM提供了若干个ioctl的cmd供用户空间操作,有获取当前申请的len,获取PMEM模块的总size,申请pmem等,这里我们重点关注alloc, map, connect,其他几个很简单,可自行分析。

  1. static long pmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 
  2.     /* called from user space as file op, so file guaranteed to be not 
  3.      * NULL 
  4.      */ 
  5.     struct pmem_data *data = file->private_data; 
  6.     int id = get_id(file); 
  7. #if PMEM_DEBUG_MSGS 
  8.     char currtask_name[ 
  9.         FIELD_SIZEOF(struct task_struct, comm) + 1]; 
  10. #endif 
  11.  
  12.     DLOG("pid %u(%s) file %p(%ld) cmd %#x, dev %s(id: %d)\n", 
  13.         current->pid, get_task_comm(currtask_name, current), 
  14.         file, file_count(file), cmd, get_name(file), id); 
  15.  
  16.     switch (cmd) { 
  17.     case PMEM_GET_PHYS: 
  18.         ~~snip 
  19.     case PMEM_MAP: 
  20.         { 
  21.             struct pmem_region region; 
  22.             DLOG("map\n"); 
  23.             if (copy_from_user(&region, (void __user *)arg, 
  24.                         sizeof(struct pmem_region))) 
  25.                 return -EFAULT; 
  26.             return pmem_remap(&region, file, PMEM_MAP); 
  27.         } 
  28.         break; 
  29.     case PMEM_UNMAP: 
  30.         ~~snip 
  31.     case PMEM_GET_SIZE: 
  32.         ~~snip 
  33.     case PMEM_GET_TOTAL_SIZE: 
  34.         ~~snip 
  35.     case PMEM_GET_FREE_SPACE: 
  36. ~~snip 
  37.     case PMEM_ALLOCATE: 
  38.         { 
  39.             int ret = 0
  40.             DLOG("allocate, id %d\n", id); 
  41.             down_write(&data->sem); 
  42.             if (has_allocation(file)) { 
  43.                 pr_err("pmem: Existing allocation found on " 
  44.                     "this file descrpitor\n"); 
  45.                 up_write(&data->sem); 
  46.                 return -EINVAL; 
  47.             } 
  48.  
  49.             mutex_lock(&pmem[id].arena_mutex); 
  50.             data->index = pmem_allocate_from_id(id, 
  51.                     arg, 
  52.                     SZ_4K); 
  53.             mutex_unlock(&pmem[id].arena_mutex); 
  54.             ret = data->index == -1 ? -ENOMEM : 
  55.                 data->index; 
  56.             up_write(&data->sem); 
  57.             return ret; 
  58.         } 
  59.     case PMEM_ALLOCATE_ALIGNED: 
  60.         ~~snip 
  61.     case PMEM_CONNECT: 
  62.         DLOG("connect\n"); 
  63.         return pmem_connect(arg, file); 
  64.     case PMEM_CLEAN_INV_CACHES: 
  65.     case PMEM_CLEAN_CACHES: 
  66.     case PMEM_INV_CACHES: 
  67.         ~~snip 
  68.     default: 
  69.         if (pmem[id].ioctl) 
  70.             return pmem[id].ioctl(file, cmd, arg); 
  71.  
  72.         DLOG("ioctl invalid (%#x)\n", cmd); 
  73.         return -EINVAL; 
  74.     } 
  75.     return 0; 
/*
 * pmem_ioctl() - ioctl entry point for the pmem character device.
 *
 * Dispatches the userspace commands (PMEM_ALLOCATE, PMEM_MAP,
 * PMEM_CONNECT, cache maintenance, ...).  Unknown commands are forwarded
 * to the per-device ioctl hook when one is registered.  Several case
 * bodies are elided by the article's "~~snip" markers.
 */
static long pmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/* called from user space as file op, so file guaranteed to be not
	 * NULL
	 */
	struct pmem_data *data = file->private_data;
	int id = get_id(file);
#if PMEM_DEBUG_MSGS
	char currtask_name[
		FIELD_SIZEOF(struct task_struct, comm) + 1];
#endif

	DLOG("pid %u(%s) file %p(%ld) cmd %#x, dev %s(id: %d)\n",
		current->pid, get_task_comm(currtask_name, current),
		file, file_count(file), cmd, get_name(file), id);

	switch (cmd) {
	case PMEM_GET_PHYS:
		~~snip
	case PMEM_MAP:
		{
			struct pmem_region region;
			DLOG("map\n");
			/* FIX: the excerpt contained the mangled token
			 * "®ion" — an HTML-entity artifact of "&region". */
			if (copy_from_user(&region, (void __user *)arg,
						sizeof(struct pmem_region)))
				return -EFAULT;
			return pmem_remap(&region, file, PMEM_MAP);
		}
		break;
	case PMEM_UNMAP:
		~~snip
	case PMEM_GET_SIZE:
		~~snip
	case PMEM_GET_TOTAL_SIZE:
		~~snip
	case PMEM_GET_FREE_SPACE:
~~snip
	case PMEM_ALLOCATE:
		{
			int ret = 0;
			DLOG("allocate, id %d\n", id);
			down_write(&data->sem);
			/* One allocation per fd: reject a second request. */
			if (has_allocation(file)) {
				/* FIX: corrected "descrpitor" typo. */
				pr_err("pmem: Existing allocation found on "
					"this file descriptor\n");
				up_write(&data->sem);
				return -EINVAL;
			}

			/* arena_mutex serialises the allocator state. */
			mutex_lock(&pmem[id].arena_mutex);
			data->index = pmem_allocate_from_id(id,
					arg,	/* arg is the requested length */
					SZ_4K);	/* default 4 KiB alignment */
			mutex_unlock(&pmem[id].arena_mutex);
			ret = data->index == -1 ? -ENOMEM :
				data->index;
			up_write(&data->sem);
			return ret;
		}
	case PMEM_ALLOCATE_ALIGNED:
		~~snip
	case PMEM_CONNECT:
		DLOG("connect\n");
		/* arg is the master fd whose allocation we attach to. */
		return pmem_connect(arg, file);
	case PMEM_CLEAN_INV_CACHES:
	case PMEM_CLEAN_CACHES:
	case PMEM_INV_CACHES:
		~~snip
	default:
		/* Forward unknown commands to the per-device hook. */
		if (pmem[id].ioctl)
			return pmem[id].ioctl(file, cmd, arg);

		DLOG("ioctl invalid (%#x)\n", cmd);
		return -EINVAL;
	}
	return 0;
}

PMEM_ALLOCATE:

咦,也是调用pmem_allocate_from_id(),在前面的mmap中,我们已经分析过了。

PMEM_CONNECT:

调用的是pmem_connect(),这个cmd主要是供另外一个进程和主进程共享PMEM用的,传进去的参数为主进程打开PMEM的fd,这里称主进程master .

  1. static int pmem_connect(unsigned long connect, struct file *file) 
  2.     int ret = 0, put_needed; 
  3.     struct file *src_file; 
  4. ~~snip 
  5.     /*根据主进程的fd获得相对应的file.*/ 
  6.     src_file = fget_light(connect, &put_needed); 
  7. ~~snip 
  8.     if (unlikely(!is_pmem_file(src_file))) { 
  9.         pr_err("pmem: %s: src file is not a pmem file!\n", 
  10.             __func__); 
  11.         ret = -EINVAL; 
  12.         goto put_src_file; 
  13.     } else { 
  14.         /*得到master的pmem data.*/ 
  15.         struct pmem_data *src_data = src_file->private_data; 
  16.  
  17.         if (!src_data) { 
  18.             pr_err("pmem: %s: src file pointer has no" 
  19.                 "private data, bailing out!\n", __func__); 
  20.             ret = -EINVAL; 
  21.             goto put_src_file; 
  22.         } 
  23.  
  24.         down_read(&src_data->sem); 
  25.  
  26.         if (unlikely(!has_allocation(src_file))) { 
  27.             up_read(&src_data->sem); 
  28.             pr_err("pmem: %s: src file has no allocation!\n", 
  29.                 __func__); 
  30.             ret = -EINVAL; 
  31.         } else { 
  32.             struct pmem_data *data; 
  33.             /*获得master分配到的内存在bitmap中的index.*/ 
  34.             int src_index = src_data->index; 
  35.  
  36.             up_read(&src_data->sem); 
  37.  
  38.             data = file->private_data; 
  39.             if (!data) { 
  40.                 pr_err("pmem: %s: passed in file " 
  41.                     "pointer has no private data, bailing" 
  42.                     " out!\n", __func__); 
  43.                 ret = -EINVAL; 
  44.                 goto put_src_file; 
  45.             } 
  46.  
  47.             down_write(&data->sem); 
  48.             if (has_allocation(file) && 
  49.                     (data->index != src_index)) { 
  50.                 up_write(&data->sem); 
  51.  
  52.                 pr_err("pmem: %s: file is already " 
  53.                     "mapped but doesn't match this " 
  54.                     "src_file!\n", __func__); 
  55.                 ret = -EINVAL; 
  56.             } else { 
  57.                 /*将master的pmem data数据保存到当前进程中。*/ 
  58.                 data->index = src_index
  59.                 data->flags |= PMEM_FLAGS_CONNECTED;     //设置标志,在mmap中用到。 
  60.                 data->master_fd = connect
  61.                 data->master_file = src_file
  62.  
  63.                 up_write(&data->sem); 
  64.  
  65.                 DLOG("connect %p to %p\n", file, src_file); 
  66.             } 
  67.         } 
  68.     } 
  69. put_src_file: 
  70.     fput_light(src_file, put_needed); 
  71. leave: 
  72.     return ret; 
/*
 * pmem_connect() - attach @file (a client pmem fd) to the allocation
 * owned by the master fd @connect.
 *
 * Looks up the master's struct file, validates that it is a pmem file
 * with a live allocation, then copies the master's allocation index and
 * identity into the client's pmem_data so a later mmap() on the client
 * fd maps the same physical region.
 */
static int pmem_connect(unsigned long connect, struct file *file)
{
	int ret = 0, put_needed;
	struct file *src_file;
~~snip
	/* Resolve the master process's fd to its struct file. */
	src_file = fget_light(connect, &put_needed);
~~snip
	if (unlikely(!is_pmem_file(src_file))) {
		pr_err("pmem: %s: src file is not a pmem file!\n",
			__func__);
		ret = -EINVAL;
		goto put_src_file;
	} else {
		/* Fetch the master's pmem bookkeeping data. */
		struct pmem_data *src_data = src_file->private_data;

		if (!src_data) {
			pr_err("pmem: %s: src file pointer has no"
				"private data, bailing out!\n", __func__);
			ret = -EINVAL;
			goto put_src_file;
		}

		down_read(&src_data->sem);

		if (unlikely(!has_allocation(src_file))) {
			up_read(&src_data->sem);
			pr_err("pmem: %s: src file has no allocation!\n",
				__func__);
			ret = -EINVAL;
		} else {
			struct pmem_data *data;
			/* Bitmap index of the master's allocation. */
			int src_index = src_data->index;

			up_read(&src_data->sem);

			data = file->private_data;
			if (!data) {
				pr_err("pmem: %s: passed in file "
					"pointer has no private data, bailing"
					" out!\n", __func__);
				ret = -EINVAL;
				goto put_src_file;
			}

			down_write(&data->sem);
			/* A client already mapped to a different allocation
			 * cannot be re-pointed at this master. */
			if (has_allocation(file) &&
					(data->index != src_index)) {
				up_write(&data->sem);

				pr_err("pmem: %s: file is already "
					"mapped but doesn't match this "
					"src_file!\n", __func__);
				ret = -EINVAL;
			} else {
				/* Save the master's identity into the client's
				 * pmem_data. */
				data->index = src_index;
				data->flags |= PMEM_FLAGS_CONNECTED;		/* flag checked later by mmap */
				data->master_fd = connect;
				data->master_file = src_file;

				up_write(&data->sem);

				DLOG("connect %p to %p\n", file, src_file);
			}
		}
	}
put_src_file:
	fput_light(src_file, put_needed);
leave:
	return ret;
}

嗯,这个cmd就是将master的struct pmem_data给了当前要共享的进程。

PMEM_MAP:

这个接口是为了用户进程想要执行remap而开设的,不过我想不到什么时候要重新映射呢?

  1. int pmem_remap(struct pmem_region *region, struct file *file, 
  2.               unsigned operation) 
  3.     int ret; 
  4.     struct pmem_region_node *region_node; 
  5.     struct mm_struct *mm = NULL
  6.     struct list_head *elt, *elt2; 
  7.     int id = get_id(file); 
  8.     struct pmem_data *data; 
  9. ~~snip 
  10.  
  11.     /* is_pmem_file fails if !file */ 
  12.     data = file->private_data; 
  13. ~~snip 
  14.     /* lock the mm and data */ 
  15.     ret = pmem_lock_data_and_mm(file, data, &mm); 
  16.     if (ret) 
  17.         return 0; 
  18. /*明确指定只有master file才能作remap动作。*/ 
  19.     /* only the owner of the master file can remap the client fds 
  20.      * that back in it */ 
  21.     if (!is_master_owner(file)) { 
  22.         ret = -EINVAL; 
  23.         goto err; 
  24.     } 
  25. ~~snip 
  26.     if (operation == PMEM_MAP) { 
  27.     /*生成一个struct pem_region_node,用来保存上层传下来的region信息。*/ 
  28.         region_node = kmalloc(sizeof(struct pmem_region_node), 
  29.                   GFP_KERNEL); 
  30.         if (!region_node) { 
  31.             ret = -ENOMEM; 
  32.             goto err; 
  33.         } 
  34.         region_node->region = *region; 
  35.         /*添加到data的region_list中。*/ 
  36.         list_add(&region_node->list, &data->region_list); 
  37.     } else if (operation == PMEM_UNMAP) { 
  38.         int found = 0
  39.         list_for_each_safe(elt, elt2, &data->region_list) { 
  40.             region_node = list_entry(elt, struct pmem_region_node, 
  41.                       list); 
  42.             if (region->len == 0 || 
  43.                 (region_node->region.offset == region->offset && 
  44.                 region_node->region.len == region->len)) { 
  45.                 list_del(elt); 
  46.                 kfree(region_node); 
  47.                 found = 1
  48.             } 
  49.         } 
  50. ~~snip 
  51.     /*比较好奇PMEM_FLAGS_SUBMAP是想共享的进程设置的,但是前面的判断又是 
  52. 只能master file才能作remap,这样岂不是永远执行不了?*/ 
  53.     if (data->vma && PMEM_IS_SUBMAP(data)) { 
  54.         if (operation == PMEM_MAP) 
  55.             ret = pmem_remap_pfn_range(id, data->vma, data, 
  56.                    region->offset, region->len); 
  57.         else if (operation == PMEM_UNMAP) 
  58.             ret = pmem_unmap_pfn_range(id, data->vma, data, 
  59.                    region->offset, region->len); 
  60.     } 
  61.  
  62. err: 
  63.     pmem_unlock_data_and_mm(data, mm); 
  64.     return ret; 
/*
 * pmem_remap() - register (PMEM_MAP) or drop (PMEM_UNMAP) a sub-region
 * of an existing pmem allocation and, when the target vma is already a
 * submap, (un)map the corresponding pfn range into it.
 *
 * Only the owner of the master file may remap the client fds backed by
 * it.  Parts of the validation are elided by "~~snip".
 */
int pmem_remap(struct pmem_region *region, struct file *file,
		      unsigned operation)
{
	int ret;
	struct pmem_region_node *region_node;
	struct mm_struct *mm = NULL;
	struct list_head *elt, *elt2;
	int id = get_id(file);
	struct pmem_data *data;
~~snip

	/* is_pmem_file fails if !file */
	data = file->private_data;
~~snip
	/* lock the mm and data */
	ret = pmem_lock_data_and_mm(file, data, &mm);
	if (ret)
		/* NOTE(review): returning 0 (success) on a locking failure
		 * looks suspicious, but matches this excerpt; confirm
		 * against the full driver source before changing it. */
		return 0;
	/* Only the master file's owner may remap the client fds. */
	/* only the owner of the master file can remap the client fds
	 * that back in it */
	if (!is_master_owner(file)) {
		ret = -EINVAL;
		goto err;
	}
~~snip
	if (operation == PMEM_MAP) {
		/* Record the userspace-supplied region on the fd's
		 * region_list via a new pmem_region_node. */
		region_node = kmalloc(sizeof(struct pmem_region_node),
			      GFP_KERNEL);
		if (!region_node) {
			ret = -ENOMEM;
			goto err;
		}
		region_node->region = *region;
		/* FIX: the excerpt contained the mangled token
		 * "®ion_node" — an HTML-entity artifact of "&region_node". */
		list_add(&region_node->list, &data->region_list);
	} else if (operation == PMEM_UNMAP) {
		/* len == 0 means "remove every region"; otherwise remove
		 * the exact (offset, len) matches. */
		int found = 0;
		list_for_each_safe(elt, elt2, &data->region_list) {
			region_node = list_entry(elt, struct pmem_region_node,
				      list);
			if (region->len == 0 ||
			    (region_node->region.offset == region->offset &&
			    region_node->region.len == region->len)) {
				list_del(elt);
				kfree(region_node);
				found = 1;
			}
		}
~~snip
	/* NOTE(review): PMEM_FLAGS_SUBMAP is set by the sharing (client)
	 * process, yet the is_master_owner() gate above only admits the
	 * master — the article questions whether this branch can ever
	 * execute; verify against the full driver source. */
	if (data->vma && PMEM_IS_SUBMAP(data)) {
		if (operation == PMEM_MAP)
			ret = pmem_remap_pfn_range(id, data->vma, data,
				   region->offset, region->len);
		else if (operation == PMEM_UNMAP)
			ret = pmem_unmap_pfn_range(id, data->vma, data,
				   region->offset, region->len);
	}

err:
	pmem_unlock_data_and_mm(data, mm);
	return ret;
}

2013/03/04
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值