Rereading uclinux-2008r1 (bf561) kernel memory zone management (3): zone initialization

 

快乐虾

http://blog.csdn.net/lights_joy/

lights@hb165.com

   

This article applies to:

ADI bf561 DSP

优视 BF561EVB development board

uclinux-2008r1-rc8 (ported to VDSP5)

Visual DSP++ 5.0

   

You are welcome to repost this article, but please retain the author information.

 

1.1.1   zone initialization

After the page counts and page descriptors in pglist_data have been initialized, the kernel moves on to initializing the available zones; in practice only the ZONE_DMA zone is actually used.

1.1.1.1   free_area_init_core

This function also lives in mm/page_alloc.c; its code is as follows:

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
static void __meminit free_area_init_core(struct pglist_data *pgdat,
        unsigned long *zones_size, unsigned long *zholes_size)
{
    enum zone_type j;
    int nid = pgdat->node_id;
    unsigned long zone_start_pfn = pgdat->node_start_pfn;
    int ret;

    // an empty statement here; does nothing in this configuration
    pgdat_resize_init(pgdat);
    pgdat->nr_zones = 0;

    // initialize the kswapd_wait wait queue (a list with spinlock support)
    init_waitqueue_head(&pgdat->kswapd_wait);
    pgdat->kswapd_max_order = 0;

    // loop over all MAX_NR_ZONES zones, though only ZONE_DMA is actually used
    for (j = 0; j < MAX_NR_ZONES; j++) {
        struct zone *zone = pgdat->node_zones + j;
        unsigned long size, realsize, memmap_pages;

        // size = realsize = number of SDRAM pages; for 64M SDRAM
        // (limited to 60M) the value is 0x3bff
        size = zone_spanned_pages_in_node(nid, j, zones_size);
        realsize = size - zone_absent_pages_in_node(nid, j,
                                zholes_size);

        /*
         * Adjust realsize so that it accounts for how much memory
         * is used by this zone for memmap. This affects the watermark
         * and per-cpu initialisations
         */
        memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
        if (realsize >= memmap_pages) {
            realsize -= memmap_pages;
            printk(KERN_DEBUG
                "  %s zone: %lu pages used for memmap\n",
                zone_names[j], memmap_pages);
        } else
            printk(KERN_WARNING
                "  %s zone: %lu pages exceeds realsize %lu\n",
                zone_names[j], memmap_pages, realsize);

        /* Account for reserved pages */
        // dma_reserve may be handed over from the boot code; here it is 0
        if (j == 0 && realsize > dma_reserve) {
            realsize -= dma_reserve;
            printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                    zone_names[0], dma_reserve);
        }

        // is_highmem_idx() is always 0 on this platform (no highmem)
        if (!is_highmem_idx(j))
            nr_kernel_pages += realsize;
        nr_all_pages += realsize;

        zone->spanned_pages = size;
        zone->present_pages = realsize;
        zone->name = zone_names[j];
        spin_lock_init(&zone->lock);
        spin_lock_init(&zone->lru_lock);
        zone_seqlock_init(zone);    // an empty statement here
        zone->zone_pgdat = pgdat;

        zone->prev_priority = DEF_PRIORITY;

        zone_pcp_init(zone);
        INIT_LIST_HEAD(&zone->active_list);
        INIT_LIST_HEAD(&zone->inactive_list);
        zone->nr_scan_active = 0;
        zone->nr_scan_inactive = 0;
        zap_zone_vm_stats(zone);    // zero the vm_stat counters
        atomic_set(&zone->reclaim_in_progress, 0);
        if (!size)
            continue;

        ret = init_currently_empty_zone(zone, zone_start_pfn,
                        size, MEMMAP_EARLY);
        BUG_ON(ret);
        zone_start_pfn += size;
    }
}

 

By the time execution reaches this function, pgdat->node_id is 0 and pgdat->node_start_pfn is also 0.

As the code above shows, nr_kernel_pages and nr_all_pages both count the available pages: they describe the memory range from 0 to 60M, excluding the pages occupied by the page array. For 64M SDRAM (actually limited to 60M) with MTD disabled, their value is 0x3b6a.

The code also shows that zone->spanned_pages and zone->present_pages both count the available SDRAM pages, but present_pages is spanned_pages minus the pages occupied by the page array. For 64M SDRAM with MTD disabled, memory is actually limited to 60M: spanned_pages is 0x3bff while present_pages is 0x3b6a.
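To make this accounting concrete, here is a small standalone model (not kernel code) of the memmap adjustment, using the numbers above. The 40-byte page descriptor size is an assumption, chosen because it reproduces the 0x3b6a figure quoted in the text:

#include <stdio.h>

#define PAGE_SHIFT 12                    /* 4K pages */

int main(void)
{
    unsigned long spanned = 0x3bff;      /* pages spanned by ZONE_DMA */
    unsigned long sizeof_page = 40;      /* assumed sizeof(struct page) */

    /* pages consumed by the mem_map (page descriptor) array itself */
    unsigned long memmap_pages = (spanned * sizeof_page) >> PAGE_SHIFT;
    unsigned long present = spanned - memmap_pages;

    /* prints: memmap_pages = 0x95, present_pages = 0x3b6a */
    printf("memmap_pages = 0x%lx\n", memmap_pages);
    printf("spanned_pages = 0x%lx, present_pages = 0x%lx\n",
           spanned, present);
    return 0;
}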

1.1.1.2   init_currently_empty_zone

This function is called for every zone whose size is non-zero; in practice the kernel only calls it for the ZONE_DMA zone. It is located in mm/page_alloc.c:

__meminit int init_currently_empty_zone(struct zone *zone,
                unsigned long zone_start_pfn,
                unsigned long size,
                enum memmap_context context)
{
    struct pglist_data *pgdat = zone->zone_pgdat;
    int ret;
    ret = zone_wait_table_init(zone, size);
    if (ret)
        return ret;
    pgdat->nr_zones = zone_idx(zone) + 1;

    zone->zone_start_pfn = zone_start_pfn;

    memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);

    zone_init_free_lists(pgdat, zone, zone->spanned_pages);

    return 0;
}

When this function is called, zone_start_pfn is 0 and size is the number of pages in the whole SDRAM region, which for 64M memory (actually limited to 60M) is 0x3bff; context is MEMMAP_EARLY.

zone_wait_table_init computes the values of the wait_table-related members of the zone.

memmap_init sets initial values for every page structure.
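As a rough sketch of that per-page work (a userspace model with a simplified stand-in for struct page, not the kernel's actual layout or flag bits), each descriptor is marked reserved and given an initial reference count and an empty lru list:

#include <stdio.h>

/* simplified stand-in for struct page, for illustration only */
struct fake_page {
    unsigned long flags;              /* PG_reserved lives here in the kernel */
    int count;                        /* reference count */
    struct fake_page *lru_next, *lru_prev;
};

#define PG_RESERVED (1UL << 0)        /* assumed bit position for the model */

/* models the per-page work: mark reserved, init the refcount,
 * and give the page an empty lru list */
static void fake_memmap_init(struct fake_page *map, unsigned long npages)
{
    unsigned long pfn;
    for (pfn = 0; pfn < npages; pfn++) {
        struct fake_page *p = &map[pfn];
        p->flags = PG_RESERVED;
        p->count = 1;
        p->lru_next = p->lru_prev = p;   /* empty circular list */
    }
}

int main(void)
{
    static struct fake_page map[16];
    fake_memmap_init(map, 16);
    printf("page 0 reserved: %lu\n", map[0].flags & PG_RESERVED);
    return 0;
}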

zone_init_free_lists initializes the free_area members used by the buddy algorithm.

1.1.1.3   zone_wait_table_init

The kernel's own comment already explains the three wait_table-related members of struct zone quite clearly:

    /*
     * wait_table      -- the array holding the hash table
     * wait_table_hash_nr_entries    -- the size of the hash table array
     * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
     *
     * The purpose of all these is to keep track of the people
     * waiting for a page to become available and make them
     * runnable again when possible. The trouble is that this
     * consumes a lot of space, especially when so few things
     * wait on pages at a given time. So instead of using
     * per-page waitqueues, we use a waitqueue hash table.
     *
     * The bucket discipline is to sleep on the same queue when
     * colliding and wake all in that wait queue when removing.
     * When something wakes, it must check to be sure its page is
     * truly available, a la thundering herd. The cost of a
     * collision is great, but given the expected load of the
     * table, they should be so rare as to be outweighed by the
     * benefits from the saved space.
     *
     * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
     * primary users of these fields, and in mm/page_alloc.c
     * free_area_init_core() performs the initialization of them.
     */

Now let's look at how they are initialized:

static noinline __init_refok
int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
{
    int i;
    struct pglist_data *pgdat = zone->zone_pgdat;
    size_t alloc_size;

    /*
     * The per-page waitqueue mechanism uses hashed waitqueues
     * per zone.
     */
    zone->wait_table_hash_nr_entries =
         wait_table_hash_nr_entries(zone_size_pages);
    zone->wait_table_bits =
        wait_table_bits(zone->wait_table_hash_nr_entries);
    alloc_size = zone->wait_table_hash_nr_entries
                    * sizeof(wait_queue_head_t);

    if (system_state == SYSTEM_BOOTING) {
        zone->wait_table = (wait_queue_head_t *)
            alloc_bootmem_node(pgdat, alloc_size);
    } else {
        /*
         * This case means that a zone whose size was 0 gets new memory
         * via memory hot-add.
         * But it may be the case that a new node was hot-added.  In
         * this case vmalloc() will not be able to use this new node's
         * memory - this wait_table must be initialized to use this new
         * node itself as well.
         * To use this new node's memory, further consideration will be
         * necessary.
         */
        zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size);
    }
    if (!zone->wait_table)
        return -ENOMEM;

    for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
        init_waitqueue_head(zone->wait_table + i);

    return 0;
}

The part worth a closer look here is the sizing of the hash table, computed by the wait_table_hash_nr_entries() helper:

 

/*
 * Helper functions to size the waitqueue hash table.
 * Essentially these want to choose hash table sizes sufficiently
 * large so that collisions trying to wait on pages are rare.
 * But in fact, the number of active page waitqueues on typical
 * systems is ridiculously low, less than 200. So this is even
 * conservative, even though it seems large.
 *
 * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
 * waitqueues, i.e. the size of the waitq table given the number of pages.
 */
#define PAGES_PER_WAITQUEUE 256

static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
{
    unsigned long size = 1;

    pages /= PAGES_PER_WAITQUEUE;

    while (size < pages)
        size <<= 1;

    /*
     * Once we have dozens or even hundreds of threads sleeping
     * on IO we've got bigger problems than wait queue collision.
     * Limit the size of the wait table to a reasonable size.
     */
    size = min(size, 4096UL);

    return max(size, 4UL);
}

Here pages is the total number of pages in the zone; for 64M memory (limited to 60M) it is 0x3bff. The function therefore returns 0x40, so zone->wait_table_hash_nr_entries will be 0x40 and zone->wait_table_bits will be 6.
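This arithmetic can be checked with a small standalone model. The wait_table_bits() below is a plain log2 stand-in for the kernel helper, not its actual implementation:

#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256

/* same logic as the kernel helper quoted above */
static unsigned long wait_table_hash_nr_entries(unsigned long pages)
{
    unsigned long size = 1;

    pages /= PAGES_PER_WAITQUEUE;        /* 0x3bff / 256 = 0x3b */
    while (size < pages)                 /* round up to a power of two */
        size <<= 1;
    if (size > 4096UL)                   /* min(size, 4096) */
        size = 4096UL;
    return size < 4UL ? 4UL : size;      /* max(size, 4) */
}

/* log2 of a power-of-two table size, standing in for wait_table_bits() */
static unsigned long wait_table_bits(unsigned long size)
{
    unsigned long bits = 0;
    while ((1UL << bits) < size)
        bits++;
    return bits;
}

int main(void)
{
    unsigned long n = wait_table_hash_nr_entries(0x3bff);

    /* prints: entries = 0x40, bits = 6 */
    printf("entries = 0x%lx, bits = %lu\n", n, wait_table_bits(n));
    return 0;
}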

 

1.1.1.4   zone_init_free_lists

void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
                unsigned long size)
{
    int order;
    for (order = 0; order < MAX_ORDER; order++) {
        INIT_LIST_HEAD(&zone->free_area[order].free_list);
        zone->free_area[order].nr_free = 0;
    }
}

#define MAX_ORDER 11

buddy算法中,将空闲页面分为11个块链表,每个块链表分别包含大小为12481632641282565121024个连续的页。为了表示此链表,在zone结构体中使用了

    /*
     * free areas of different sizes
     */
    spinlock_t         lock;
    struct free_area   free_area[MAX_ORDER];

and this function does nothing more than initialize that member.
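A minimal userspace sketch of the same initialization (list_head here is a stand-in for the kernel type), which also prints the block size managed by each order:

#include <stdio.h>

#define MAX_ORDER 11

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *l)
{
    l->next = l->prev = l;          /* an empty circular list */
}

struct free_area {
    struct list_head free_list;     /* free blocks of this order */
    unsigned long    nr_free;       /* how many such blocks */
};

int main(void)
{
    struct free_area free_area[MAX_ORDER];
    int order;

    /* mirrors zone_init_free_lists(): all lists empty, counts zero */
    for (order = 0; order < MAX_ORDER; order++) {
        INIT_LIST_HEAD(&free_area[order].free_list);
        free_area[order].nr_free = 0;
    }

    for (order = 0; order < MAX_ORDER; order++)
        printf("order %2d: blocks of %4d pages\n", order, 1 << order);
    return 0;
}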

 

 

 

References

uClinux2.6(bf561)中的CPLB (2008/2/19)
uclinux2.6(bf561)中的bootmem分析(1):猜测 (2008/5/9)
uclinux2.6(bf561)中的bootmem分析(2):调用前的参数分析 (2008/5/9)
uclinux2.6(bf561)中的bootmem分析(3):init_bootmem_node (2008/5/9)
uclinux2.6(bf561)中的bootmem分析(4):alloc_bootmem_pages (2008/5/9)
uclinux2.6(bf561)内核中的paging_init (2008/5/12)
uclinux-2008r1(bf561)内核的icache支持(1):寄存器配置初始化 (2008/5/16)
uclinux-2008r1(bf561)内核的icache支持(2):icplb_table的生成 (2008/5/16)
uclinux-2008r1(bf561)内核的icache支持(3):__fill_code_cplbtab (2008/5/16)
uclinux-2008r1(bf561)内核的icache支持(4):换页问题 (2008/5/16)
再读uclinux-2008r1(bf561)内核中的bootmem (2008/6/3)
uclinux-2008r1(bf561)内核中与存储管理相关的几个全局变量 (2008/6/4)
uclinux-2008r1(bf561)内核存储区域初探 (2008/6/4)
uclinux-2008r1(bf561)内核中的zonelist初始化 (2008/6/5)
uclinux-2008r1(bf561)内核中内存管理相关的几个结构体 (2008/6/5)
再读内核存储管理(1):相关的全局变量 (2008/6/17)
再读内核存储管理(2):相关的数据结构 (2008/6/17)
再读内核存储管理(3):bootmem分配策略 (2008/6/17)
再读内核存储管理(4):存储区域管理 (2008/6/17)
再读内核存储管理(5):buddy算法 (2008/6/17)
再读内核存储管理(6):高速缓存的应用 (2008/6/17)
再读内核存储管理(7):icache支持 (2008/6/17)
再读内核存储管理(8):片内SRAM的使用 (2008/6/17)
初读SLAB (2008/6/26)
三读bootmem (2008/7/24)
再读uclinux-2008r1(bf561)内核存储区域管理(1):相关数据结构 (2008/7/25)
再读uclinux-2008r1(bf561)内核存储区域管理(2):可用页表初始化 (2008/7/25)

 

 

 

 

 
