How Linux Initializes Memory - TCC8900

This post describes how Linux initializes its memory system, based on the TCC8900 BSP.


PART 1. The SOC's memory space layout:

First of all, we need to understand the TCC8900's memory address map (according to the official chip specification):

------------------------------------------------------------------

0xFFFFFFFF

                          On-chip peripherals  ( 256MB)

0xF0000000

______________________________________

0xE0003FFF

                          Internal ROM               (16KB)

0xE0000000

______________________________________

0x7FFFFFFF

                           Off-chip DDR2            (1GB maximum; the PV board has 256MB of DDR2)

0x40000000

_______________________________________

0x10003FFF

                            Internal RAM               (16KB)

0x10000000

_______________________________________

0x00003FFF

                            Instruction TCM          (16KB)

0x00000000

_______________________________________


PART 2. booting linux

Secondly, a quick look at the bootloader helps to understand what happens before the kernel runs:

1. The ARM startup code, tcboot/main/start.S, jumps to the function init_system() in tcboot/main/init_system.c

            ldr pc, _init_system

     _init_system:

            .word init_system

2. The main function, tcc_main() in tcboot/main/boot_main.c is called.

3. tcc_main() then calls loader() in tcboot/main/loader.c to load the kernel and ramdisk images from NAND to DDR.

4. Inside loader(), the kernel and ramdisk are loaded into DDR (starting at 0x40300000), and execution then simply jumps to that address.

        void loader(void)
        {
            unsigned char* ptr;
            unsigned long pageCount;
            short retry;

            printf("Loading...\n");

            retry = 3;
            while(retry--) {


                /* Pointer ptr points to 0x40000000 + 0x300000

                 * 0x40000000 is the start address of DDR2;

                 * 0x300000 is the Kernel start offset.

                */

                ptr = (unsigned char *)KERNEL_BASEADDRESS;

                /*
                 * Read Signature,

                 * Read the first PAGE from NANDFlash to get control information

                 * Note that the default source is NANDFLASH since the NAND_INCLUDE is defined in drivers/fwdn/fwdn_drv_v7.c

                */
                #if defined(NAND_INCLUDE) && defined(NAND_KERNEL_INCLUDE)
                NAND_Ioctl(0, 0);
                NAND_HDReadPage(HIDDEN_OFFSET_OF_KERNEL, 1, ptr);
                #elif defined(TRIFLASH_INCLUDE) && defined(TRIFLASH_KERNEL_INCLUDE)
               ...
                #endif

                /* To get Kernel/Ramdisk length and CRC*/
                memcpy(&gProgramLength, ptr+0x1C, 4);
                memcpy(&gCrc, ptr+0x18, 4);
                memcpy(&gRamdiskLength, ptr+0x14, 4);


               ...
                WATCHDOG_CLEAR;


                /*
                 * Load Kernel to DDR
                 */
                 pageCount = (gProgramLength + gRamdiskLength) / NAND_PAGE_SIZE;
                 if ( (gProgramLength + gRamdiskLength) % NAND_PAGE_SIZE )
                    pageCount++;


                 #if defined(NAND_INCLUDE) && defined(NAND_KERNEL_INCLUDE)
                 WATCHDOG_CLEAR;
                 NAND_HDReadPage(HIDDEN_OFFSET_OF_KERNEL, pageCount, ptr);
                 WATCHDOG_CLEAR;
                 #elif defined(TRIFLASH_INCLUDE) && defined(TRIFLASH_KERNEL_INCLUDE)
               ...
                 #endif
                 WATCHDOG_CLEAR;

                 /*
                  * Check CRC
                  */
                 #ifdef FULL_CRC_CHECK
                 if (VerifyROMFile((unsigned int *)KERNEL_BASEADDRESS, gProgramLength) == 1)
                 #else
                 if (VerifyROMFile_128K((unsigned int *)KERNEL_BASEADDRESS, gProgramLength) == 1)
                 #endif
                 {
                       printf("Load Ok! Jump to 0x%x (0x%x)\n\n", KERNEL_BASEADDRESS, *(unsigned long *)KERNEL_BASEADDRESS);

                       /* Start Kernel */
                       jump_to_linux();


                  } else {
                        printf("ERROR: invalid CRC, retry...\n");
                  }
            }//End of while(retry

//fail:
    printf("\nKernel loading Failed!\n");
//#endif
}


PART 3. The Linux memory subsystem initialization

Section 1.

Now we can start to figure out how Linux initializes the memory subsystem. The story of memory initialization starts in

/arch/arm/kernel/setup.c -> setup_arch(char **cmdline_p)

 
void __init setup_arch(char **cmdline_p)
{

struct tag *tags = (struct tag *)&init_tags;

/*This struct is defined in the same file, setup.c. We will see how it is used later

* static struct init_tags {
        struct tag_header hdr1;
        struct tag_core   core;
        struct tag_header hdr2;
        struct tag_mem32  mem;
        struct tag_header hdr3;
          } init_tags __initdata = {
        { tag_size(tag_core), ATAG_CORE },
        { 1, PAGE_SIZE, 0xff },
        { tag_size(tag_mem32), ATAG_MEM },
        { MEM_SIZE, PHYS_OFFSET },
        { 0, ATAG_NONE }
          };

*/

struct machine_desc *mdesc;

        /* TCC8900 machine descriptor, arch/arm/mach-tcc8900/board-tcc8900.c
            MACHINE_START(TCC8900, "Povell PV8900 Full Function")
                // Maintainer: Telechips Linux BSP Team <linux@telechips.com>
                .phys_io        = 0xf0000000,
                .io_pg_offst    = ((0xf0000000) >> 18) & 0xfffc,
                .boot_params    = PHYS_OFFSET + 0x00000100,
                .map_io         = tcc8900_map_io,
                .init_irq       = tcc8900_init_irq,
                .init_machine   = tcc8900_init_machine,
                .timer          = &tcc8900_timer,
            MACHINE_END

        */

char *from = default_command_line;

/*
* Telechips Board Memory Setting
*  - refer to arch/arm/mach-tcc8900/include/mach/memory.h
*/

char *tcc_mem_size = TCC_MEM_SIZE;
strcat(default_command_line, tcc_mem_size);


setup_processor();
mdesc = setup_machine(machine_arch_type);
machine_name = mdesc->name;


if (mdesc->soft_reboot)
reboot_setup("s");

         //defined in arch/arm/kernel/head-common.S
//both __atags_pointer and boot_params are NOT available in MTR-VGA-AV
if (__atags_pointer)
tags = phys_to_virt(__atags_pointer);
else if (mdesc->boot_params)
tags = phys_to_virt(mdesc->boot_params);


/*
* If we have the old style parameters, convert them to
* a tag list.

*/

        //The bootloader does NOT pass tags to the Kernel at all. So here we use the
//default TAGs, init_tags, see above.
//The only problem is that the memory size is 16MB:
//#define MEM_SIZE (16*1024*1024)

if (tags->hdr.tag != ATAG_CORE)
convert_to_tag_list(tags);
if (tags->hdr.tag != ATAG_CORE)
tags = (struct tag *)&init_tags;


if (mdesc->fixup)
mdesc->fixup(mdesc, tags, &from, &meminfo);

         //David: nr-banks is ZERO here on MTR-VGA-AV, means the memory has NOT been
//initialized so far.

if (tags->hdr.tag == ATAG_CORE) {
if (meminfo.nr_banks != 0)
squash_mem_tags(tags);
save_atags(tags);
parse_tags(tags);
}


init_mm.start_code = (unsigned long) &_text;
init_mm.end_code   = (unsigned long) &_etext;
init_mm.end_data   = (unsigned long) &_edata;
init_mm.brk   = (unsigned long) &_end;
         //David: the values are
//init_mm.start_code = 0xc0120000
//init_mm.end_code = 0xc04fe000
//init_mm.end_data = 0xc05285a8
//init_mm.brk = 0xc055c3dc


memcpy(boot_command_line, from, COMMAND_LINE_SIZE);

boot_command_line[COMMAND_LINE_SIZE-1] = '\0';

        /**

parse_cmdline()
This routine is used to parse command line parameters to corresponding global
variables.

To do this, Linux defines a structure:
struct early_params {
      const char *arg;
      void (*fn)(char **p);
};
to make arg / function couples to deal with each parameter.

From the System.map, we get
c0118d0c T __early_begin
c0118d0c t __early_early_mem          // arch/arm/kernel/setup.c, handles "mem="
c0118d0c T __setup_end
c0118d14 t __early__early_initrd      // arch/arm/mm/init.c, handles "initrd="
c0118d1c t __early_early_vmalloc      // arch/arm/mm/mmu.c, handles "vmalloc="
c0118d24 t __early_early_ecc          // arch/arm/mm/mmu.c, handles "ecc="
c0118d2c t __early_early_nowrite      // arch/arm/mm/mmu.c, handles "nowrite="
c0118d34 t __early_early_nocache      // arch/arm/mm/mmu.c, handles "nocache="
c0118d3c t __early_early_cachepolicy  // arch/arm/mm/mmu.c, handles "cachepolicy="
c0118d44 T __early_end

In case of MTR-VGA-AV, the input cmdline is
root=dev/ram rw initrd=0x40800000,0x2000000 init=/linuxrc console=ttySAC0 mem=182M

For example, the routine early_mem() takes 182M from "mem=" and calls
arm_add_memory() to put the memory info into the structure meminfo:
In case of MTR-VGA-AV, bank: start = 0x40200000, size = 0xb600000, node = 0x0

Another example is the routine early_initrd(), which stores the ramdisk's address and
size in the globals phys_initrd_start and phys_initrd_size respectively.
In case of MTR-VGA-AV, initrd: start = 0x40800000, size = 0x2000000


        In this function, a memory bank (the DDR2) is added to the kernel via early_mem() -> arm_add_memory(), where

        start address = 0x40200000, which comes from the macro PHYS_OFFSET in memory.h

        length = 0xb600000 (182MB), which comes from the command line, mem=182M

        **/

parse_cmdline(cmdline_p, from);


       /* function paging_init() creates mappings for memory. Explained in next section*/ 

paging_init(&meminfo, mdesc);


request_standard_resources(&meminfo, mdesc);


#ifdef CONFIG_SMP
smp_init_cpus();
#endif


cpu_init();


/*
* Set up various architecture-specific pointers
*/
init_arch_irq = mdesc->init_irq;
system_timer = mdesc->timer;
init_machine = mdesc->init_machine;


#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
#endif
early_trap_init();
}


Section 2.

By now, Linux knows that we have 182MB of DDR2 memory, at physical addresses 0x40200000 to 0x4b800000, which needs to be mapped to virtual addresses.

Function paging_init(), as its name implies, creates page tables for each memory block.

To better understand this function, we need to know the basics of ARM virtual-to-physical address translation. See another post on this blog, "ARM Virtual - Physical Memory Translation".

The function is listed below. Each sub-routine will be described in detail in the following sections.


/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */

void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
void *zero_page;

build_mem_type_table();
sanity_check_meminfo(mi);
prepare_page_table(mi);
bootmem_init(mi);
devicemaps_init(mdesc);

top_pmd = pmd_off_k(0xffff0000);

/*
* allocate the zero page.  Note that we count on this going ok.
*/

zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
memzero(zero_page, PAGE_SIZE);
empty_zero_page = virt_to_page(zero_page);
flush_dcache_page(empty_zero_page);
}

A.  build_mem_type_table();

I prefer to describe this routine at a higher level rather than list its source code here.

Linux defines a struct mem_type to describe all possible memory types in the system; see the default definition from arch/arm/mm/mmu.c below.

This routine simply modifies some fields according to the ARM architecture version.  BTW, TCC8900's ARM core is ARMv7.

What we need to be aware of here is that each macro (DOMAIN_IO, L_PTE_MT_DEV_SHARED, PMD_TYPE_TABLE, ...) is a flag that will eventually be put into the corresponding page table entries.


/* arch/arm/mm/mm.h*/
struct mem_type {
unsigned int prot_pte;  /* the bits[11:0] of 2nd level small page descriptor */
unsigned int prot_l1;   /* the bits[9, 4:0] of 1st level coarse page table descriptor */
unsigned int prot_sect; /* the bits[19:9, 4:0] of 1st level section descriptor */
unsigned int domain;    /* the bits[8:5] of 1st level descriptor */
};

see table B4-2 and B4-4 below for details.

static struct mem_type mem_types[] = {

[MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
 L_PTE_SHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
.domain = DOMAIN_IO,
},
[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE,
.domain = DOMAIN_IO,
},
[MT_DEVICE_CACHED] = { /* ioremap_cached */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
.domain = DOMAIN_IO,
},
[MT_DEVICE_WC] = {/* ioremap_wc */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE,
.domain = DOMAIN_IO,
},
[MT_UNCACHED] = {
.prot_pte = PROT_PTE_DEVICE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_IO,
},
[MT_CACHECLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain    = DOMAIN_KERNEL,
},
[MT_MINICLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
.domain    = DOMAIN_KERNEL,
},
[MT_LOW_VECTORS] = {
.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_EXEC,
.prot_l1   = PMD_TYPE_TABLE,
.domain    = DOMAIN_USER,
},
[MT_HIGH_VECTORS] = {
.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_USER | L_PTE_EXEC,
.prot_l1   = PMD_TYPE_TABLE,
.domain    = DOMAIN_USER,
},
[MT_MEMORY] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain    = DOMAIN_KERNEL,
},
[MT_MEMORY_TCC] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_UNCACHED,
.domain    = DOMAIN_KERNEL,
},
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain    = DOMAIN_KERNEL,
},
};


The relevant macro definitions are listed below:

/* Macros describe memory types*/

#define MT_DEVICE 0

#define MT_DEVICE_NONSHARED 1
#define MT_DEVICE_CACHED 2
#define MT_DEVICE_WC 3

#define MT_UNCACHED 4

#define MT_CACHECLEAN 5
#define MT_MINICLEAN 6
#define MT_LOW_VECTORS 7
#define MT_HIGH_VECTORS 8
#define MT_MEMORY 9
#define MT_ROM 10
#define MT_MEMORY_TCC 11

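/* Linux domain definition */
#define DOMAIN_KERNEL 2
#define DOMAIN_TABLE 2
#define DOMAIN_USER 1
#define DOMAIN_IO 0
/*
 * Note: the mem_types[] log further below reports domain = 0x2 for the
 * device types and domain = 0x0 for MT_MEMORY, so the running kernel
 * appears to be built with the alternative set of values
 * (DOMAIN_KERNEL 0, DOMAIN_TABLE 0, DOMAIN_USER 1, DOMAIN_IO 2).
 */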




/*
 * Level 1 descriptor (PMD)
 *   - common
 */

#define PMD_TYPE_MASK (3 << 0)
#define PMD_TYPE_FAULT (0 << 0)
#define PMD_TYPE_TABLE (1 << 0)
#define PMD_TYPE_SECT (2 << 0)
#define PMD_BIT4 (1 << 4)
#define PMD_DOMAIN(x) ((x) << 5)
#define PMD_PROTECTION (1 << 9) /* v5 */
/*
 *   - section
 */
#define PMD_SECT_BUFFERABLE (1 << 2)
#define PMD_SECT_CACHEABLE (1 << 3)
#define PMD_SECT_XN (1 << 4) /* v6 */
#define PMD_SECT_AP_WRITE (1 << 10)
#define PMD_SECT_AP_READ (1 << 11)
#define PMD_SECT_TEX(x) ((x) << 12) /* v5 */
#define PMD_SECT_APX (1 << 15) /* v6 */
#define PMD_SECT_S (1 << 16) /* v6 */
#define PMD_SECT_nG (1 << 17) /* v6 */
#define PMD_SECT_SUPER (1 << 18) /* v6 */




/*
 * + Level 2 descriptor (PTE)
 *   - common
 */

#define PTE_TYPE_MASK (3 << 0)
#define PTE_TYPE_FAULT (0 << 0)
#define PTE_TYPE_LARGE (1 << 0)
#define PTE_TYPE_SMALL (2 << 0)
#define PTE_TYPE_EXT (3 << 0) /* v5 */
#define PTE_BUFFERABLE (1 << 2)
#define PTE_CACHEABLE (1 << 3)


/*
 *   - extended small page/tiny page
 */
#define PTE_EXT_XN (1 << 0) /* v6 */
#define PTE_EXT_AP_MASK (3 << 4)
#define PTE_EXT_AP0 (1 << 4)
#define PTE_EXT_AP1 (2 << 4)
#define PTE_EXT_AP_UNO_SRO (0 << 4)
#define PTE_EXT_AP_UNO_SRW (PTE_EXT_AP0)
#define PTE_EXT_AP_URO_SRW (PTE_EXT_AP1)
#define PTE_EXT_AP_URW_SRW (PTE_EXT_AP1|PTE_EXT_AP0)
#define PTE_EXT_TEX(x) ((x) << 6) /* v5 */
#define PTE_EXT_APX (1 << 9) /* v6 */
#define PTE_EXT_COHERENT (1 << 9) /* XScale3 */
#define PTE_EXT_SHARED (1 << 10) /* v6 */
#define PTE_EXT_NG (1 << 11) /* v6 */


/*
 *   - small page
 */
#define PTE_SMALL_AP_MASK (0xff << 4)
#define PTE_SMALL_AP_UNO_SRO (0x00 << 4)
#define PTE_SMALL_AP_UNO_SRW (0x55 << 4)
#define PTE_SMALL_AP_URO_SRW (0xaa << 4)
#define PTE_SMALL_AP_URW_SRW (0xff << 4)

/*

 * "Linux" PTE definitions.
 *
 * We keep two sets of PTEs - the hardware and the linux version.
 * This allows greater flexibility in the way we map the Linux bits
 * onto the hardware tables, and allows us to have YOUNG and DIRTY
 * bits.
 *
 * The PTE table pointer refers to the hardware entries; the "Linux"
 * entries are stored 1024 bytes below.
 */
#define L_PTE_PRESENT (1 << 0)
#define L_PTE_FILE (1 << 1)/* only when !PRESENT */
#define L_PTE_YOUNG (1 << 1)
#define L_PTE_BUFFERABLE (1 << 2)/* obsolete, matches PTE */
#define L_PTE_CACHEABLE (1 << 3)/* obsolete, matches PTE */
#define L_PTE_DIRTY (1 << 6)
#define L_PTE_WRITE (1 << 7)
#define L_PTE_USER (1 << 8)
#define L_PTE_EXEC (1 << 9)
#define L_PTE_SHARED (1 << 10)/* shared(v6), coherent(xsc3) */


/*
 * These are the memory types, defined to be compatible with
 * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
 */
#define L_PTE_MT_UNCACHED (0x00 << 2)/* 0000 */
#define L_PTE_MT_BUFFERABLE (0x01 << 2)/* 0001 */
#define L_PTE_MT_WRITETHROUGH (0x02 << 2)/* 0010 */
#define L_PTE_MT_WRITEBACK (0x03 << 2)/* 0011 */
#define L_PTE_MT_MINICACHE (0x06 << 2)/* 0110 (sa1100, xscale) */
#define L_PTE_MT_WRITEALLOC (0x07 << 2)/* 0111 */
#define L_PTE_MT_DEV_SHARED (0x04 << 2)/* 0100 */
#define L_PTE_MT_DEV_NONSHARED (0x0c << 2)/* 1100 */
#define L_PTE_MT_DEV_WC (0x09 << 2)/* 1001 */
#define L_PTE_MT_DEV_CACHED (0x0b << 2)/* 1011 */
#define L_PTE_MT_MASK (0x0f << 2)

Anyway, at the end of this routine, the mem_types[ ] array looks like below:

Memory policy: ECC disabled, Data cache writeback

mem_type[0]: prot_sect = 0x10456, prot_l1 = 0x41, prot_pte = 0x4d3, domain = 0x2, 
mem_type[1]: prot_sect = 0x2452, prot_l1 = 0x41, prot_pte = 0xf3, domain = 0x2, 
mem_type[2]: prot_sect = 0x45e, prot_l1 = 0x41, prot_pte = 0xef, domain = 0x2, 
mem_type[3]: prot_sect = 0x1452, prot_l1 = 0x41, prot_pte = 0xe7, domain = 0x2, 
mem_type[4]: prot_sect = 0x52, prot_l1 = 0x41, prot_pte = 0xc3, domain = 0x2, 
mem_type[5]: prot_sect = 0x841e, prot_l1 = 0x0, prot_pte = 0x0, domain = 0x0, 
mem_type[6]: prot_sect = 0x941a, prot_l1 = 0x0, prot_pte = 0x0, domain = 0x0, 
mem_type[7]: prot_sect = 0x0, prot_l1 = 0x21, prot_pte = 0x24b, domain = 0x1, 
mem_type[8]: prot_sect = 0x0, prot_l1 = 0x21, prot_pte = 0x34b, domain = 0x1, 
mem_type[9]: prot_sect = 0x40e, prot_l1 = 0x0, prot_pte = 0x0, domain = 0x0, 
mem_type[10]: prot_sect = 0x840e, prot_l1 = 0x0, prot_pte = 0x0, domain = 0x0, 
mem_type[11]: prot_sect = 0x402, prot_l1 = 0x0, prot_pte = 0x0, domain = 0x0,

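Let's take a closer look at mem_types[9], MT_MEMORY, which holds the properties of the external DDR2. Its prot_sect = 0x40e decodes, using the macros above, as PMD_TYPE_SECT (0x2) | PMD_SECT_BUFFERABLE (0x4) | PMD_SECT_CACHEABLE (0x8) | PMD_SECT_AP_WRITE (0x400), i.e. a cacheable, bufferable, writable 1st level section mapping. prot_pte and prot_l1 stay 0 because MT_MEMORY only defines prot_sect and domain.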

B. sanity_check_meminfo()

This routine is quite simple: it just checks whether the memory banks are valid.


C. prepare_page_table(struct meminfo *mi)

This routine is also straightforward: it clears the page table entries that map:

1. from 0 - 0xbf000000 (MODULES_VADDR)

2. from 0xbf000000 - 0xc0000000 (PAGE_OFFSET)

3. from 0xcb600000 (the end of the DDR2's virtual address range, i.e. 0x4b800000 - PHYS_OFFSET + PAGE_OFFSET) - 0xe0000000 (VMALLOC_END)


D. bootmem_init(struct meminfo *mi)

This is the core function of creating page tables. 

First, let's take a look on its flowchart, then we will go through each routine step by step.

bootmem_init(struct meminfo *mi)

check_initrd(mi)

bootmem_init_node(node, mi) -> map_memory_bank(bank) -> create_mapping(struct map_desc *map) -> alloc_init_section()

reserve_node_zero()

bootmem_reserve_initrd(node)

sparse_init()

bootmem_free_node(node, mi);

set global variables: high_memory / max_pfn / max_low_pfn

As you might have noticed, the function bootmem_init_node() is the one that creates the page tables. We will focus on it.


D1. bootmem_init_node(). This function gets the pfn (Page Frame Number) of the start / end physical address of the DDR2 memory:

start_pfn = 0x40200, end_pfn = 0x4b800;

Then it calls map_memory_bank(bank) /* bank.start = 0x40200000; bank.size = 0xb600000*/


D2. map_memory_bank() declares a struct map_desc object, map, and initializes it as:

map.pfn = bank_pfn_start(bank);                      /* = 0x40200 */

map.virtual = __phys_to_virt(bank_phys_start(bank)); /* = 0xc0000000 */

map.length = bank_phys_size(bank);                   /* = 0xb600000 */

map.type = MT_MEMORY;                                /* = 9 */

Then it calls create_mapping(&map);


D3. void __init create_mapping(struct map_desc *md)

{

......

addr = md->virtual & PAGE_MASK; /* addr = 0xc0000000 */
phys = (unsigned long)__pfn_to_phys(md->pfn); /* phys = 0x40200000 */
length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); /* length = 0xb600000 */
......
/*

* pgd = 0xc00ff000

* The reason is that the 1st level page table is supposed to map the whole virtual address space, which is from 0 - 4GB.

* Each 1st level entry occupies 4 bytes and maps 1MB of memory. Therefore, the entries that map 0 - 3GB (0xc0000000) occupy

* 4 bytes x (3GB / 1MB) = 4 x 3072 = 0x3000 bytes.

* And this kernel places its 1st level page table (swapper_pg_dir) at 0xc00fc000 (see init_mm.pgd in the log below).

* So, the entry which maps virtual address 0xc0000000 resides at 0xc00fc000 + 0x3000 = 0xc00ff000

*/
pgd = pgd_offset_k(addr);


end = addr + length;


do {
unsigned long next = pgd_addr_end(addr, end);

/* as its name (section) implies, this routine creates 1st level entries for the virtual address range from 0xc0000000 to 0xcb600000 */

alloc_init_section(pgd, addr, next, phys, type);


phys += next - addr;
addr = next;
} while (pgd++, addr != end);
}

D4. alloc_init_section()

Before we jump into the source code, here is something we need to be aware of.

in arch/arm/include/asm/pgtable.h:

/*
  * PMD_SHIFT determines the size of the area a second-level page table can map
 * PGDIR_SHIFT determines what a third-level page table entry can map
  */
#define PMD_SHIFT 21
#define PGDIR_SHIFT 21

#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))

#define SECTION_SHIFT 20
#define SECTION_SIZE (1UL << SECTION_SHIFT)

PMD_SIZE and PGDIR_SIZE are both 2^21 = 0x200000 (2MB).

SECTION_SIZE is 2^20 = 0x100000 (1MB).

This means:

1. ARM Linux does not distinguish PGD from PMD; they are the same thing.

2. One Linux PGD / PMD entry covers 2MB and consists of a pair of hardware 1st level entries, each of which maps 1MB (SECTION_SIZE). That is why the code below advances in SECTION_SIZE steps, and why the log further down shows two pmd values for every pgd.

static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,

      unsigned long end, unsigned long phys,

      const struct mem_type *type)

{

pmd_t *pmd = pmd_offset(pgd, addr);


/*
* Try a section mapping - end, addr and phys must all be aligned
* to a section boundary.  Note that PMDs refer to the individual
* L1 entries, whereas PGDs refer to a group of L1 entries making
* up one logical pointer to an L2 table.
*/
if (((addr | end | phys) & ~SECTION_MASK) == 0) {
pmd_t *p = pmd;


if (addr & SECTION_SIZE)
pmd++;


do {
*pmd = __pmd(phys | type->prot_sect);
phys += SECTION_SIZE;
} while (pmd++, addr += SECTION_SIZE, addr != end);


flush_pmd_entry(p);
} else {

/*
* No need to loop; pte's aren't interested in the
* individual L1 entries.
*/
alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
}
}

By now, the mappings for the DDR2 memory have been created. Below is the debug output:

bootmem_init > initrd_node = 0, node = 0
bootmem_init_node > start = 0x40200, end = 0x4b800, start_pfn = 0x40200, end_pfn = 0x4b800
map_memory_bank > pfn = 0x40200, virtual = 0xc0000000, length = 0xb600000


create_mapping 0x40200000->0xc0000000(0xb600000)
md->type = 9
addr = 0xc0000000, phys = 0x40200000, length = 0xb600000
init_mm.pgd = 0xc00fc000


pgd = 0xc00ff000, addr = 0xc0000000, next = 0xc0200000, phys = 0x40200000, type.prot_pte = 0x0, , type.prot_l1 = 0x0, type.prot_sect = 0x40e, type.domain = 0x0
alloc_init_section: pmd = 0xc00ff000
pmd value = 0x4020040e
pmd value = 0x4030040e


pgd = 0xc00ff008, addr = 0xc0200000, next = 0xc0400000, phys = 0x40400000, type.prot_pte = 0x0, , type.prot_l1 = 0x0, type.prot_sect = 0x40e, type.domain = 0x0
alloc_init_section: pmd = 0xc00ff008
pmd value = 0x4040040e
pmd value = 0x4050040e

...

pgd = 0xc00ff2c8, addr = 0xcb200000, next = 0xcb400000, phys = 0x4b400000, type.prot_pte = 0x0, , type.prot_l1 = 0x0, type.prot_sect = 0x40e, type.domain = 0x0
alloc_init_section: pmd = 0xc00ff2c8
pmd value = 0x4b40040e
pmd value = 0x4b50040e


pgd = 0xc00ff2d0, addr = 0xcb400000, next = 0xcb600000, phys = 0x4b600000, type.prot_pte = 0x0, , type.prot_l1 = 0x0, type.prot_sect = 0x40e, type.domain = 0x0
alloc_init_section: pmd = 0xc00ff2d0
pmd value = 0x4b60040e
pmd value = 0x4b70040e


Let's do a little test here. Say the CPU issues the virtual address 0xcb200000. The MMU will:

1. concatenate bits[31:14] of the translation table base register (TTBR) (1100, 0000, 0000, 1111, 11) with bits[31:20] of the virtual address (1100, 1011, 0010) and two ZEROs, to get the address of the entry: 1100, 0000, 0000, 1111, 1111, 0010, 1100, 1000 = 0xc00ff2c8. (Note: the hardware register holds the table's physical address; the virtual address 0xc00fc000 is used here so that the result can be compared directly with the log above.)

2. fetch the entry, which is 0x4b40040e. bits[1:0] = 0b10, so this is a 1st level SECTION descriptor.

3. concatenate bits[31:20] of the 1st level section descriptor with bits[19:0] of the virtual address, giving 0x4b400000. This is the target physical address!

We can check this address with the macro #define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET): 0x4b400000 - 0x40200000 + 0xc0000000 = 0xcb200000.

It's CORRECT!



So far, the entries for mapping the DDR2 memory have been created. Next, the mappings for the other memory blocks need to be created. Let's go back to paging_init() and see what happens.


E. devicemaps_init (struct machine_desc *mdesc)

    This routine creates maps for 

1). interrupt vectors, and

2). IO spaces.

Let's take a look in detail:


1). Creates maps for Interrupt Vector

/* Allocate the vector page early 

                *  ARM architecture either put vector at HIGH address, 0xffff0000

*  or LOW address, 0x00000000.  TCC8900 chooses HIGH vector 

*

*  alloc_bootmem_low_pages(length_in_byte) is a macro of the bootmem allocator

*  I will explore the bootmem allocator in another post.

*  Here what we need to know is that it returns the start address of one page frame (since the parameter is PAGE_SIZE, 4KB, 0x1000) */

vectors = alloc_bootmem_low_pages(PAGE_SIZE);

/* Clear the entries which map the virtual address range from VMALLOC_END (0xe0000000) to 4GB */

for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));

/* creates mappings for the vector page: mapping to virtual address 0xffff0000, HIGH vector*/

map.pfn = __phys_to_pfn(virt_to_phys(vectors));
map.virtual = 0xffff0000;
map.length = PAGE_SIZE;
map.type = MT_HIGH_VECTORS;
create_mapping(&map);


Below is the log. Note that since the vector mapping is NOT 1MB aligned (it is a single 4KB page), a pte (2nd level entry) is created in addition to the pmd.

create_mapping:0x42971000->0xffff0000(0x1000)
md->type = 8
addr = 0xffff0000, phys = 0x42971000, length = 0x1000
pgd = 0xc00ffff8, addr = 0xffff0000, next = 0xffff1000, phys = 0x42971000, type.prot_pte = 0x34b, , type.prot_l1 = 0x21, type.prot_sect = 0x0, type.domain = 0x1

alloc_init_section: pmd = 0xc00ffff8
alloc_init_pte
(pmd_none)pte = 0xc2772000
pmd = 0xc00ffff8, pmd value = 0x42972021
pte = 0xc2772fc0
pfn = 0x42971


2) mapping IO spaces

if (mdesc->map_io)
mdesc->map_io();


The map_io field of machine_desc points to the function tcc8900_map_io(), which eventually calls iotable_init().

This function is simple: it creates a mapping for each IO region.

void __init iotable_init(struct map_desc *io_desc, int nr)
{
int i;
for (i = 0; i < nr; i++)
create_mapping(io_desc + i);
}


TCC8900 defines its IO regions (9 regions) in /arch/arm/mach-tcc8900/io.c.

Except for the last region, each region's virtual and physical addresses are the same. There is no special reason for this; it is simply how the BSP defines them.

static struct map_desc tcc8900_io_desc[] __initdata = {
    {
        .virtual    = 0xF0000000,
        .pfn        = __phys_to_pfn(0xF0000000),   
        .length     = 0x100000,                   
        .type       = MT_DEVICE
    },
    {
        .virtual    = 0xF0100000,
        .pfn        = __phys_to_pfn(0xF0100000),
        .length     = 0x100000,               
        .type       = MT_DEVICE
    },
   {
        .virtual    = 0xF0200000,
        .pfn        = __phys_to_pfn(0xF0200000),
        .length     = 0x100000,                
        .type       = MT_DEVICE
    },
    {
        .virtual    = 0xF0300000,
        .pfn        = __phys_to_pfn(0xF0300000),
        .length     = 0x100000,                
        .type       = MT_DEVICE
    },
    {
        .virtual    = 0xF0400000,
        .pfn        = __phys_to_pfn(0xF0400000),
        .length     = 0x100000,                
        .type       = MT_DEVICE
    },
    {
        .virtual    = 0xF0500000,
        .pfn        = __phys_to_pfn(0xF0500000),
        .length     = 0x100000,                
        .type       = MT_DEVICE
    },
    {
        .virtual    = 0xF0600000,
        .pfn        = __phys_to_pfn(0xF0600000),
        .length     = 0x100000,                
        .type       = MT_DEVICE
  },
    {
        .virtual    = 0xF0700000,
        .pfn        = __phys_to_pfn(0xF0700000),
        .length     = 0x100000,                
        .type       = MT_DEVICE
    },
    {
        .virtual    = 0xEFF00000,
        .pfn        = __phys_to_pfn(0x10000000),
        .length     = 0x100000,                
        .type       = MT_MEMORY_TCC
    },
};

   

Below is the log. Given that each IO region's length is exactly 1MB (0x100000), a single 1st level descriptor is enough for each region.

create_mapping:0xf0000000->0xf0000000(0x100000)
md->type = 0
addr = 0xf0000000, phys = 0xf0000000, length = 0x100000
pgd = 0xc00ffc00, addr = 0xf0000000, next = 0xf0100000, phys = 0xf0000000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc00
pmd value = 0xf0010456


create_mapping:0xf0100000->0xf0100000(0x100000)
md->type = 0
addr = 0xf0100000, phys = 0xf0100000, length = 0x100000
pgd = 0xc00ffc00, addr = 0xf0100000, next = 0xf0200000, phys = 0xf0100000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc00
pmd value = 0xf0110456



create_mapping:0xf0200000->0xf0200000(0x100000)
md->type = 0
addr = 0xf0200000, phys = 0xf0200000, length = 0x100000
pgd = 0xc00ffc08, addr = 0xf0200000, next = 0xf0300000, phys = 0xf0200000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc08
pmd value = 0xf0210456


create_mapping:0xf0300000->0xf0300000(0x100000)
md->type = 0
addr = 0xf0300000, phys = 0xf0300000, length = 0x100000
pgd = 0xc00ffc08, addr = 0xf0300000, next = 0xf0400000, phys = 0xf0300000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc08
pmd value = 0xf0310456


create_mapping:0xf0400000->0xf0400000(0x100000)
md->type = 0
addr = 0xf0400000, phys = 0xf0400000, length = 0x100000
pgd = 0xc00ffc10, addr = 0xf0400000, next = 0xf0500000, phys = 0xf0400000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc10
pmd value = 0xf0410456


create_mapping:0xf0500000->0xf0500000(0x100000)
md->type = 0
addr = 0xf0500000, phys = 0xf0500000, length = 0x100000
pgd = 0xc00ffc10, addr = 0xf0500000, next = 0xf0600000, phys = 0xf0500000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc10
pmd value = 0xf0510456


create_mapping:0xf0600000->0xf0600000(0x100000)
md->type = 0
addr = 0xf0600000, phys = 0xf0600000, length = 0x100000
pgd = 0xc00ffc18, addr = 0xf0600000, next = 0xf0700000, phys = 0xf0600000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc18
pmd value = 0xf0610456


create_mapping:0xf0700000->0xf0700000(0x100000)
md->type = 0
addr = 0xf0700000, phys = 0xf0700000, length = 0x100000
pgd = 0xc00ffc18, addr = 0xf0700000, next = 0xf0800000, phys = 0xf0700000, type.prot_pte = 0x4d3, , type.prot_l1 = 0x41, type.prot_sect = 0x10456, type.domain = 0x2

alloc_init_section: pmd = 0xc00ffc18
pmd value = 0xf0710456


create_mapping:0x10000000->0xeff00000(0x100000)
md->type = 11
addr = 0xeff00000, phys = 0x10000000, length = 0x100000
pgd = 0xc00ffbf8, addr = 0xeff00000, next = 0xf0000000, phys = 0x10000000, type.prot_pte = 0x0, , type.prot_l1 = 0x0, type.prot_sect = 0x402, type.domain = 0x0

alloc_init_section: pmd = 0xc00ffbf8
pmd value = 0x10000402




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值